Reference paper: We focus on the cortical anlage at mid-gestation (gestation week (GW) 17 to 18) because this period contains the major germinal zones and the developing cortical laminae containing migrating and newly born neurons, and neurodevelopmental processes occurring during this epoch are implicated in neuropsychiatric disease. To optimize detection of distinct cell types, prior to single-cell isolation we separated the cortex into: the germinal zones (ventricular zone (VZ) and subventricular zone (SVZ)) and developing cortex (subplate (SP) and cortical plate (CP)).
import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns
import igraph as ig
import matplotlib.pyplot as plt
from scipy.sparse import csr_matrix, isspmatrix
from datetime import datetime
# Make the parent directory importable so that the project-local `functions`
# module can be loaded (presumably it lives in '../' -- confirm).
# `sys` was used here without ever being imported, which raises NameError when
# the notebook is run top to bottom, so it is imported explicitly.
import sys

sys.path.append('../')
import functions as fn
# Report the numpy, pandas and scanpy versions, one per line.
print(np.__version__, pd.__version__, sc.__version__, sep='\n')
1.23.5 2.0.0 1.9.3
# Configure scanpy: logging verbosity level and figure resolution (dpi).
sc.settings.verbosity = 3
sc.settings.set_figure_params(dpi=100)
# Print the current time; a second timestamp is printed at the end of the notebook.
print(datetime.now())
2025-07-23 16:07:42.146067
GENERAL INFO before filtering:
# Load the AnnData object from disk (filtered and normalized, judging by the
# file name -- confirm against the upstream notebook that wrote it).
adata = sc.read('../../../../Polioudakis/3_FiltNormAdata.h5ad')
# Display the object summary (dimensions and available annotations).
adata
AnnData object with n_obs × n_vars = 27457 × 17263 obs: 'Auth_Cluster', 'Auth_Subcluster', 'Auth_Donor', 'Auth_Layer', 'Auth_Gestation_week', 'Auth_Index', 'Auth_Library', 'Auth_Number_genes_detected', 'Auth_Number_UMI', 'Auth_Percentage_mitochondrial', 'Auth_S_phase_score', 'Auth_G2M_phase_score', 'Auth_Phase', 'dataset_id', 'sample_id', 'cell_label', 'brain_region', 'age', 'stage', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'total_counts_mito', 'log1p_total_counts_mito', 'pct_counts_mito', 'total_counts_ribo', 'log1p_total_counts_ribo', 'pct_counts_ribo', 'log1p_gene_UMI_ratio', 'n_genes', 'n_counts', 'Leiden_02', 'Leiden_04', 'Leiden_06', 'Leiden_Sel' var: 'mito', 'ribo', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'highly_variable_nbatches', 'highly_variable_intersection' uns: 'Leiden_02', 'Leiden_02_colors', 'Leiden_04', 'Leiden_04_colors', 'Leiden_06', 'Leiden_06_colors', 'Leiden_Sel_colors', 'cell_label_colors', 'diffmap_evals', 'draw_graph', 'harmony', 'hvg', 'log1p', 'pca', 'sample_id_colors', 'umap' obsm: 'X_diffmap_harmony', 'X_fa_harmony', 'X_pca', 'X_pca_harmony', 'X_umap_harmony', 'X_umap_nocorr' varm: 'PCs' layers: 'counts' obsp: 'harmony_connectivities', 'harmony_distances', 'pca_connectivities', 'pca_distances'
# Check whether the expression matrix adata.X is stored as a SciPy sparse matrix.
isspmatrix(adata.X)
True
# Summarize the loaded object: number of cells (observations) and genes (variables).
print('Loaded Normalizes AnnData object: number of cells', adata.n_obs)
print('Loaded Normalizes AnnData object: number of genes', adata.n_vars)
# List the metadata columns available for each cell (columns of adata.obs).
print('Available metadata for each cell: ', adata.obs.columns)
Loaded Normalizes AnnData object: number of cells 27457 Loaded Normalizes AnnData object: number of genes 17263 Available metadata for each cell: Index(['Auth_Cluster', 'Auth_Subcluster', 'Auth_Donor', 'Auth_Layer', 'Auth_Gestation_week', 'Auth_Index', 'Auth_Library', 'Auth_Number_genes_detected', 'Auth_Number_UMI', 'Auth_Percentage_mitochondrial', 'Auth_S_phase_score', 'Auth_G2M_phase_score', 'Auth_Phase', 'dataset_id', 'sample_id', 'cell_label', 'brain_region', 'age', 'stage', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'total_counts_mito', 'log1p_total_counts_mito', 'pct_counts_mito', 'total_counts_ribo', 'log1p_total_counts_ribo', 'pct_counts_ribo', 'log1p_gene_UMI_ratio', 'n_genes', 'n_counts', 'Leiden_02', 'Leiden_04', 'Leiden_06', 'Leiden_Sel'], dtype='object')
Loading the hormonal receptor gene signatures.
# Path to the tab-separated table of receptor genes (columns: GeneName, Signature).
signatures = '../../../../DataDir/ExternalData/Receptors/ReceptorsComplete.txt'
# keep_default_na=False does NOT drop missing values: it disables pandas' default
# NaN parsing, so strings such as "NA" or empty fields are kept as literal text.
sig = pd.read_csv(signatures, sep="\t", keep_default_na=False)
# Show the table dimensions, then display the table itself.
print(sig.shape)
sig
(39, 2)
GeneName | Signature | |
---|---|---|
0 | THRB | Thyroid |
1 | THRA | Thyroid |
2 | THRAP3 | Thyroid |
3 | DIO1 | Thyroid |
4 | DIO2 | Thyroid |
5 | DIO3 | Thyroid |
6 | SLC16A10 | Thyroid |
7 | SLC16A2 | Thyroid |
8 | SLC7A5 | Thyroid |
9 | KLF9 | Thyroid |
10 | THRSP | Thyroid |
11 | ESRRG | Estrogen |
12 | ESRRA | Estrogen |
13 | GPER1 | Estrogen |
14 | ESR1 | Estrogen |
15 | ESR2 | Estrogen |
16 | ESRRB | Estrogen |
17 | CYP19A1 | Estrogen |
18 | AR | Androgen |
19 | RBP4 | Retinoic Acid |
20 | RARA | Retinoic Acid |
21 | RARB | Retinoic Acid |
22 | RARG | Retinoic Acid |
23 | RXRA | Retinoic Acid |
24 | RXRB | Retinoic Acid |
25 | RXRG | Retinoic Acid |
26 | AHR | AhHyd |
27 | NR3C1 | GC |
28 | NR1H2 | LivX |
29 | NR1H3 | LivX |
30 | PTGER1 | PGE2 |
31 | PTGER2 | PGE2 |
32 | PTGER3 | PGE2 |
33 | PTGER4 | PGE2 |
34 | PPARA | PPAR |
35 | PPARD | PPAR |
36 | PPARG | PPAR |
37 | PGR | Progesterone |
38 | VDR | Vitamine D |
# Gene symbols from the signature table, as a plain Python list.
genes = sig["GeneName"].tolist()
# Display the embeddings stored in the object.
adata.obsm
AxisArrays with keys: X_diffmap_harmony, X_fa_harmony, X_pca, X_pca_harmony, X_umap_harmony, X_umap_nocorr
# Per-cell QC metrics on the 'X_umap_harmony' embedding: detected genes,
# total counts, and mitochondrial / ribosomal count percentages.
sc.pl.embedding(adata, basis="X_umap_harmony", color=['n_genes_by_counts',"total_counts", 'pct_counts_mito', 'pct_counts_ribo'])
# Sample of origin and cell-type label on the same embedding, one panel per row.
sc.pl.embedding(adata, basis="X_umap_harmony", color=['sample_id', 'cell_label'], ncols=1)
/usr/local/lib/python3.8/dist-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored cax = scatter( /usr/local/lib/python3.8/dist-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored cax = scatter(
# Same two annotations on the 'X_fa_harmony' embedding
# (presumably a force-directed graph layout -- confirm).
sc.pl.embedding(adata, basis="X_fa_harmony", color=['sample_id', 'cell_label'], ncols=1)
/usr/local/lib/python3.8/dist-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored cax = scatter( /usr/local/lib/python3.8/dist-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored cax = scatter(
# Colour (hex code) assigned to each aggregated cell class; used as the plot palette below.
col1 = {'Off-Target':'#c5b0d5',
'ExcitatoryNeu':'#aa40fc',
'Progenitors&RG':'#ff7f0e',
'InhibitoryNeu':'#17becf'}
# Mapping from the labels in adata.obs['cell_label'] to the four aggregated
# classes that are the keys of col1.
sup_dictG = {'End':'Off-Target',
'ExDp1': 'ExcitatoryNeu',
'ExDp2': 'ExcitatoryNeu',
'ExM': 'ExcitatoryNeu',
'ExM-U': 'ExcitatoryNeu',
'ExN': 'ExcitatoryNeu',
'IP': 'Progenitors&RG',
'InCGE': 'InhibitoryNeu',
'InMGE': 'InhibitoryNeu',
'Mic': 'Off-Target',
'OPC': 'Off-Target',
'Per':'Off-Target',
'PgG2M':'Progenitors&RG',
'PgS':'Progenitors&RG',
'oRG':'Progenitors&RG',
'vRG':'Progenitors&RG'}
# Create the aggregated annotation. replace() leaves any label that is not a
# key of sup_dictG unchanged, so an unexpected label would have no colour in col1.
adata.obs['super_cell_label'] = adata.obs['cell_label'].replace(sup_dictG)
# Plot the aggregated classes on the 'X_umap_harmony' embedding with the fixed palette.
sc.pl.embedding(adata, basis="X_umap_harmony", color=['super_cell_label'], ncols=1, palette=col1)
/usr/local/lib/python3.8/dist-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored cax = scatter(
# Call the project helper fn.CustomUmap (defined in the `functions` module, not
# shown here) with the signature genes on the 'X_umap_harmony' embedding.
# NOTE(review): judging by the recorded output, it reports genes missing from the dataset -- confirm.
fn.CustomUmap(adata, genes, embedding="X_umap_harmony")
The following marker genes are missing: {'AHR', 'PGR', 'ESR1', 'RXRG', 'ESRRB', 'PTGER1', 'AR', 'GPER1', 'DIO3', 'VDR', 'PTGER3', 'PTGER4', 'CYP19A1', 'PTGER2', 'PPARG', 'RARB', 'THRSP', 'DIO1'}
# Same project helper call with the signature genes, this time on the
# 'X_fa_harmony' embedding.
fn.CustomUmap(adata, genes, embedding="X_fa_harmony")
The following marker genes are missing: {'AHR', 'PGR', 'ESR1', 'RXRG', 'ESRRB', 'PTGER1', 'AR', 'GPER1', 'DIO3', 'VDR', 'PTGER3', 'PTGER4', 'CYP19A1', 'PTGER2', 'PPARG', 'RARB', 'THRSP', 'DIO1'}
# Keep only the signature genes that are present in adata.var_names, so the
# dot plot is only asked for genes that exist in this dataset.
available_genes = [gene for gene in genes if gene in adata.var_names]
if available_genes:
    # Dot plot of the available receptor genes, grouped by cell-type label.
    sc.pl.dotplot(adata, available_genes, groupby='cell_label')
else:
    # Nothing to plot: none of the requested genes is in the dataset.
    print("None of the specified genes are found in adata.var_names.")
/usr/local/lib/python3.8/dist-packages/scanpy/plotting/_dotplot.py:749: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored dot_ax.scatter(x, y, **kwds)
The AnnData object is not saved, since no new computations have been performed; only the notebook itself is exported (to Python and HTML).
# Print the current time at the end of the run (pairs with the timestamp printed after setup).
print(datetime.now())
2025-07-23 16:22:14.400917
%%bash
# Export this notebook as a Python script and as an HTML page so that both
# versions can be tracked in git alongside the .ipynb file.
jupyter nbconvert ExplorationPolioudakis.ipynb --to="python" --output="ExplorationPolioudakis"
jupyter nbconvert ExplorationPolioudakis.ipynb --to="html" --output="ExplorationPolioudakis"
[NbConvertApp] Converting notebook ExplorationPolioudakis.ipynb to python [NbConvertApp] Writing 4912 bytes to ExplorationPolioudakis.py [NbConvertApp] Converting notebook ExplorationPolioudakis.ipynb to html [NbConvertApp] Writing 12888297 bytes to ExplorationPolioudakis.html