# Standard library
import sys
from datetime import datetime

# Third-party scientific / single-cell stack
import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns
import igraph as ig
import matplotlib.pyplot as plt
from scipy.sparse import csr_matrix, isspmatrix

# Project-local helpers: the parent directory must be on sys.path before the
# `functions` module can be imported, so the order of these two lines matters.
sys.path.append('../')
import functions as fn

# Report the versions of the core libraries (numpy, pandas, scanpy), one per line.
for library in (np, pd, sc):
    print(library.__version__)

1.23.5
2.0.0
1.9.3


# Global scanpy configuration for this notebook.
# Verbosity level 3: per the scanpy settings documentation this also shows "hint" messages.
sc.settings.verbosity = 3
# Default figure parameters for all subsequent plots, with dpi set to 100.
sc.settings.set_figure_params(dpi=100)


# Timestamp: start of computations (local time, timezone-naive).
print(datetime.now())

2025-07-23 16:07:42.146067


# Load the AnnData object from the .h5ad file.
# NOTE(review): the file name suggests a filtered and normalized dataset — confirm upstream.
adata = sc.read('../../../../Polioudakis/3_FiltNormAdata.h5ad')


# Display the AnnData summary (dimensions and available annotation slots).
adata

AnnData object with n_obs × n_vars = 27457 × 17263
    obs: 'Auth_Cluster', 'Auth_Subcluster', 'Auth_Donor', 'Auth_Layer', 'Auth_Gestation_week', 'Auth_Index', 'Auth_Library', 'Auth_Number_genes_detected', 'Auth_Number_UMI', 'Auth_Percentage_mitochondrial', 'Auth_S_phase_score', 'Auth_G2M_phase_score', 'Auth_Phase', 'dataset_id', 'sample_id', 'cell_label', 'brain_region', 'age', 'stage', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'total_counts_mito', 'log1p_total_counts_mito', 'pct_counts_mito', 'total_counts_ribo', 'log1p_total_counts_ribo', 'pct_counts_ribo', 'log1p_gene_UMI_ratio', 'n_genes', 'n_counts', 'Leiden_02', 'Leiden_04', 'Leiden_06', 'Leiden_Sel'
    var: 'mito', 'ribo', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'highly_variable_nbatches', 'highly_variable_intersection'
    uns: 'Leiden_02', 'Leiden_02_colors', 'Leiden_04', 'Leiden_04_colors', 'Leiden_06', 'Leiden_06_colors', 'Leiden_Sel_colors', 'cell_label_colors', 'diffmap_evals', 'draw_graph', 'harmony', 'hvg', 'log1p', 'pca', 'sample_id_colors', 'umap'
    obsm: 'X_diffmap_harmony', 'X_fa_harmony', 'X_pca', 'X_pca_harmony', 'X_umap_harmony', 'X_umap_nocorr'
    varm: 'PCs'
    layers: 'counts'
    obsp: 'harmony_connectivities', 'harmony_distances', 'pca_connectivities', 'pca_distances'


# Check whether the main data matrix adata.X is stored as a SciPy sparse matrix.
isspmatrix(adata.X)

True


# Report the dimensions of the loaded object: observations (cells) and variables (genes).
for label, count in (
    ('Loaded Normalizes AnnData object: number of cells', adata.n_obs),
    ('Loaded Normalizes AnnData object: number of genes', adata.n_vars),
):
    print(label, count)

# List the per-cell metadata columns stored in adata.obs.
obs_columns = adata.obs.columns
print('Available metadata for each cell: ', obs_columns)

Loaded Normalizes AnnData object: number of cells 27457
Loaded Normalizes AnnData object: number of genes 17263
Available metadata for each cell:  Index(['Auth_Cluster', 'Auth_Subcluster', 'Auth_Donor', 'Auth_Layer',
       'Auth_Gestation_week', 'Auth_Index', 'Auth_Library',
       'Auth_Number_genes_detected', 'Auth_Number_UMI',
       'Auth_Percentage_mitochondrial', 'Auth_S_phase_score',
       'Auth_G2M_phase_score', 'Auth_Phase', 'dataset_id', 'sample_id',
       'cell_label', 'brain_region', 'age', 'stage', 'n_genes_by_counts',
       'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts',
       'total_counts_mito', 'log1p_total_counts_mito', 'pct_counts_mito',
       'total_counts_ribo', 'log1p_total_counts_ribo', 'pct_counts_ribo',
       'log1p_gene_UMI_ratio', 'n_genes', 'n_counts', 'Leiden_02', 'Leiden_04',
       'Leiden_06', 'Leiden_Sel'],
      dtype='object')


# Path to the tab-separated receptor signature table.
signatures = '../../../../DataDir/ExternalData/Receptors/ReceptorsComplete.txt'


# keep_default_na=False: do NOT interpret pandas' default NA markers (e.g. "NA", "")
# as missing values; such cells are kept as literal strings instead of becoming NaN.
# Nothing is removed from the table by this option.
sig = pd.read_csv(signatures, sep="\t", keep_default_na=False)
print(sig.shape)
sig

(39, 2)


# Plain Python list of the gene names in the "GeneName" column of the signature table.
genes = sig["GeneName"].values.tolist()


# List the embeddings stored in adata.obsm; their keys are the valid `basis` values used below.
adata.obsm

AxisArrays with keys: X_diffmap_harmony, X_fa_harmony, X_pca, X_pca_harmony, X_umap_harmony, X_umap_nocorr


# Per-cell QC columns of adata.obs to overlay on the "X_umap_harmony" embedding, one panel each.
qc_covariates = ['n_genes_by_counts', "total_counts", 'pct_counts_mito', 'pct_counts_ribo']
sc.pl.embedding(adata, color=qc_covariates, basis="X_umap_harmony")


# Colour the "X_umap_harmony" embedding by sample and by cell label; ncols=1 stacks the panels vertically.
sc.pl.embedding(adata,  basis="X_umap_harmony", color=['sample_id', 'cell_label'], ncols=1)

/usr/local/lib/python3.8/dist-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(
/usr/local/lib/python3.8/dist-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(


# Same two annotations on the "X_fa_harmony" embedding.
# NOTE(review): presumably a force-directed (ForceAtlas) layout — confirm where it was computed.
sc.pl.embedding(adata,  basis="X_fa_harmony", color=['sample_id', 'cell_label'], ncols=1)

/usr/local/lib/python3.8/dist-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(
/usr/local/lib/python3.8/dist-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(


# Hex colour for each aggregated cell class; passed as `palette` when plotting 'super_cell_label'.
# The keys must match the values of sup_dictG.
col1 = {'Off-Target':'#c5b0d5',
        'ExcitatoryNeu':'#aa40fc',
        'Progenitors&RG':'#ff7f0e',
        'InhibitoryNeu':'#17becf'}


# Mapping from each fine-grained 'cell_label' value to one of four broader classes:
# 'ExcitatoryNeu', 'InhibitoryNeu', 'Progenitors&RG' or 'Off-Target'.
sup_dictG  = {'End':'Off-Target',
               'ExDp1': 'ExcitatoryNeu',
               'ExDp2': 'ExcitatoryNeu', 
               'ExM': 'ExcitatoryNeu', 
               'ExM-U': 'ExcitatoryNeu', 
               'ExN': 'ExcitatoryNeu', 
               'IP': 'Progenitors&RG', 
               'InCGE': 'InhibitoryNeu', 
               'InMGE': 'InhibitoryNeu', 
               'Mic': 'Off-Target', 
               'OPC': 'Off-Target', 
               'Per':'Off-Target', 
               'PgG2M':'Progenitors&RG', 
               'PgS':'Progenitors&RG', 
               'oRG':'Progenitors&RG', 
               'vRG':'Progenitors&RG'}

# Create the aggregated annotation: each cell_label is replaced by its broader class.
# Labels that are not keys of sup_dictG are left unchanged by replace().
# NOTE(review): if 'cell_label' is categorical, replace() behaviour on categoricals
# differs across pandas versions — re-check this line when upgrading pandas.
adata.obs['super_cell_label'] = adata.obs['cell_label'].replace(sup_dictG)


# Plot the aggregated classes on the "X_umap_harmony" embedding using the fixed colours in col1.
sc.pl.embedding(adata,  basis="X_umap_harmony", color=['super_cell_label'], ncols=1, palette=col1)

/usr/local/lib/python3.8/dist-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(


# Project helper from the local `functions` module, called with the receptor gene list
# and the "X_umap_harmony" embedding.
# NOTE(review): presumably plots each gene found in adata on the embedding and reports
# the missing ones — confirm against functions.CustomUmap.
fn.CustomUmap(adata, genes, embedding="X_umap_harmony")

The following marker genes are missing:  {'AHR', 'PGR', 'ESR1', 'RXRG', 'ESRRB', 'PTGER1', 'AR', 'GPER1', 'DIO3', 'VDR', 'PTGER3', 'PTGER4', 'CYP19A1', 'PTGER2', 'PPARG', 'RARB', 'THRSP', 'DIO1'}


# Same helper call with the "X_fa_harmony" embedding.
# NOTE(review): behaviour of the helper is not visible here — confirm against functions.CustomUmap.
fn.CustomUmap(adata, genes, embedding="X_fa_harmony")

The following marker genes are missing:  {'AHR', 'PGR', 'ESR1', 'RXRG', 'ESRRB', 'PTGER1', 'AR', 'GPER1', 'DIO3', 'VDR', 'PTGER3', 'PTGER4', 'CYP19A1', 'PTGER2', 'PPARG', 'RARB', 'THRSP', 'DIO1'}


# Keep only the receptor genes that are present in the dataset, preserving
# their order in `genes`.
available_genes = [name for name in genes if name in adata.var_names]

# Draw the dot plot only when at least one gene is available.
if not available_genes:
    print("None of the specified genes are found in adata.var_names.")
else:
    sc.pl.dotplot(adata, available_genes, groupby='cell_label')

/usr/local/lib/python3.8/dist-packages/scanpy/plotting/_dotplot.py:749: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
  dot_ax.scatter(x, y, **kwds)


# Timestamp: end of computations (local time, timezone-naive).
print(datetime.now())

2025-07-23 16:22:14.400917


%%bash

# Export this notebook as a Python script and as an HTML page, so that both
# versions can be tracked in git alongside the .ipynb file.
jupyter nbconvert ExplorationPolioudakis.ipynb --to="python" --output="ExplorationPolioudakis"
jupyter nbconvert ExplorationPolioudakis.ipynb --to="html" --output="ExplorationPolioudakis"

[NbConvertApp] Converting notebook ExplorationPolioudakis.ipynb to python
[NbConvertApp] Writing 4912 bytes to ExplorationPolioudakis.py
[NbConvertApp] Converting notebook ExplorationPolioudakis.ipynb to html
[NbConvertApp] Writing 12888297 bytes to ExplorationPolioudakis.html

	GeneName	Signature
0	THRB	Thyroid
1	THRA	Thyroid
2	THRAP3	Thyroid
3	DIO1	Thyroid
4	DIO2	Thyroid
5	DIO3	Thyroid
6	SLC16A10	Thyroid
7	SLC16A2	Thyroid
8	SLC7A5	Thyroid
9	KLF9	Thyroid
10	THRSP	Thyroid
11	ESRRG	Estrogen
12	ESRRA	Estrogen
13	GPER1	Estrogen
14	ESR1	Estrogen
15	ESR2	Estrogen
16	ESRRB	Estrogen
17	CYP19A1	Estrogen
18	AR	Androgen
19	RBP4	Retinoic Acid
20	RARA	Retinoic Acid
21	RARB	Retinoic Acid
22	RARG	Retinoic Acid
23	RXRA	Retinoic Acid
24	RXRB	Retinoic Acid
25	RXRG	Retinoic Acid
26	AHR	AhHyd
27	NR3C1	GC
28	NR1H2	LivX
29	NR1H3	LivX
30	PTGER1	PGE2
31	PTGER2	PGE2
32	PTGER3	PGE2
33	PTGER4	PGE2
34	PPARA	PPAR
35	PPARD	PPAR
36	PPARG	PPAR
37	PGR	Progesterone
38	VDR	Vitamine D

Exploration of hormonal receptor genes in Polioudakis et al. human fetal brain dataset

1. Environment Set Up

1.1 Library upload

1.2 Starting computations: timestamp

2. Read input files

2.1 adata loading

2.2 Receptors signature loading

3. Visualizations

3.1 Counts from adata

3.2 Clusters annotation

3.3 Visualization of receptors on UMAP

4. Save Notebooks

4.1 Timestamp finished computations

4.2 Save python and html version of notebook