import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns
import igraph as ig
import matplotlib.pyplot as plt 
from scipy.sparse import csr_matrix, isspmatrix
from datetime import datetime
import sys
sys.path.append('../')
import functions as fn

print(np.__version__)
print(pd.__version__)
print(sc.__version__)

1.23.5
2.0.0
1.9.3
Running Scanpy 1.9.3, on 2025-07-23 11:31.


sc.settings.verbosity = 3
sc.settings.set_figure_params(dpi=100)


print(datetime.now())

2025-07-23 11:31:49.003619


adata = sc.read('../../../../Castaldi_multiplexingCBO/adataPaga.h5ad')


adata

AnnData object with n_obs × n_vars = 14913 × 3499
    obs: 'dataset', 'cellID', 'cellID_newName', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'total_counts_mt', 'log1p_total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'log1p_total_counts_ribo', 'pct_counts_ribo', 'stage', 'type', 'id_stage', 'cellID_newName_type', 'S_score', 'G2M_score', 'phase', 'leidenAnnotated', 'leiden_1.2', 'endpoint_GlutamatergicNeurons_late', 'endpoint_GlutamatergicNeurons_early', 'endpoint_MigratingNeurons', 'endpoint_OuterRadialGliaAstrocytes', 'endpoint_Interneurons', 'endpoint_Interneurons_GAD2', 'endpoint_CajalR_like', 'Exc_Lineage', 'endpoint_GlutamatergicNeurons_both'
    var: 'highly_variable', 'mean', 'std'
    uns: 'Exc_Lineage_colors', 'cellID_colors', 'cellID_newName_colors', 'cellID_newName_type_colors', 'cluster_colors', 'dataset_colors', 'diffmap_evals', 'draw_graph', 'leiden', 'leidenAnnotated_colors', 'leiden_1.2_colors', 'leiden_1.2_sizes', 'leiden_Filt_colors', 'leiden_colors', 'neighbors', 'paga', 'pca', 'phase_colors', 'score_filt', 'stage_colors', 'type_colors', 'umap'
    obsm: 'X_diffmap', 'X_draw_graph_fa', 'X_pca', 'X_umap'
    varm: 'PCs'
    obsp: 'connectivities', 'distances'


print('Loaded Normalizes AnnData object: number of cells', adata.n_obs)
print('Loaded Normalizes AnnData object: number of genes', adata.n_vars)

# To see the columns of the metadata (information available for each cell)  
print('Available metadata for each cell: ', adata.obs.columns)

Loaded Normalizes AnnData object: number of cells 14913
Loaded Normalizes AnnData object: number of genes 3499
Available metadata for each cell:  Index(['dataset', 'cellID', 'cellID_newName', 'n_genes_by_counts',
       'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts',
       'total_counts_mt', 'log1p_total_counts_mt', 'pct_counts_mt',
       'total_counts_ribo', 'log1p_total_counts_ribo', 'pct_counts_ribo',
       'stage', 'type', 'id_stage', 'cellID_newName_type', 'S_score',
       'G2M_score', 'phase', 'leidenAnnotated', 'leiden_1.2',
       'endpoint_GlutamatergicNeurons_late',
       'endpoint_GlutamatergicNeurons_early', 'endpoint_MigratingNeurons',
       'endpoint_OuterRadialGliaAstrocytes', 'endpoint_Interneurons',
       'endpoint_Interneurons_GAD2', 'endpoint_CajalR_like', 'Exc_Lineage',
       'endpoint_GlutamatergicNeurons_both'],
      dtype='object')


np.unique(adata.obs.values[:,14])

array(['downstream', 'upstream'], dtype=object)


signatures = '../../../../DataDir/ExternalData/Receptors/ReceptorsComplete.txt'


sig = pd.read_csv(signatures, sep="\t", keep_default_na=False)  
print(sig.shape)
sig

(39, 2)


genes = sig["GeneName"].values.tolist()


adata.obsm

AxisArrays with keys: X_diffmap, X_draw_graph_fa, X_pca, X_umap


sc.pl.embedding(adata, basis="X_umap", color=['n_genes_by_counts',"total_counts", 'pct_counts_mt', 'pct_counts_ribo'])


sc.pl.embedding(adata,  basis="X_umap", color=['leidenAnnotated'], ncols=1)

/usr/local/lib/python3.8/dist-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(


sc.pl.embedding(adata,  basis="X_draw_graph_fa", color=['leidenAnnotated'], ncols=1)

/usr/local/lib/python3.8/dist-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(


fn.CustomUmap(adata, genes, embedding="X_umap")

The following marker genes are missing:  {'NR1H2', 'PTGER2', 'RBP4', 'RXRA', 'SLC16A10', 'VDR', 'PPARA', 'RARB', 'KLF9', 'PTGER1', 'PPARD', 'NR1H3', 'GPER1', 'THRAP3', 'PGR', 'DIO1', 'ESR1', 'PPARG', 'CYP19A1', 'SLC16A2', 'AHR', 'PTGER4', 'ESRRA', 'ESR2', 'RXRB', 'RARA', 'THRSP'}


fn.CustomUmap(adata, genes, embedding="X_draw_graph_fa")

The following marker genes are missing:  {'NR1H2', 'PTGER2', 'RBP4', 'RXRA', 'SLC16A10', 'VDR', 'PPARA', 'RARB', 'KLF9', 'PTGER1', 'PPARD', 'NR1H3', 'GPER1', 'THRAP3', 'PGR', 'DIO1', 'ESR1', 'PPARG', 'CYP19A1', 'SLC16A2', 'AHR', 'PTGER4', 'ESRRA', 'ESR2', 'RXRB', 'RARA', 'THRSP'}


available_genes = [gene for gene in genes if gene in adata.var_names]

if available_genes:
    sc.pl.dotplot(adata, available_genes, groupby='leidenAnnotated')
else:
    print("None of the specified genes are found in adata.var_names.")

/usr/local/lib/python3.8/dist-packages/scanpy/plotting/_dotplot.py:749: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
  dot_ax.scatter(x, y, **kwds)


print(datetime.now())

2025-07-23 15:49:56.840740


%%bash

# save also html and python versions for git
jupyter nbconvert ExplorationCastaldi.ipynb --to="python" --output="ExplorationCastaldi"
jupyter nbconvert ExplorationCastaldi.ipynb --to="html" --output="ExplorationCastaldi"

[NbConvertApp] Converting notebook ExplorationCastaldi.ipynb to python
[NbConvertApp] Writing 3043 bytes to ExplorationCastaldi.py
[NbConvertApp] Converting notebook ExplorationCastaldi.ipynb to html
[NbConvertApp] Writing 11661515 bytes to ExplorationCastaldi.html

	GeneName	Signature
0	THRB	Thyroid
1	THRA	Thyroid
2	THRAP3	Thyroid
3	DIO1	Thyroid
4	DIO2	Thyroid
5	DIO3	Thyroid
6	SLC16A10	Thyroid
7	SLC16A2	Thyroid
8	SLC7A5	Thyroid
9	KLF9	Thyroid
10	THRSP	Thyroid
11	ESRRG	Estrogen
12	ESRRA	Estrogen
13	GPER1	Estrogen
14	ESR1	Estrogen
15	ESR2	Estrogen
16	ESRRB	Estrogen
17	CYP19A1	Estrogen
18	AR	Androgen
19	RBP4	Retinoic Acid
20	RARA	Retinoic Acid
21	RARB	Retinoic Acid
22	RARG	Retinoic Acid
23	RXRA	Retinoic Acid
24	RXRB	Retinoic Acid
25	RXRG	Retinoic Acid
26	AHR	AhHyd
27	NR3C1	GC
28	NR1H2	LivX
29	NR1H3	LivX
30	PTGER1	PGE2
31	PTGER2	PGE2
32	PTGER3	PGE2
33	PTGER4	PGE2
34	PPARA	PPAR
35	PPARD	PPAR
36	PPARG	PPAR
37	PGR	Progesterone
38	VDR	Vitamine D

Exploration of hormonal receptor genes in Castaldi et al organoid dataset¶

1. Environment Set Up¶

1.1 Library upload¶

1.2 Starting computations: timestamp¶

2. Read input files¶

2.1 adata loading¶

2.2 Receptors signature loading¶

3. Visualizations¶

3.1 Counts from adata¶

3.2 Clusters annotation¶

3.3 Visualization of receptors on UMAP¶

4. Save Notebooks¶

4.1 Timestamp finished computations¶

4.2 Save python and html version of notebook¶