Exploration of hormonal receptor genes in Castaldi et al organoid dataset¶

Reference paper

1. Environment Set Up¶

1.1 Library upload¶

In [3]:
import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns
import igraph as ig
import matplotlib.pyplot as plt 
from scipy.sparse import csr_matrix, isspmatrix
from datetime import datetime
import sys
sys.path.append('../')
import functions as fn

print(np.__version__)
print(pd.__version__)
print(sc.__version__)
1.23.5
2.0.0
1.9.3
Running Scanpy 1.9.3, on 2025-07-23 11:31.
In [4]:
sc.settings.verbosity = 3
sc.settings.set_figure_params(dpi=100)

1.2 Starting computations: timestamp¶

In [5]:
print(datetime.now())
2025-07-23 11:31:49.003619

2. Read input files¶

2.1 adata loading¶

In [8]:
adata = sc.read('../../../../Castaldi_multiplexingCBO/adataPaga.h5ad')
In [9]:
adata
Out[9]:
AnnData object with n_obs × n_vars = 14913 × 3499
    obs: 'dataset', 'cellID', 'cellID_newName', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'total_counts_mt', 'log1p_total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'log1p_total_counts_ribo', 'pct_counts_ribo', 'stage', 'type', 'id_stage', 'cellID_newName_type', 'S_score', 'G2M_score', 'phase', 'leidenAnnotated', 'leiden_1.2', 'endpoint_GlutamatergicNeurons_late', 'endpoint_GlutamatergicNeurons_early', 'endpoint_MigratingNeurons', 'endpoint_OuterRadialGliaAstrocytes', 'endpoint_Interneurons', 'endpoint_Interneurons_GAD2', 'endpoint_CajalR_like', 'Exc_Lineage', 'endpoint_GlutamatergicNeurons_both'
    var: 'highly_variable', 'mean', 'std'
    uns: 'Exc_Lineage_colors', 'cellID_colors', 'cellID_newName_colors', 'cellID_newName_type_colors', 'cluster_colors', 'dataset_colors', 'diffmap_evals', 'draw_graph', 'leiden', 'leidenAnnotated_colors', 'leiden_1.2_colors', 'leiden_1.2_sizes', 'leiden_Filt_colors', 'leiden_colors', 'neighbors', 'paga', 'pca', 'phase_colors', 'score_filt', 'stage_colors', 'type_colors', 'umap'
    obsm: 'X_diffmap', 'X_draw_graph_fa', 'X_pca', 'X_umap'
    varm: 'PCs'
    obsp: 'connectivities', 'distances'
In [11]:
print('Loaded Normalizes AnnData object: number of cells', adata.n_obs)
print('Loaded Normalizes AnnData object: number of genes', adata.n_vars)

# To see the columns of the metadata (information available for each cell)  
print('Available metadata for each cell: ', adata.obs.columns)
Loaded Normalizes AnnData object: number of cells 14913
Loaded Normalizes AnnData object: number of genes 3499
Available metadata for each cell:  Index(['dataset', 'cellID', 'cellID_newName', 'n_genes_by_counts',
       'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts',
       'total_counts_mt', 'log1p_total_counts_mt', 'pct_counts_mt',
       'total_counts_ribo', 'log1p_total_counts_ribo', 'pct_counts_ribo',
       'stage', 'type', 'id_stage', 'cellID_newName_type', 'S_score',
       'G2M_score', 'phase', 'leidenAnnotated', 'leiden_1.2',
       'endpoint_GlutamatergicNeurons_late',
       'endpoint_GlutamatergicNeurons_early', 'endpoint_MigratingNeurons',
       'endpoint_OuterRadialGliaAstrocytes', 'endpoint_Interneurons',
       'endpoint_Interneurons_GAD2', 'endpoint_CajalR_like', 'Exc_Lineage',
       'endpoint_GlutamatergicNeurons_both'],
      dtype='object')
In [12]:
np.unique(adata.obs.values[:,14])
Out[12]:
array(['downstream', 'upstream'], dtype=object)

2.2 Receptors signature loading¶

Loading of hormonal receptor gene signature.

In [13]:
signatures = '../../../../DataDir/ExternalData/Receptors/ReceptorsComplete.txt'
In [14]:
sig = pd.read_csv(signatures, sep="\t", keep_default_na=False)  
print(sig.shape)
sig
(39, 2)
Out[14]:
GeneName Signature
0 THRB Thyroid
1 THRA Thyroid
2 THRAP3 Thyroid
3 DIO1 Thyroid
4 DIO2 Thyroid
5 DIO3 Thyroid
6 SLC16A10 Thyroid
7 SLC16A2 Thyroid
8 SLC7A5 Thyroid
9 KLF9 Thyroid
10 THRSP Thyroid
11 ESRRG Estrogen
12 ESRRA Estrogen
13 GPER1 Estrogen
14 ESR1 Estrogen
15 ESR2 Estrogen
16 ESRRB Estrogen
17 CYP19A1 Estrogen
18 AR Androgen
19 RBP4 Retinoic Acid
20 RARA Retinoic Acid
21 RARB Retinoic Acid
22 RARG Retinoic Acid
23 RXRA Retinoic Acid
24 RXRB Retinoic Acid
25 RXRG Retinoic Acid
26 AHR AhHyd
27 NR3C1 GC
28 NR1H2 LivX
29 NR1H3 LivX
30 PTGER1 PGE2
31 PTGER2 PGE2
32 PTGER3 PGE2
33 PTGER4 PGE2
34 PPARA PPAR
35 PPARD PPAR
36 PPARG PPAR
37 PGR Progesterone
38 VDR Vitamine D
In [15]:
genes = sig["GeneName"].values.tolist()

3. Visualizations¶

3.1 Counts from adata¶

In [16]:
adata.obsm
Out[16]:
AxisArrays with keys: X_diffmap, X_draw_graph_fa, X_pca, X_umap
In [17]:
sc.pl.embedding(adata, basis="X_umap", color=['n_genes_by_counts',"total_counts", 'pct_counts_mt', 'pct_counts_ribo'])

3.2 Clusters annotation¶

In [18]:
sc.pl.embedding(adata,  basis="X_umap", color=['leidenAnnotated'], ncols=1)
/usr/local/lib/python3.8/dist-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(
In [19]:
sc.pl.embedding(adata,  basis="X_draw_graph_fa", color=['leidenAnnotated'], ncols=1)
/usr/local/lib/python3.8/dist-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(

3.3 Visualization of receptors on UMAP¶

In [20]:
fn.CustomUmap(adata, genes, embedding="X_umap")
The following marker genes are missing:  {'NR1H2', 'PTGER2', 'RBP4', 'RXRA', 'SLC16A10', 'VDR', 'PPARA', 'RARB', 'KLF9', 'PTGER1', 'PPARD', 'NR1H3', 'GPER1', 'THRAP3', 'PGR', 'DIO1', 'ESR1', 'PPARG', 'CYP19A1', 'SLC16A2', 'AHR', 'PTGER4', 'ESRRA', 'ESR2', 'RXRB', 'RARA', 'THRSP'}
In [21]:
fn.CustomUmap(adata, genes, embedding="X_draw_graph_fa")
The following marker genes are missing:  {'NR1H2', 'PTGER2', 'RBP4', 'RXRA', 'SLC16A10', 'VDR', 'PPARA', 'RARB', 'KLF9', 'PTGER1', 'PPARD', 'NR1H3', 'GPER1', 'THRAP3', 'PGR', 'DIO1', 'ESR1', 'PPARG', 'CYP19A1', 'SLC16A2', 'AHR', 'PTGER4', 'ESRRA', 'ESR2', 'RXRB', 'RARA', 'THRSP'}
In [27]:
available_genes = [gene for gene in genes if gene in adata.var_names]

if available_genes:
    sc.pl.dotplot(adata, available_genes, groupby='leidenAnnotated')
else:
    print("None of the specified genes are found in adata.var_names.")
/usr/local/lib/python3.8/dist-packages/scanpy/plotting/_dotplot.py:749: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
  dot_ax.scatter(x, y, **kwds)

4. Save Notebooks¶

Adata is not saved, since no new computations have been performed. I just save the notebooks.

4.1 Timestamp finished computations¶

In [24]:
print(datetime.now())
2025-07-23 15:49:56.840740

4.2 Save python and html version of notebook¶

In [28]:
%%bash

# save also html and python versions for git
jupyter nbconvert ExplorationCastaldi.ipynb --to="python" --output="ExplorationCastaldi"
jupyter nbconvert ExplorationCastaldi.ipynb --to="html" --output="ExplorationCastaldi"
[NbConvertApp] Converting notebook ExplorationCastaldi.ipynb to python
[NbConvertApp] Writing 3043 bytes to ExplorationCastaldi.py
[NbConvertApp] Converting notebook ExplorationCastaldi.ipynb to html
[NbConvertApp] Writing 11661515 bytes to ExplorationCastaldi.html
In [ ]: