import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns
import igraph as ig
import matplotlib.pyplot as plt
from scipy.sparse import csr_matrix, isspmatrix
from datetime import datetime
import sys
sys.path.append('../')
import functions as fn
print(np.__version__)
print(pd.__version__)
print(sc.__version__)
1.23.5 2.0.0 1.9.3 Running Scanpy 1.9.3, on 2025-07-23 11:31.
sc.settings.verbosity = 3
sc.settings.set_figure_params(dpi=100)
print(datetime.now())
2025-07-23 11:31:49.003619
adata = sc.read('../../../../Castaldi_multiplexingCBO/adataPaga.h5ad')
adata
AnnData object with n_obs × n_vars = 14913 × 3499 obs: 'dataset', 'cellID', 'cellID_newName', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'total_counts_mt', 'log1p_total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'log1p_total_counts_ribo', 'pct_counts_ribo', 'stage', 'type', 'id_stage', 'cellID_newName_type', 'S_score', 'G2M_score', 'phase', 'leidenAnnotated', 'leiden_1.2', 'endpoint_GlutamatergicNeurons_late', 'endpoint_GlutamatergicNeurons_early', 'endpoint_MigratingNeurons', 'endpoint_OuterRadialGliaAstrocytes', 'endpoint_Interneurons', 'endpoint_Interneurons_GAD2', 'endpoint_CajalR_like', 'Exc_Lineage', 'endpoint_GlutamatergicNeurons_both' var: 'highly_variable', 'mean', 'std' uns: 'Exc_Lineage_colors', 'cellID_colors', 'cellID_newName_colors', 'cellID_newName_type_colors', 'cluster_colors', 'dataset_colors', 'diffmap_evals', 'draw_graph', 'leiden', 'leidenAnnotated_colors', 'leiden_1.2_colors', 'leiden_1.2_sizes', 'leiden_Filt_colors', 'leiden_colors', 'neighbors', 'paga', 'pca', 'phase_colors', 'score_filt', 'stage_colors', 'type_colors', 'umap' obsm: 'X_diffmap', 'X_draw_graph_fa', 'X_pca', 'X_umap' varm: 'PCs' obsp: 'connectivities', 'distances'
print('Loaded Normalizes AnnData object: number of cells', adata.n_obs)
print('Loaded Normalizes AnnData object: number of genes', adata.n_vars)
# To see the columns of the metadata (information available for each cell)
print('Available metadata for each cell: ', adata.obs.columns)
Loaded Normalizes AnnData object: number of cells 14913 Loaded Normalizes AnnData object: number of genes 3499 Available metadata for each cell: Index(['dataset', 'cellID', 'cellID_newName', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'total_counts_mt', 'log1p_total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'log1p_total_counts_ribo', 'pct_counts_ribo', 'stage', 'type', 'id_stage', 'cellID_newName_type', 'S_score', 'G2M_score', 'phase', 'leidenAnnotated', 'leiden_1.2', 'endpoint_GlutamatergicNeurons_late', 'endpoint_GlutamatergicNeurons_early', 'endpoint_MigratingNeurons', 'endpoint_OuterRadialGliaAstrocytes', 'endpoint_Interneurons', 'endpoint_Interneurons_GAD2', 'endpoint_CajalR_like', 'Exc_Lineage', 'endpoint_GlutamatergicNeurons_both'], dtype='object')
np.unique(adata.obs.values[:,14])
array(['downstream', 'upstream'], dtype=object)
Loading of hormonal receptor gene signature.
signatures = '../../../../DataDir/ExternalData/Receptors/ReceptorsComplete.txt'
sig = pd.read_csv(signatures, sep="\t", keep_default_na=False)
print(sig.shape)
sig
(39, 2)
GeneName | Signature | |
---|---|---|
0 | THRB | Thyroid |
1 | THRA | Thyroid |
2 | THRAP3 | Thyroid |
3 | DIO1 | Thyroid |
4 | DIO2 | Thyroid |
5 | DIO3 | Thyroid |
6 | SLC16A10 | Thyroid |
7 | SLC16A2 | Thyroid |
8 | SLC7A5 | Thyroid |
9 | KLF9 | Thyroid |
10 | THRSP | Thyroid |
11 | ESRRG | Estrogen |
12 | ESRRA | Estrogen |
13 | GPER1 | Estrogen |
14 | ESR1 | Estrogen |
15 | ESR2 | Estrogen |
16 | ESRRB | Estrogen |
17 | CYP19A1 | Estrogen |
18 | AR | Androgen |
19 | RBP4 | Retinoic Acid |
20 | RARA | Retinoic Acid |
21 | RARB | Retinoic Acid |
22 | RARG | Retinoic Acid |
23 | RXRA | Retinoic Acid |
24 | RXRB | Retinoic Acid |
25 | RXRG | Retinoic Acid |
26 | AHR | AhHyd |
27 | NR3C1 | GC |
28 | NR1H2 | LivX |
29 | NR1H3 | LivX |
30 | PTGER1 | PGE2 |
31 | PTGER2 | PGE2 |
32 | PTGER3 | PGE2 |
33 | PTGER4 | PGE2 |
34 | PPARA | PPAR |
35 | PPARD | PPAR |
36 | PPARG | PPAR |
37 | PGR | Progesterone |
38 | VDR | Vitamine D |
genes = sig["GeneName"].values.tolist()
adata.obsm
AxisArrays with keys: X_diffmap, X_draw_graph_fa, X_pca, X_umap
sc.pl.embedding(adata, basis="X_umap", color=['n_genes_by_counts',"total_counts", 'pct_counts_mt', 'pct_counts_ribo'])
sc.pl.embedding(adata, basis="X_umap", color=['leidenAnnotated'], ncols=1)
/usr/local/lib/python3.8/dist-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored cax = scatter(
sc.pl.embedding(adata, basis="X_draw_graph_fa", color=['leidenAnnotated'], ncols=1)
/usr/local/lib/python3.8/dist-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored cax = scatter(
fn.CustomUmap(adata, genes, embedding="X_umap")
The following marker genes are missing: {'NR1H2', 'PTGER2', 'RBP4', 'RXRA', 'SLC16A10', 'VDR', 'PPARA', 'RARB', 'KLF9', 'PTGER1', 'PPARD', 'NR1H3', 'GPER1', 'THRAP3', 'PGR', 'DIO1', 'ESR1', 'PPARG', 'CYP19A1', 'SLC16A2', 'AHR', 'PTGER4', 'ESRRA', 'ESR2', 'RXRB', 'RARA', 'THRSP'}
fn.CustomUmap(adata, genes, embedding="X_draw_graph_fa")
The following marker genes are missing: {'NR1H2', 'PTGER2', 'RBP4', 'RXRA', 'SLC16A10', 'VDR', 'PPARA', 'RARB', 'KLF9', 'PTGER1', 'PPARD', 'NR1H3', 'GPER1', 'THRAP3', 'PGR', 'DIO1', 'ESR1', 'PPARG', 'CYP19A1', 'SLC16A2', 'AHR', 'PTGER4', 'ESRRA', 'ESR2', 'RXRB', 'RARA', 'THRSP'}
available_genes = [gene for gene in genes if gene in adata.var_names]
if available_genes:
sc.pl.dotplot(adata, available_genes, groupby='leidenAnnotated')
else:
print("None of the specified genes are found in adata.var_names.")
/usr/local/lib/python3.8/dist-packages/scanpy/plotting/_dotplot.py:749: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored dot_ax.scatter(x, y, **kwds)
Adata is not saved, since no new computations have been performed. I just save the notebooks.
print(datetime.now())
2025-07-23 15:49:56.840740
%%bash
# save also html and python versions for git
jupyter nbconvert ExplorationCastaldi.ipynb --to="python" --output="ExplorationCastaldi"
jupyter nbconvert ExplorationCastaldi.ipynb --to="html" --output="ExplorationCastaldi"
[NbConvertApp] Converting notebook ExplorationCastaldi.ipynb to python [NbConvertApp] Writing 3043 bytes to ExplorationCastaldi.py [NbConvertApp] Converting notebook ExplorationCastaldi.ipynb to html [NbConvertApp] Writing 11661515 bytes to ExplorationCastaldi.html