import scanpy as sc
import pandas as pd
from matplotlib import pylab
import random
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import os
import itertools
import plotly.express as px
import yaml
import numpy as np
import random
import anndata as ad
from scipy.sparse import csr_matrix, issparse
from scipy import sparse
from matplotlib.colors import TwoSlopeNorm
import scanpy.external as sce
import sys
import scvelo as scv
import anndata
sc.settings.set_figure_params(dpi=80, facecolor='white', dpi_save=500)
pylab.rcParams['figure.figsize'] = (6, 6)
homeDir = os.getenv("HOME")
sys.path.insert(1, homeDir+"/utils/")
from PlotPCA_components import *
from AtlasClasses import *
DS="hormones_substudy"
ReferenceTissue="cortex"
with open(homeDir+"/utils/ReferenceDict.yaml", 'r') as file:
ReferencePaths = yaml.safe_load(file)
for k in list(ReferencePaths.keys()):
ReferencePaths[k]["adataPath"] = "/group/testa/Users/davide.castaldi/Polaroids_spinoff"+ReferencePaths[k]["adataPath"]
#ReferencePaths[k]["signaturePath"] = homeDir+ReferencePaths[k]["signaturePath"]
ReferencePaths[k]["signaturePath"] = "/group/testa/Users/davide.castaldi/Polaroids_spinoff"+ReferencePaths[k]["signaturePath"]
ReferencePaths[k]["signaturePurityPath"] = "/group/testa/Users/davide.castaldi/Polaroids_spinoff"+ReferencePaths[k]["signaturePurityPath"]
ReferencePaths
{'cortex': {'dsname': 'Poliudakis2019_cortex', 'adataPath': '/group/testa/Users/davide.castaldi/Polaroids_spinoff/2_GenerateReferences/cortex.Reference.h5ad', 'signaturePath': '/group/testa/Users/davide.castaldi/Polaroids_spinoff/2_GenerateReferences/cortex.SignatureGenes.tsv', 'signaturePurityPath': '/group/testa/Users/davide.castaldi/Polaroids_spinoff/2_GenerateReferences/cortex.SignaturesPurity.tsv', 'LabelTtransferAggregation': {'oRG': 'Glia', 'IP': 'IP', 'ExcitatoryDeepLayer': 'ExcitatoryNeuron', 'Cycling': 'Cycling', 'ExcitatoryMigrating': 'ExcitatoryNeuron', 'Excitatory': 'ExcitatoryNeuron', 'Endothelium': 'Vascular', 'vRG': 'Glia', 'Inhibitory': 'ForebrainInhibitory', 'OPC': 'OPC', 'Mic': 'Mic'}, 'RelevantContrasts': {'Inh_vs_Exc': ['cortex_Inhibitory', 'cortex_Excitatory'], 'Exc_vs_cycling': ['cortex_Excitatory', 'cortex_CyclingProgenitors']}}, 'cerebellum': {'dsname': 'Aldinger2021_cerebellum', 'adataPath': '/group/testa/Users/davide.castaldi/Polaroids_spinoff/2_GenerateReferences/cerebellum.Reference.h5ad', 'signaturePath': '/group/testa/Users/davide.castaldi/Polaroids_spinoff/2_GenerateReferences/cerebellum.SignatureGenes.tsv', 'signaturePurityPath': '/group/testa/Users/davide.castaldi/Polaroids_spinoff/2_GenerateReferences/cerebellum.SignaturesPurity.tsv', 'LabelTtransferAggregation': {'Astrocytes': 'Glia', 'Purkinje': 'CerebellarInhibitory', 'InhibitoryProgenitors': 'CerebellarInhibitory', 'Inhibitory': 'CerebellarInhibitory', 'Glia': 'Glia', 'GranuleNeurons': 'ExcitatoryNeuron', 'Endothelium': 'Vascular', 'ExcitatoryInterneuron': 'ExcitatoryNeuron', 'OPC': 'OPC', 'Mic': 'Mic', 'CyclingGranulePrecursos': 'RLCycling', 'ML_gabaergic': 'CerebellarInhibitory', 'MLgabaergic': 'CerebellarInhibitory', 'ML_gabaergic ': 'CerebellarInhibitory'}, 'RelevantContrasts': {'Inh_vs_Exc': ['cerebellum_Inhibitory', 'cerebellum_Excitatory'], 'ExcInt_vs_Glia': ['cerebellum_Inhibitory', 'cerebellum_GliaProgenitors']}}, 'subpallium': {'dsname': 'Yu2021_subpallium', 'adataPath': '/group/testa/Users/davide.castaldi/Polaroids_spinoff/2_GenerateReferences/subpallium.Reference.h5ad', 'signaturePath': '/group/testa/Users/davide.castaldi/Polaroids_spinoff/2_GenerateReferences/subpallium.SignatureGenes.tsv', 'signaturePurityPath': '/group/testa/Users/davide.castaldi/Polaroids_spinoff/2_GenerateReferences/subpallium.SignaturesPurity.tsv', 'LabelTtransferAggregation': {'Cycling': 'Cycling', 'Inhibitory': 'Inhibitory', 'IP': 'IP', 'Excitatory': 'ExcitatoryNeuron', 'Endothelium': 'Vascular', 'Mic': 'Mic', 'OPC': 'OPC'}, 'RelevantContrasts': {'Cycling_vs_Inh': ['subpallium_CyclingProgenitors', 'subpallium_Inhibitory']}}, 'thalamus': {'dsname': 'KimSecondTri2023_Thalamus', 'adataPath': '/group/testa/Users/davide.castaldi/Polaroids_spinoff/2_GenerateReferences/thalamus.Reference.h5ad', 'signaturePath': '/group/testa/Users/davide.castaldi/Polaroids_spinoff/2_GenerateReferences/thalamus.SignatureGenes.tsv', 'signaturePurityPath': '/group/testa/Users/davide.castaldi/Polaroids_spinoff/2_GenerateReferences/thalamus.SignaturesPurity.tsv', 'LabelTtransferAggregation': {'Cycling': 'Cycling', 'Inhibitory': 'Inhibitory', 'IP': 'IP', 'Excitatory': 'ExcitatoryNeuron', 'Endothelium': 'Vascular', 'Mic': 'Mic', 'OPC': 'OPC'}, 'RelevantContrasts': {'Exc_vs_Astro': ['thalamus_Excitatory', 'thalamus_GliaProgenitors']}}}
adata = sc.read_h5ad("./adatas/4.1.{}.labeltransfer.h5ad".format(DS))
adata.layers["logNorm"] = adata.X.copy()
sc.pp.scale(adata)
scv.tl.score_genes_cell_cycle(adata)
calculating cell cycle phase --> 'S_score' and 'G2M_score', scores of cell cycle phases (adata.obs)
Compositions = adata.obs.groupby(["sample_id","phase"], as_index=False).size()
Compositions["SampleNormSize"] = Compositions["size"].divide(Compositions.groupby("sample_id")["size"].sum().loc[Compositions.sample_id].to_numpy())
Compositions["Condition"] = pd.crosstab(adata.obs["sample_id"], adata.obs["condition"]).idxmax(axis=1)[Compositions["sample_id"]].tolist()
Compositions["line"] = pd.crosstab(adata.obs["sample_id"], adata.obs["line"]).idxmax(axis=1)[Compositions["sample_id"]].tolist()
Compositions
sample_id | phase | size | SampleNormSize | Condition | line | |
---|---|---|---|---|---|---|
0 | 1_CTL04E_491 | G1 | 1181 | 0.428364 | ESTROGEN_AGONIST | CTL04E |
1 | 1_CTL04E_491 | G2M | 695 | 0.252086 | ESTROGEN_AGONIST | CTL04E |
2 | 1_CTL04E_491 | S | 881 | 0.319550 | ESTROGEN_AGONIST | CTL04E |
3 | 1_CTL04E_492 | G1 | 575 | 0.268441 | ESTROGEN_ANTAGONIST | CTL04E |
4 | 1_CTL04E_492 | G2M | 935 | 0.436508 | ESTROGEN_ANTAGONIST | CTL04E |
... | ... | ... | ... | ... | ... | ... |
250 | 3_CTL08A_505 | G2M | 349 | 0.174152 | DMSO | CTL08A |
251 | 3_CTL08A_505 | S | 580 | 0.289421 | DMSO | CTL08A |
252 | 3_CTL08A_509 | G1 | 925 | 0.396315 | RET_ANTAGONIST | CTL08A |
253 | 3_CTL08A_509 | G2M | 749 | 0.320908 | RET_ANTAGONIST | CTL08A |
254 | 3_CTL08A_509 | S | 660 | 0.282776 | RET_ANTAGONIST | CTL08A |
255 rows × 6 columns
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import matplotlib.patches as mpatches
# Sort the DataFrame by 'Condition' and 'line'
Compositions_sorted = Compositions.sort_values(["Condition", "line"])
# Ensure the sample order is preserved as a categorical type
Compositions_sorted["sample_id"] = pd.Categorical(
Compositions_sorted["sample_id"],
Compositions_sorted["sample_id"].unique().tolist()
)
# Plot with sorted x-axis
fig, ax = plt.subplots(1, 1, figsize=(30, 7), dpi=100)
sns.histplot(
data=Compositions_sorted,
x="sample_id",
hue="phase",
weights="SampleNormSize",
multiple="stack",
shrink=0.8,
ax=ax
)
ax.set_ylabel("Cells Fraction")
ax.set(ylim=(0, 1))
sns.despine(ax=ax)
# Try to retrieve the auto-generated legend handles/labels for phase
macro_handles, macro_labels = ax.get_legend_handles_labels()
# Remove the auto legend to avoid conflicts.
if ax.get_legend() is not None:
ax.get_legend().remove()
# If macro_handles is empty, build the legend manually.
if len(macro_handles) == 0:
macro_unique = Compositions_sorted["phase"].unique()
# Use a palette (here we use "deep" which is commonly used by seaborn)
macro_palette = sns.color_palette("deep", n_colors=len(macro_unique))
macro_dict = {macro: color for macro, color in zip(macro_unique, macro_palette)}
macro_handles = [mpatches.Patch(color=color, label=macro) for macro, color in macro_dict.items()]
macro_labels = list(macro_dict.keys())
# Group info by sample_id for Condition and line
group_info = Compositions_sorted.groupby("sample_id").agg({"Condition": "first", "line": "first"})
# Get x positions for categorical data
xticks = ax.get_xticks()
samples_order = list(Compositions_sorted["sample_id"].cat.categories)
sample_to_x = {sample: xtick for sample, xtick in zip(samples_order, xticks)}
# Assign colors for Conditions and lines
unique_conditions = group_info["Condition"].unique()
unique_lines = group_info["line"].unique()
condition_colors = {
cond: color for cond, color in zip(unique_conditions, sns.color_palette("tab10", n_colors=len(unique_conditions)))
}
line_colors = {
line: color for line, color in zip(unique_lines, sns.color_palette("pastel", n_colors=len(unique_lines)))
}
# Use a transform that uses data coordinates for x and axes coordinates for y
trans = ax.get_xaxis_transform()
# Add colored bars above the main plot for each sample
for sample, row in group_info.iterrows():
x_center = sample_to_x[sample]
rect_width = 1 # each category is assumed to be width 1
ax.add_patch(mpatches.Rectangle((x_center - 0.5, 1.02), rect_width, 0.02,
color=condition_colors[row["Condition"]],
transform=trans,
clip_on=False))
ax.add_patch(mpatches.Rectangle((x_center - 0.5, 1.05), rect_width, 0.02,
color=line_colors[row["line"]],
transform=trans,
clip_on=False))
# Create custom legends for Condition and line groupings
condition_legend = [mpatches.Patch(color=color, label=cond) for cond, color in condition_colors.items()]
line_legend = [mpatches.Patch(color=color, label=line) for line, color in line_colors.items()]
# Adjust the right margin to make room for legends
fig.subplots_adjust(right=1.5)
# Place legends outside the main axes
legend_macro = ax.legend(handles=macro_handles, labels=macro_labels, title="Phase", loc="upper left", bbox_to_anchor=(0, 1))
ax.add_artist(legend_macro)
legend_condition = ax.legend(handles=condition_legend, title="Condition", loc="upper left", bbox_to_anchor=(.955, .9))
ax.add_artist(legend_condition)
legend_line = ax.legend(handles=line_legend, title="Line", loc="upper left", bbox_to_anchor=(.955, 1.1))
ax.add_artist(legend_line)
ax.grid(False)
plt.xticks(rotation=90)
plt.savefig(f"./figures/{DS}.Prefilt.scaled.CyclingRate.svg", bbox_inches="tight")
plt.show()
adata.X = adata.layers["logNorm"].copy()
del adata.layers["logNorm"]
# Store consensus call info
adata.obs["Consensus_nIntersection"] = adata.obs[["ingestdLabels","scANVILabels","harmonyLabels"]].apply(lambda rows: rows.value_counts().max(), axis = 1)
adata.obs["Consensus"] = np.where(adata.obs["Consensus_nIntersection"] >= 2, True, False)
adata.obs["Consensus_call"] = adata.obs[["ingestdLabels","scANVILabels","harmonyLabels"]].apply(lambda rows: rows.value_counts().idxmax(), axis = 1)
sc.pp.highly_variable_genes(adata, batch_key="groupCov", flavor="seurat", n_top_genes=4000)
CommonHVGs = adata.var_names[adata.var["highly_variable_nbatches"] == len(adata.obs["groupCov"].unique())].tolist()
print(len(CommonHVGs))
adata.var["highly_variable"] = adata.var_names.isin(CommonHVGs)
adata.var["highly_variable"].sum()
sc.tl.pca(adata)
plotPCA_components(adata, color="line")
203 PlottingParams: figsize:(10, 10) dpi:100 dotsize:10 legend_loc:on data fontsize:8
sc.pp.neighbors(adata, n_pcs=10, n_neighbors=30)
sc.tl.umap(adata)
sc.pl.umap(adata, color=["line","TOP2A","STMN2","VIM","S100B"], size=10, vmin='p1', vmax='p99')
AddedFilters = pd.read_csv("./adatas/{}_AdditionalFilters.tsv".format(DS), sep="\t", index_col=0)
if pd.concat([adata.obs, AddedFilters], axis = 1).loc[adata.obs_names].isnull().any(axis=1).sum() == (adata.obs.shape[0] - AddedFilters.shape[0]):
print("(Mis)match between Additional filters tsv and anndata as expected, proceeding to merge")
adata.obs = pd.concat([adata.obs, AddedFilters], axis = 1).loc[adata.obs_names]
for col in AddedFilters.columns:
adata.obs[col] = adata.obs[col].fillna(False)
else:
print("Unexpected dimension mismatch between Additional filters tsv and anndata")
(Mis)match between Additional filters tsv and anndata as expected, proceeding to merge
plotSankey(adata.obs, covs=["Consensus_call"]+AddedFilters.columns.tolist())