import scanpy as sc
import pandas as pd
from matplotlib import pylab
import random
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import os
import itertools
import plotly.express as px
import yaml
import numpy as np
import random
import anndata as ad
from scipy.sparse import csr_matrix, issparse
from scipy import sparse
from matplotlib.colors import TwoSlopeNorm
import scanpy.external as sce
import sys
import scvelo as scv
import anndata
import rapids_singlecell as rsc
import anndata
from pathlib import Path
try:
nb_fname = ipynbname.name()
except:
nb_fname = os.path.basename(globals()["__vsc_ipynb_file__"])
print(nb_fname)
10.2_Hormones_substudy_LineMagnification_Neurons.ipynb
sc.settings.set_figure_params(dpi=100, facecolor='white', dpi_save=500)
pylab.rcParams['figure.figsize'] = (9, 9)
homeDir = os.getenv("HOME")
sys.path.insert(1, homeDir+"/utils/")
from PlotPCA_components import *
from AtlasClasses import *
DS="hormones_substudy"
celltype="Neurons"
ReferenceTissue="cortex"
with open(homeDir+"/utils/ReferenceDict.yaml", 'r') as file:
ReferencePaths = yaml.safe_load(file)
for k in list(ReferencePaths.keys()):
ReferencePaths[k]["adataPath"] = "/group/testa/Users/davide.castaldi/Polaroids_spinoff"+ReferencePaths[k]["adataPath"]
#ReferencePaths[k]["signaturePath"] = homeDir+ReferencePaths[k]["signaturePath"]
ReferencePaths[k]["signaturePath"] = "/group/testa/Users/davide.castaldi/Polaroids_spinoff"+ReferencePaths[k]["signaturePath"]
ReferencePaths[k]["signaturePurityPath"] = "/group/testa/Users/davide.castaldi/Polaroids_spinoff"+ReferencePaths[k]["signaturePurityPath"]
sc.settings.set_figure_params(dpi=80, facecolor='white', dpi_save=500)
pylab.rcParams['figure.figsize'] = (6, 6)
dir_path = homeDir+"/adatas/{}_ByMacroCelltype".format(DS)
pcs=15
n_neighbors=30
seed = 437
adata = sc.read_h5ad(dir_path+"/{}.{}.h5ad".format(DS,celltype))
adata
AnnData object with n_obs × n_vars = 120550 × 36601 obs: 'tags', 'cmo', 'condition', 'replicate', 'condition_clean', 'line', 'run', 'sample_id', 'groupCov', 'harmonizedRegion', 'tissue', 'PCW', 'Singlet', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'total_counts_mt', 'log1p_total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'log1p_total_counts_ribo', 'pct_counts_ribo', 'total_counts_hb', 'log1p_total_counts_hb', 'pct_counts_hb', 'log_pct_counts_hb', 'log_total_counts', 'study', 'hormones_substudy', 'EDCs_substudy', 'controls_substudy', 'region', 'study_Annotation', 'ingestdLabels', 'scANVILabels', 'harmonyLabels', 'phase', 'S_score', 'G2M_score', 'Consensus_nIntersection', 'Consensus', 'Consensus_call', 'PassedQCfilt_ScoreSignature_Hypoxia', 'PassedQCfilt_ScoreSignature_ER_stress', 'PassedQCfilt_ScoreSignature_necroptotic_process', 'PassedQCfilt_ScoreSignature_programmed_necrotic_cell_death', 'Leiden_1', 'MacroCelltype' var: 'gene_ids', 'feature_types', 'genome', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts', 'mean', 'std', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'highly_variable_nbatches', 'highly_variable_intersection', 'highly_variable_seurat_overlap' uns: 'AtlasTile_registry', 'Consensus_call_colors', 'Leiden_1_colors', 'PassedQCfilt_ScoreSignature_ER_stress_colors', 'PassedQCfilt_ScoreSignature_Hypoxia_colors', 'PassedQCfilt_ScoreSignature_necroptotic_process_colors', 'PassedQCfilt_ScoreSignature_programmed_necrotic_cell_death_colors', 'condition_colors', 'hvg', 'ingestdLabels_colors', 'leiden', 'line_colors', 'log1p', 'neighbors', 'pca', 'phase_colors', 'rank_genes_groups', 'umap' obsm: 'X_pca', 'X_pca_harmony', 'X_umap' varm: 'PCs' layers: 'counts' obsp: 'connectivities', 'distances'
# Clean ribo, mito anmd sec genes
# mitochondrial genes, "MT-" for human, "Mt-" for mouse
adata = adata[:,~adata.var_names.str.startswith("MT-")].copy()
# ribosomal genes
adata = adata[:,~adata.var_names.str.startswith(("RPS", "RPL"))].copy()
# Sex genes
print(adata)
adata = adata[:,~adata.var_names.isin(pd.read_csv(homeDir+"/resources/X.genes.tsv", header=None)[0].tolist())].copy()
adata = adata[:,~adata.var_names.isin(pd.read_csv(homeDir+"/resources/Y.genes.tsv", header=None)[0].tolist())].copy()
print(adata)
# Re-normalize
adata.X = adata.layers["counts"].copy()
sc.pp.normalize_total(adata, 20000)
sc.pp.log1p(adata)
adata.layers["logNorm"] = adata.X.copy()
AnnData object with n_obs × n_vars = 120550 × 36485 obs: 'tags', 'cmo', 'condition', 'replicate', 'condition_clean', 'line', 'run', 'sample_id', 'groupCov', 'harmonizedRegion', 'tissue', 'PCW', 'Singlet', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'total_counts_mt', 'log1p_total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'log1p_total_counts_ribo', 'pct_counts_ribo', 'total_counts_hb', 'log1p_total_counts_hb', 'pct_counts_hb', 'log_pct_counts_hb', 'log_total_counts', 'study', 'hormones_substudy', 'EDCs_substudy', 'controls_substudy', 'region', 'study_Annotation', 'ingestdLabels', 'scANVILabels', 'harmonyLabels', 'phase', 'S_score', 'G2M_score', 'Consensus_nIntersection', 'Consensus', 'Consensus_call', 'PassedQCfilt_ScoreSignature_Hypoxia', 'PassedQCfilt_ScoreSignature_ER_stress', 'PassedQCfilt_ScoreSignature_necroptotic_process', 'PassedQCfilt_ScoreSignature_programmed_necrotic_cell_death', 'Leiden_1', 'MacroCelltype' var: 'gene_ids', 'feature_types', 'genome', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts', 'mean', 'std', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'highly_variable_nbatches', 'highly_variable_intersection', 'highly_variable_seurat_overlap' uns: 'AtlasTile_registry', 'Consensus_call_colors', 'Leiden_1_colors', 'PassedQCfilt_ScoreSignature_ER_stress_colors', 'PassedQCfilt_ScoreSignature_Hypoxia_colors', 'PassedQCfilt_ScoreSignature_necroptotic_process_colors', 'PassedQCfilt_ScoreSignature_programmed_necrotic_cell_death_colors', 'condition_colors', 'hvg', 'ingestdLabels_colors', 'leiden', 'line_colors', 'log1p', 'neighbors', 'pca', 'phase_colors', 'rank_genes_groups', 'umap' obsm: 'X_pca', 'X_pca_harmony', 'X_umap' varm: 'PCs' layers: 'counts' obsp: 'connectivities', 'distances' AnnData object with n_obs × n_vars = 120550 × 35477 obs: 'tags', 'cmo', 'condition', 'replicate', 'condition_clean', 'line', 'run', 'sample_id', 'groupCov', 'harmonizedRegion', 'tissue', 'PCW', 'Singlet', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'total_counts_mt', 'log1p_total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'log1p_total_counts_ribo', 'pct_counts_ribo', 'total_counts_hb', 'log1p_total_counts_hb', 'pct_counts_hb', 'log_pct_counts_hb', 'log_total_counts', 'study', 'hormones_substudy', 'EDCs_substudy', 'controls_substudy', 'region', 'study_Annotation', 'ingestdLabels', 'scANVILabels', 'harmonyLabels', 'phase', 'S_score', 'G2M_score', 'Consensus_nIntersection', 'Consensus', 'Consensus_call', 'PassedQCfilt_ScoreSignature_Hypoxia', 'PassedQCfilt_ScoreSignature_ER_stress', 'PassedQCfilt_ScoreSignature_necroptotic_process', 'PassedQCfilt_ScoreSignature_programmed_necrotic_cell_death', 'Leiden_1', 'MacroCelltype' var: 'gene_ids', 'feature_types', 'genome', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts', 'mean', 'std', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'highly_variable_nbatches', 'highly_variable_intersection', 'highly_variable_seurat_overlap' uns: 'AtlasTile_registry', 'Consensus_call_colors', 'Leiden_1_colors', 'PassedQCfilt_ScoreSignature_ER_stress_colors', 'PassedQCfilt_ScoreSignature_Hypoxia_colors', 'PassedQCfilt_ScoreSignature_necroptotic_process_colors', 'PassedQCfilt_ScoreSignature_programmed_necrotic_cell_death_colors', 'condition_colors', 'hvg', 'ingestdLabels_colors', 'leiden', 'line_colors', 'log1p', 'neighbors', 'pca', 'phase_colors', 'rank_genes_groups', 'umap' obsm: 'X_pca', 'X_pca_harmony', 'X_umap' varm: 'PCs' layers: 'counts' obsp: 'connectivities', 'distances' WARNING: adata.X seems to be already log-transformed.
sc.pl.umap(adata, color=["line","condition"])
adata.obs["condition"].value_counts()
condition ARYL_HYD_ANTAGONIST 12227 ANDROGEN_ANTAGONIST 10305 LIVER-X_AGONIST 9230 ARYL_HYD_AGONIST 8562 ESTROGEN_AGONIST 8322 RET_AGONIST 8321 THYROID_ANTAGONIST 8216 LIVER-X_ANTAGONIST 8199 GLUCOCORT_ANTAGONIST 7743 DMSO 7001 GLUCOCORT_AGONIST 6965 ANDROGEN_AGONIST 6845 THYROID_AGONIST 6714 ESTROGEN_ANTAGONIST 6420 RET_ANTAGONIST 5480 Name: count, dtype: int64
HVGSCondict = {}
for line in adata.obs["line"].unique().tolist():
adataGenotype = adata[adata.obs["line"] == line]
for condition in adataGenotype.obs["condition"].unique().tolist():
adataCOnd = adataGenotype[adataGenotype.obs["condition"] == condition].copy()
if condition not in list(HVGSCondict.keys()):
HVGSCondict[condition] = []
#Print Nsamples
nsamples = len(adataCOnd.obs["sample_id"].unique().tolist())
print(f"Found {nsamples} repliacates for {line} {condition}")
if nsamples < 3:
minOverlap = nsamples
else:
minOverlap = nsamples-1
print(f"Only HVGs common to {minOverlap} replicates will be kept")
sc.pp.highly_variable_genes(adataCOnd, batch_key="sample_id", flavor="seurat", n_top_genes=1000)
CommonHVGs = set(adataCOnd.var_names[adataCOnd.var["highly_variable_nbatches"] >= minOverlap])
nHVGs = len(CommonHVGs)
print(f"Found {nHVGs} common HVGs across {minOverlap} replicates for line {line} exposure {condition} ")
print("\n")
HVGSCondict[condition].append(CommonHVGs)
Found 3 repliacates for CTL04E ESTROGEN_AGONIST Only HVGs common to 2 replicates will be kept Found 552 common HVGs across 2 replicates for line CTL04E exposure ESTROGEN_AGONIST Found 3 repliacates for CTL04E ESTROGEN_ANTAGONIST Only HVGs common to 2 replicates will be kept Found 553 common HVGs across 2 replicates for line CTL04E exposure ESTROGEN_ANTAGONIST Found 3 repliacates for CTL04E ANDROGEN_AGONIST Only HVGs common to 2 replicates will be kept Found 512 common HVGs across 2 replicates for line CTL04E exposure ANDROGEN_AGONIST Found 3 repliacates for CTL04E ANDROGEN_ANTAGONIST Only HVGs common to 2 replicates will be kept Found 545 common HVGs across 2 replicates for line CTL04E exposure ANDROGEN_ANTAGONIST Found 3 repliacates for CTL04E ARYL_HYD_AGONIST Only HVGs common to 2 replicates will be kept Found 573 common HVGs across 2 replicates for line CTL04E exposure ARYL_HYD_AGONIST Found 3 repliacates for CTL04E ARYL_HYD_ANTAGONIST Only HVGs common to 2 replicates will be kept Found 674 common HVGs across 2 replicates for line CTL04E exposure ARYL_HYD_ANTAGONIST Found 3 repliacates for CTL04E LIVER-X_AGONIST Only HVGs common to 2 replicates will be kept Found 552 common HVGs across 2 replicates for line CTL04E exposure LIVER-X_AGONIST Found 3 repliacates for CTL04E LIVER-X_ANTAGONIST Only HVGs common to 2 replicates will be kept Found 706 common HVGs across 2 replicates for line CTL04E exposure LIVER-X_ANTAGONIST Found 3 repliacates for CTL04E GLUCOCORT_AGONIST Only HVGs common to 2 replicates will be kept Found 548 common HVGs across 2 replicates for line CTL04E exposure GLUCOCORT_AGONIST Found 3 repliacates for CTL04E GLUCOCORT_ANTAGONIST Only HVGs common to 2 replicates will be kept Found 537 common HVGs across 2 replicates for line CTL04E exposure GLUCOCORT_ANTAGONIST Found 3 repliacates for CTL04E THYROID_AGONIST Only HVGs common to 2 replicates will be kept Found 550 common HVGs across 2 replicates for line CTL04E exposure THYROID_AGONIST Found 3 repliacates for CTL04E THYROID_ANTAGONIST Only HVGs common to 2 replicates will be kept Found 489 common HVGs across 2 replicates for line CTL04E exposure THYROID_ANTAGONIST Found 3 repliacates for CTL04E DMSO Only HVGs common to 2 replicates will be kept Found 508 common HVGs across 2 replicates for line CTL04E exposure DMSO Found 1 repliacates for CTL04E RET_AGONIST Only HVGs common to 1 replicates will be kept Found 1000 common HVGs across 1 replicates for line CTL04E exposure RET_AGONIST Found 3 repliacates for CTL04E RET_ANTAGONIST Only HVGs common to 2 replicates will be kept Found 516 common HVGs across 2 replicates for line CTL04E exposure RET_ANTAGONIST Found 3 repliacates for CTL08A ESTROGEN_AGONIST Only HVGs common to 2 replicates will be kept Found 554 common HVGs across 2 replicates for line CTL08A exposure ESTROGEN_AGONIST Found 3 repliacates for CTL08A ESTROGEN_ANTAGONIST Only HVGs common to 2 replicates will be kept Found 568 common HVGs across 2 replicates for line CTL08A exposure ESTROGEN_ANTAGONIST Found 3 repliacates for CTL08A ANDROGEN_AGONIST Only HVGs common to 2 replicates will be kept Found 572 common HVGs across 2 replicates for line CTL08A exposure ANDROGEN_AGONIST Found 3 repliacates for CTL08A ANDROGEN_ANTAGONIST Only HVGs common to 2 replicates will be kept Found 548 common HVGs across 2 replicates for line CTL08A exposure ANDROGEN_ANTAGONIST Found 3 repliacates for CTL08A ARYL_HYD_AGONIST Only HVGs common to 2 replicates will be kept Found 568 common HVGs across 2 replicates for line CTL08A exposure ARYL_HYD_AGONIST Found 3 repliacates for CTL08A ARYL_HYD_ANTAGONIST Only HVGs common to 2 replicates will be kept Found 670 common HVGs across 2 replicates for line CTL08A exposure ARYL_HYD_ANTAGONIST Found 3 repliacates for CTL08A LIVER-X_AGONIST Only HVGs common to 2 replicates will be kept Found 626 common HVGs across 2 replicates for line CTL08A exposure LIVER-X_AGONIST Found 3 repliacates for CTL08A LIVER-X_ANTAGONIST Only HVGs common to 2 replicates will be kept Found 738 common HVGs across 2 replicates for line CTL08A exposure LIVER-X_ANTAGONIST Found 3 repliacates for CTL08A GLUCOCORT_AGONIST Only HVGs common to 2 replicates will be kept Found 536 common HVGs across 2 replicates for line CTL08A exposure GLUCOCORT_AGONIST Found 3 repliacates for CTL08A GLUCOCORT_ANTAGONIST Only HVGs common to 2 replicates will be kept Found 550 common HVGs across 2 replicates for line CTL08A exposure GLUCOCORT_ANTAGONIST Found 3 repliacates for CTL08A THYROID_AGONIST Only HVGs common to 2 replicates will be kept Found 537 common HVGs across 2 replicates for line CTL08A exposure THYROID_AGONIST Found 3 repliacates for CTL08A THYROID_ANTAGONIST Only HVGs common to 2 replicates will be kept Found 484 common HVGs across 2 replicates for line CTL08A exposure THYROID_ANTAGONIST Found 3 repliacates for CTL08A DMSO Only HVGs common to 2 replicates will be kept Found 586 common HVGs across 2 replicates for line CTL08A exposure DMSO Found 1 repliacates for CTL08A RET_AGONIST Only HVGs common to 1 replicates will be kept Found 1000 common HVGs across 1 replicates for line CTL08A exposure RET_AGONIST Found 2 repliacates for CTL08A RET_ANTAGONIST Only HVGs common to 2 replicates will be kept Found 441 common HVGs across 2 replicates for line CTL08A exposure RET_ANTAGONIST
sc.pp.highly_variable_genes(adata, batch_key="line", flavor="seurat", n_top_genes=1000)
HVGSLine = set(adata.var_names[adata.var["highly_variable_nbatches"] >=1].tolist())
nHVGSLine = len(HVGSLine)
print(f"Found {nHVGSLine} common pan-condition HVGs across both lines ")
Found 1601 common pan-condition HVGs across both lines
##### First HVGs set
##### First HVGs set
IntersectionDEGs = []
for k in list(HVGSCondict.keys()):
IntersectionDEGsCond = list(set.union(*HVGSCondict[k]))
nIntersectionDEGsCond = len(IntersectionDEGsCond)
#print(f"Found {nIntersectionDEGsCond} HVGs from the intersection of both genotypes replicates for exposure {condition} ")
IntersectionDEGs.extend(IntersectionDEGsCond)
IntersectionDEGs = set(IntersectionDEGs)
nIntersectionDEGsCond = len(IntersectionDEGs)
print(f"Found {nIntersectionDEGsCond} HVGs from the union of both genotypes replicates for exposure {condition} ")
##### Second HVGs set
##### Second HVGs set
FinalHVGs = list(IntersectionDEGs.union(HVGSLine))
nFinalHVGs = len(FinalHVGs)
print(f"Found {nFinalHVGs} HVGs from the union of both genotypes replicates and across exposures ")
Found 3998 HVGs from the union of both genotypes replicates for exposure RET_ANTAGONIST Found 4608 HVGs from the union of both genotypes replicates and across exposures
adata
AnnData object with n_obs × n_vars = 120550 × 35477 obs: 'tags', 'cmo', 'condition', 'replicate', 'condition_clean', 'line', 'run', 'sample_id', 'groupCov', 'harmonizedRegion', 'tissue', 'PCW', 'Singlet', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'total_counts_mt', 'log1p_total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'log1p_total_counts_ribo', 'pct_counts_ribo', 'total_counts_hb', 'log1p_total_counts_hb', 'pct_counts_hb', 'log_pct_counts_hb', 'log_total_counts', 'study', 'hormones_substudy', 'EDCs_substudy', 'controls_substudy', 'region', 'study_Annotation', 'ingestdLabels', 'scANVILabels', 'harmonyLabels', 'phase', 'S_score', 'G2M_score', 'Consensus_nIntersection', 'Consensus', 'Consensus_call', 'PassedQCfilt_ScoreSignature_Hypoxia', 'PassedQCfilt_ScoreSignature_ER_stress', 'PassedQCfilt_ScoreSignature_necroptotic_process', 'PassedQCfilt_ScoreSignature_programmed_necrotic_cell_death', 'Leiden_1', 'MacroCelltype' var: 'gene_ids', 'feature_types', 'genome', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts', 'mean', 'std', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'highly_variable_nbatches', 'highly_variable_intersection', 'highly_variable_seurat_overlap' uns: 'AtlasTile_registry', 'Consensus_call_colors', 'Leiden_1_colors', 'PassedQCfilt_ScoreSignature_ER_stress_colors', 'PassedQCfilt_ScoreSignature_Hypoxia_colors', 'PassedQCfilt_ScoreSignature_necroptotic_process_colors', 'PassedQCfilt_ScoreSignature_programmed_necrotic_cell_death_colors', 'condition_colors', 'hvg', 'ingestdLabels_colors', 'leiden', 'line_colors', 'log1p', 'neighbors', 'pca', 'phase_colors', 'rank_genes_groups', 'umap' obsm: 'X_pca', 'X_pca_harmony', 'X_umap' varm: 'PCs' layers: 'counts', 'logNorm' obsp: 'connectivities', 'distances'
adata.var["highly_variable_seurat_overlap"] = adata.var_names.isin(FinalHVGs)
print(adata.var["highly_variable_seurat_overlap"].sum())
adata.var["highly_variable"] = adata.var["highly_variable_seurat_overlap"]
sc.pp.scale(adata)
adata.layers["scaled"] = adata.X.copy()
sc.pp.pca(adata, use_highly_variable=True)
sc.pp.neighbors(adata, n_pcs=pcs, n_neighbors=n_neighbors, random_state=seed)
adata.X = adata.layers["logNorm"].copy()
4608
sc.tl.umap(adata, random_state=seed)
sc.tl.draw_graph(adata)
sc.pl.umap(adata, color=["line","condition","Consensus_call"], size=10, vmin='p1', vmax='p99', wspace=.2, ncols=2)
sc.pl.draw_graph(adata, color=["line","condition","Consensus_call"], size=10, vmin='p1', vmax='p99', wspace=.2, ncols=2)
print(adata.X.max())
print(adata.X.min())
del adata.layers["scaled"]
del adata.layers["logNorm"]
# Store results
adata.write_h5ad(dir_path+"/{}.{}.LineMagnification.h5ad".format(DS,celltype))
adata = sc.read_h5ad(dir_path+"/{}.{}.LineMagnification.h5ad".format(DS,celltype))
adataPbulk = adata.copy()
adataPbulk.obs["line_condition"] = adataPbulk.obs["line"].astype(str)+"_"+ adataPbulk.obs["condition"].astype(str)
import hashlib Xtab = pd.crosstab(adataPbulk.obs["condition"],adataPbulk.obs["line"]) print(Xtab) minCells = Xtab.min().min() Compositionhash = hashlib.sha256(Xtab.sort_index(axis=1).to_csv(index=True).encode('utf-8')).hexdigest()
SelectedCells = [] SelectedCellsFile = "./{}.{}.selectedBCs.txt".format(nb_fname,Compositionhash) if os.path.isfile(SelectedCellsFile): print("Loading existing cells subset") SelectedCells = pd.read_csv(SelectedCellsFile)["0"].tolist() else: print("File not found or compositions are changed, performing downsampling to balance conditions/lines") for line in adataPbulk.obs["line_condition"].unique(): pbulkCond = adataPbulk[(adataPbulk.obs["line_condition"] == line)]
pbulkCondCells = random.sample(pbulkCond.obs_names.tolist(), minCells)
SelectedCells.extend(pbulkCondCells)
pd.Series(SelectedCells).to_csv(SelectedCellsFile)
adataPbulk = adataPbulk[SelectedCells] pd.crosstab(adataPbulk.obs["condition"],adataPbulk.obs["line"])
adataPbulk = AtlasTile(adataPbulk, regionObs="harmonizedRegion", tissueObs="tissue", ageObs="PCW", study=DS,scalevar=True, groupCov="groupCov")
adataPbulk.AggregateCounts(method="pseudoBulk", groupcov="sample_id", layer="counts", cellStateObs="condition", prenormalize=False, postnormalize=False)
adataPbulk = adataPbulk.uns["pseudoBulkadata.layer_counts.by_sample_id_condition"].copy()
Pseudobulking counts for sample 3_CTL04E_491 by condition obs Pseudobulking counts for sample 2_CTL08A_491 by condition obs Pseudobulking counts for sample 3_CTL08A_491 by condition obs Pseudobulking counts for sample 2_CTL04E_491 by condition obs Pseudobulking counts for sample 1_CTL08A_491 by condition obs Pseudobulking counts for sample 1_CTL04E_491 by condition obs Pseudobulking counts for sample 3_CTL04E_492 by condition obs Pseudobulking counts for sample 1_CTL08A_492 by condition obs Pseudobulking counts for sample 1_CTL04E_492 by condition obs Pseudobulking counts for sample 2_CTL04E_492 by condition obs Pseudobulking counts for sample 2_CTL08A_492 by condition obs Pseudobulking counts for sample 3_CTL08A_492 by condition obs Pseudobulking counts for sample 2_CTL08A_493 by condition obs Pseudobulking counts for sample 3_CTL04E_493 by condition obs Pseudobulking counts for sample 3_CTL08A_493 by condition obs Pseudobulking counts for sample 2_CTL04E_493 by condition obs Pseudobulking counts for sample 1_CTL04E_493 by condition obs Pseudobulking counts for sample 1_CTL08A_493 by condition obs Pseudobulking counts for sample 2_CTL08A_494 by condition obs Pseudobulking counts for sample 2_CTL04E_494 by condition obs Pseudobulking counts for sample 1_CTL04E_494 by condition obs Pseudobulking counts for sample 3_CTL04E_494 by condition obs Pseudobulking counts for sample 3_CTL08A_494 by condition obs Pseudobulking counts for sample 1_CTL08A_494 by condition obs Pseudobulking counts for sample 1_CTL04E_495 by condition obs Pseudobulking counts for sample 2_CTL04E_495 by condition obs Pseudobulking counts for sample 3_CTL08A_495 by condition obs Pseudobulking counts for sample 3_CTL04E_495 by condition obs Pseudobulking counts for sample 1_CTL08A_495 by condition obs Pseudobulking counts for sample 2_CTL08A_495 by condition obs Pseudobulking counts for sample 2_CTL04E_496 by condition obs Pseudobulking counts for sample 2_CTL08A_496 by condition obs Pseudobulking counts for sample 1_CTL08A_496 by condition obs Pseudobulking counts for sample 3_CTL04E_496 by condition obs Pseudobulking counts for sample 1_CTL04E_496 by condition obs Pseudobulking counts for sample 3_CTL08A_496 by condition obs Pseudobulking counts for sample 3_CTL04E_497 by condition obs Pseudobulking counts for sample 2_CTL04E_497 by condition obs Pseudobulking counts for sample 3_CTL08A_497 by condition obs Pseudobulking counts for sample 2_CTL08A_497 by condition obs Pseudobulking counts for sample 1_CTL04E_497 by condition obs Pseudobulking counts for sample 1_CTL08A_497 by condition obs Pseudobulking counts for sample 1_CTL04E_498 by condition obs Pseudobulking counts for sample 3_CTL08A_498 by condition obs Pseudobulking counts for sample 1_CTL08A_498 by condition obs Pseudobulking counts for sample 3_CTL04E_498 by condition obs Pseudobulking counts for sample 2_CTL04E_498 by condition obs Pseudobulking counts for sample 2_CTL08A_498 by condition obs Pseudobulking counts for sample 1_CTL08A_499 by condition obs Pseudobulking counts for sample 2_CTL04E_499 by condition obs Pseudobulking counts for sample 3_CTL04E_499 by condition obs Pseudobulking counts for sample 3_CTL08A_499 by condition obs Pseudobulking counts for sample 1_CTL04E_499 by condition obs Pseudobulking counts for sample 2_CTL08A_499 by condition obs Pseudobulking counts for sample 2_CTL04E_500 by condition obs Pseudobulking counts for sample 1_CTL04E_500 by condition obs Pseudobulking counts for sample 3_CTL04E_500 by condition obs Pseudobulking counts for sample 2_CTL08A_500 by condition obs Pseudobulking counts for sample 1_CTL08A_500 by condition obs Pseudobulking counts for sample 3_CTL08A_500 by condition obs Pseudobulking counts for sample 3_CTL08A_501 by condition obs Pseudobulking counts for sample 3_CTL04E_501 by condition obs Pseudobulking counts for sample 1_CTL08A_501 by condition obs Pseudobulking counts for sample 2_CTL04E_501 by condition obs Pseudobulking counts for sample 1_CTL04E_501 by condition obs Pseudobulking counts for sample 2_CTL08A_501 by condition obs Pseudobulking counts for sample 3_CTL04E_502 by condition obs Pseudobulking counts for sample 3_CTL08A_502 by condition obs Pseudobulking counts for sample 1_CTL04E_502 by condition obs Pseudobulking counts for sample 1_CTL08A_502 by condition obs Pseudobulking counts for sample 2_CTL08A_502 by condition obs Pseudobulking counts for sample 2_CTL04E_502 by condition obs Pseudobulking counts for sample 2_CTL08A_505 by condition obs Pseudobulking counts for sample 2_CTL04E_505 by condition obs Pseudobulking counts for sample 1_CTL08A_505 by condition obs Pseudobulking counts for sample 3_CTL04E_505 by condition obs Pseudobulking counts for sample 1_CTL04E_505 by condition obs Pseudobulking counts for sample 3_CTL08A_505 by condition obs Pseudobulking counts for sample 1_CTL08A_508 by condition obs Pseudobulking counts for sample 1_CTL04E_508 by condition obs Pseudobulking counts for sample 1_CTL08A_509 by condition obs Pseudobulking counts for sample 3_CTL04E_509 by condition obs Pseudobulking counts for sample 1_CTL04E_509 by condition obs Pseudobulking counts for sample 2_CTL04E_509 by condition obs Pseudobulking counts for sample 3_CTL08A_509 by condition obs
def sanitize_strings(value):
if isinstance(value, str): # Only process string values
return (
value.replace('_', '') # Replace underscores with spaces
.replace('-', '') # Replace hyphens with spaces
.replace('@', '') # Example: Remove "@" character
)
return value # Leave non-strings untouched
Counts = adataPbulk.to_df(layer="pseudoBulk_summedCounts").T
MD = adataPbulk.obs[["condition","line"]]
MD["sample"] = MD.index.tolist()
MD = MD.applymap(sanitize_strings).loc[MD.index]
MD
condition | line | sample | |
---|---|---|---|
3_CTL04E_491.ESTROGEN_AGONIST | ESTROGENAGONIST | CTL04E | 3CTL04E491.ESTROGENAGONIST |
2_CTL08A_491.ESTROGEN_AGONIST | ESTROGENAGONIST | CTL08A | 2CTL08A491.ESTROGENAGONIST |
3_CTL08A_491.ESTROGEN_AGONIST | ESTROGENAGONIST | CTL08A | 3CTL08A491.ESTROGENAGONIST |
2_CTL04E_491.ESTROGEN_AGONIST | ESTROGENAGONIST | CTL04E | 2CTL04E491.ESTROGENAGONIST |
1_CTL08A_491.ESTROGEN_AGONIST | ESTROGENAGONIST | CTL08A | 1CTL08A491.ESTROGENAGONIST |
... | ... | ... | ... |
1_CTL08A_509.RET_ANTAGONIST | RETANTAGONIST | CTL08A | 1CTL08A509.RETANTAGONIST |
3_CTL04E_509.RET_ANTAGONIST | RETANTAGONIST | CTL04E | 3CTL04E509.RETANTAGONIST |
1_CTL04E_509.RET_ANTAGONIST | RETANTAGONIST | CTL04E | 1CTL04E509.RETANTAGONIST |
2_CTL04E_509.RET_ANTAGONIST | RETANTAGONIST | CTL04E | 2CTL04E509.RETANTAGONIST |
3_CTL08A_509.RET_ANTAGONIST | RETANTAGONIST | CTL08A | 3CTL08A509.RETANTAGONIST |
85 rows × 3 columns
%load_ext rpy2.ipython
pd.DataFrame.iteritems = pd.DataFrame.items
%%R -i MD -i Counts
library(edgeR)
library(org.Hs.eg.db)
library(AnnotationDbi)
library(stats)
library(topGO)
R[write to console]: Loading required package: limma R[write to console]: Loading required package: AnnotationDbi R[write to console]: Loading required package: stats4 R[write to console]: Loading required package: BiocGenerics R[write to console]: Attaching package: ‘BiocGenerics’ R[write to console]: The following object is masked from ‘package:limma’: plotMA R[write to console]: The following objects are masked from ‘package:stats’: IQR, mad, sd, var, xtabs R[write to console]: The following objects are masked from ‘package:base’: anyDuplicated, aperm, append, as.data.frame, basename, cbind, colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget, order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply, union, unique, unsplit, which.max, which.min R[write to console]: Loading required package: Biobase R[write to console]: Welcome to Bioconductor Vignettes contain introductory material; view with 'browseVignettes()'. To cite Bioconductor, see 'citation("Biobase")', and for packages 'citation("pkgname")'. R[write to console]: Loading required package: IRanges R[write to console]: Loading required package: S4Vectors R[write to console]: Attaching package: ‘S4Vectors’ R[write to console]: The following object is masked from ‘package:utils’: findMatches R[write to console]: The following objects are masked from ‘package:base’: expand.grid, I, unname R[write to console]: R[write to console]: Loading required package: graph R[write to console]: Loading required package: GO.db R[write to console]: R[write to console]: Loading required package: SparseM R[write to console]: Attaching package: ‘SparseM’ R[write to console]: The following object is masked from ‘package:base’: backsolve R[write to console]: groupGOTerms: GOBPTerm, GOMFTerm, GOCCTerm environments built. R[write to console]: Attaching package: ‘topGO’ R[write to console]: The following object is masked from ‘package:IRanges’: members
MD
condition | line | sample | |
---|---|---|---|
3_CTL04E_491.ESTROGEN_AGONIST | ESTROGENAGONIST | CTL04E | 3CTL04E491.ESTROGENAGONIST |
2_CTL08A_491.ESTROGEN_AGONIST | ESTROGENAGONIST | CTL08A | 2CTL08A491.ESTROGENAGONIST |
3_CTL08A_491.ESTROGEN_AGONIST | ESTROGENAGONIST | CTL08A | 3CTL08A491.ESTROGENAGONIST |
2_CTL04E_491.ESTROGEN_AGONIST | ESTROGENAGONIST | CTL04E | 2CTL04E491.ESTROGENAGONIST |
1_CTL08A_491.ESTROGEN_AGONIST | ESTROGENAGONIST | CTL08A | 1CTL08A491.ESTROGENAGONIST |
... | ... | ... | ... |
1_CTL08A_509.RET_ANTAGONIST | RETANTAGONIST | CTL08A | 1CTL08A509.RETANTAGONIST |
3_CTL04E_509.RET_ANTAGONIST | RETANTAGONIST | CTL04E | 3CTL04E509.RETANTAGONIST |
1_CTL04E_509.RET_ANTAGONIST | RETANTAGONIST | CTL04E | 1CTL04E509.RETANTAGONIST |
2_CTL04E_509.RET_ANTAGONIST | RETANTAGONIST | CTL04E | 2CTL04E509.RETANTAGONIST |
3_CTL08A_509.RET_ANTAGONIST | RETANTAGONIST | CTL08A | 3CTL08A509.RETANTAGONIST |
85 rows × 3 columns
%%R -i MD -i Counts -o MDS
CondFactor <- factor(MD[["condition"]],
levels = c("DMSO", setdiff(unique(MD[["condition"]]), "DMSO")))
lineFactor <- factor(MD[["line"]],
levels = c("CTL08A","CTL04E"))
mm <- model.matrix(~0+lineFactor+CondFactor )
rownames(mm) <- colnames(Counts)
dds <- DGEList(Counts, group = CondFactor, genes = rownames(Counts))
# Add metadata to dge
dds$samples$line <- lineFactor
dds$samples$nCells <- MD[["nCells"]]
keep.genes<-filterByExpr(dds,design=mm)
print(table(keep.genes))
dds <- dds[keep.genes,, keep.lib.sizes=FALSE]
dds <- calcNormFactors(dds)
# Store MDS
png(tempfile(fileext = ".png"))
MDS <- plotMDS(dds, pch=16, main="MDS")
dev.off()
#Fit
dds <- estimateGLMRobustDisp(dds, mm)
fit <- glmQLFit(dds, mm)
print(dds$samples)
plotBCV(dds)
plotQLDisp(fit)
keep.genes FALSE TRUE 14985 20492 group lib.size norm.factors 3_CTL04E_491.ESTROGEN_AGONIST ESTROGENAGONIST 4506223 1.0105022 2_CTL08A_491.ESTROGEN_AGONIST ESTROGENAGONIST 7880413 1.0371210 3_CTL08A_491.ESTROGEN_AGONIST ESTROGENAGONIST 10571764 1.0012554 2_CTL04E_491.ESTROGEN_AGONIST ESTROGENAGONIST 6635653 0.9742494 1_CTL08A_491.ESTROGEN_AGONIST ESTROGENAGONIST 10075647 1.0146013 1_CTL04E_491.ESTROGEN_AGONIST ESTROGENAGONIST 6610820 0.9786996 3_CTL04E_492.ESTROGEN_ANTAGONIST ESTROGENANTAGONIST 6472394 1.0837762 1_CTL08A_492.ESTROGEN_ANTAGONIST ESTROGENANTAGONIST 8313620 1.0617799 1_CTL04E_492.ESTROGEN_ANTAGONIST ESTROGENANTAGONIST 5277813 1.0257419 2_CTL04E_492.ESTROGEN_ANTAGONIST ESTROGENANTAGONIST 6584857 1.0396631 2_CTL08A_492.ESTROGEN_ANTAGONIST ESTROGENANTAGONIST 5921627 1.0585013 3_CTL08A_492.ESTROGEN_ANTAGONIST ESTROGENANTAGONIST 9512274 1.0489234 2_CTL08A_493.ANDROGEN_AGONIST ANDROGENAGONIST 5889247 1.0675556 3_CTL04E_493.ANDROGEN_AGONIST ANDROGENAGONIST 4196274 1.0117911 3_CTL08A_493.ANDROGEN_AGONIST ANDROGENAGONIST 10380156 1.0564750 2_CTL04E_493.ANDROGEN_AGONIST ANDROGENAGONIST 3587522 1.0239360 1_CTL04E_493.ANDROGEN_AGONIST ANDROGENAGONIST 5268490 1.0234005 1_CTL08A_493.ANDROGEN_AGONIST ANDROGENAGONIST 8123624 1.0603798 2_CTL08A_494.ANDROGEN_ANTAGONIST ANDROGENANTAGONIST 7308529 1.0333175 2_CTL04E_494.ANDROGEN_ANTAGONIST ANDROGENANTAGONIST 8141237 0.9988268 1_CTL04E_494.ANDROGEN_ANTAGONIST ANDROGENANTAGONIST 6836584 1.0214603 3_CTL04E_494.ANDROGEN_ANTAGONIST ANDROGENANTAGONIST 6162274 1.0340825 3_CTL08A_494.ANDROGEN_ANTAGONIST ANDROGENANTAGONIST 11243091 1.0327739 1_CTL08A_494.ANDROGEN_ANTAGONIST ANDROGENANTAGONIST 11436707 1.0631621 1_CTL04E_495.ARYL_HYD_AGONIST ARYLHYDAGONIST 7959705 0.9579851 2_CTL04E_495.ARYL_HYD_AGONIST ARYLHYDAGONIST 7318266 0.9596384 3_CTL08A_495.ARYL_HYD_AGONIST ARYLHYDAGONIST 9928610 1.0264472 3_CTL04E_495.ARYL_HYD_AGONIST ARYLHYDAGONIST 7402363 0.9501675 1_CTL08A_495.ARYL_HYD_AGONIST ARYLHYDAGONIST 10763131 1.0027056 2_CTL08A_495.ARYL_HYD_AGONIST ARYLHYDAGONIST 9833980 1.0103967 2_CTL04E_496.ARYL_HYD_ANTAGONIST ARYLHYDANTAGONIST 10442690 0.9764821 2_CTL08A_496.ARYL_HYD_ANTAGONIST ARYLHYDANTAGONIST 18306133 1.0306764 1_CTL08A_496.ARYL_HYD_ANTAGONIST ARYLHYDANTAGONIST 17365020 1.0507819 3_CTL04E_496.ARYL_HYD_ANTAGONIST ARYLHYDANTAGONIST 10888637 0.9707359 1_CTL04E_496.ARYL_HYD_ANTAGONIST ARYLHYDANTAGONIST 9154621 0.9793797 3_CTL08A_496.ARYL_HYD_ANTAGONIST ARYLHYDANTAGONIST 17244841 1.0500827 3_CTL04E_497.LIVER-X_AGONIST LIVERXAGONIST 8768082 0.9629603 2_CTL04E_497.LIVER-X_AGONIST LIVERXAGONIST 9403110 0.9394795 3_CTL08A_497.LIVER-X_AGONIST LIVERXAGONIST 10870575 0.9719186 2_CTL08A_497.LIVER-X_AGONIST LIVERXAGONIST 11363440 0.9901663 1_CTL04E_497.LIVER-X_AGONIST LIVERXAGONIST 8494381 0.9681336 1_CTL08A_497.LIVER-X_AGONIST LIVERXAGONIST 11417965 0.9558378 1_CTL04E_498.LIVER-X_ANTAGONIST LIVERXANTAGONIST 6546641 1.0146554 3_CTL08A_498.LIVER-X_ANTAGONIST LIVERXANTAGONIST 12990696 1.0378899 1_CTL08A_498.LIVER-X_ANTAGONIST LIVERXANTAGONIST 9453799 1.0130420 3_CTL04E_498.LIVER-X_ANTAGONIST LIVERXANTAGONIST 6273425 1.0120743 2_CTL04E_498.LIVER-X_ANTAGONIST LIVERXANTAGONIST 5326492 1.0058744 2_CTL08A_498.LIVER-X_ANTAGONIST LIVERXANTAGONIST 12353760 1.0093471 1_CTL08A_499.GLUCOCORT_AGONIST GLUCOCORTAGONIST 8002881 0.9723379 2_CTL04E_499.GLUCOCORT_AGONIST GLUCOCORTAGONIST 4898268 0.9468700 3_CTL04E_499.GLUCOCORT_AGONIST GLUCOCORTAGONIST 4272847 0.9735537 3_CTL08A_499.GLUCOCORT_AGONIST GLUCOCORTAGONIST 9558709 0.9846125 1_CTL04E_499.GLUCOCORT_AGONIST GLUCOCORTAGONIST 4889538 0.9448780 2_CTL08A_499.GLUCOCORT_AGONIST GLUCOCORTAGONIST 6903380 1.0028450 2_CTL04E_500.GLUCOCORT_ANTAGONIST GLUCOCORTANTAGONIST 6180425 0.8940352 1_CTL04E_500.GLUCOCORT_ANTAGONIST GLUCOCORTANTAGONIST 5956997 0.9475558 3_CTL04E_500.GLUCOCORT_ANTAGONIST GLUCOCORTANTAGONIST 4633565 0.9496488 2_CTL08A_500.GLUCOCORT_ANTAGONIST GLUCOCORTANTAGONIST 6023408 0.9533385 1_CTL08A_500.GLUCOCORT_ANTAGONIST GLUCOCORTANTAGONIST 8249680 0.9371614 3_CTL08A_500.GLUCOCORT_ANTAGONIST GLUCOCORTANTAGONIST 7436954 0.9453448 3_CTL08A_501.THYROID_AGONIST THYROIDAGONIST 8637321 1.0191955 3_CTL04E_501.THYROID_AGONIST THYROIDAGONIST 4464469 0.9732673 1_CTL08A_501.THYROID_AGONIST THYROIDAGONIST 8857754 1.0021221 2_CTL04E_501.THYROID_AGONIST THYROIDAGONIST 5411483 0.9785358 1_CTL04E_501.THYROID_AGONIST THYROIDAGONIST 5929016 0.9787484 2_CTL08A_501.THYROID_AGONIST THYROIDAGONIST 8488455 1.0195483 3_CTL04E_502.THYROID_ANTAGONIST THYROIDANTAGONIST 5449247 0.9555885 3_CTL08A_502.THYROID_ANTAGONIST THYROIDANTAGONIST 9847129 0.9985029 1_CTL04E_502.THYROID_ANTAGONIST THYROIDANTAGONIST 2477114 0.9712344 1_CTL08A_502.THYROID_ANTAGONIST THYROIDANTAGONIST 9094620 0.9828777 2_CTL08A_502.THYROID_ANTAGONIST THYROIDANTAGONIST 7934447 1.0255668 2_CTL04E_502.THYROID_ANTAGONIST THYROIDANTAGONIST 5329408 0.9583959 2_CTL08A_505.DMSO DMSO 10464019 0.9803706 2_CTL04E_505.DMSO DMSO 4722699 0.9655423 1_CTL08A_505.DMSO DMSO 8794699 0.9789364 3_CTL04E_505.DMSO DMSO 4854323 0.9742403 1_CTL04E_505.DMSO DMSO 3663543 0.9676696 3_CTL08A_505.DMSO DMSO 7575222 0.9593275 1_CTL08A_508.RET_AGONIST RETAGONIST 17558311 1.1595676 1_CTL04E_508.RET_AGONIST RETAGONIST 16129348 1.1650216 1_CTL08A_509.RET_ANTAGONIST RETANTAGONIST 9977724 1.0316682 3_CTL04E_509.RET_ANTAGONIST RETANTAGONIST 3755716 0.9789962 1_CTL04E_509.RET_ANTAGONIST RETANTAGONIST 4635743 0.9946011 2_CTL04E_509.RET_ANTAGONIST RETANTAGONIST 3840109 0.9874942 3_CTL08A_509.RET_ANTAGONIST RETANTAGONIST 9688062 1.0140777 line 3_CTL04E_491.ESTROGEN_AGONIST CTL04E 2_CTL08A_491.ESTROGEN_AGONIST CTL08A 3_CTL08A_491.ESTROGEN_AGONIST CTL08A 2_CTL04E_491.ESTROGEN_AGONIST CTL04E 1_CTL08A_491.ESTROGEN_AGONIST CTL08A 1_CTL04E_491.ESTROGEN_AGONIST CTL04E 3_CTL04E_492.ESTROGEN_ANTAGONIST CTL04E 1_CTL08A_492.ESTROGEN_ANTAGONIST CTL08A 1_CTL04E_492.ESTROGEN_ANTAGONIST CTL04E 2_CTL04E_492.ESTROGEN_ANTAGONIST CTL04E 2_CTL08A_492.ESTROGEN_ANTAGONIST CTL08A 3_CTL08A_492.ESTROGEN_ANTAGONIST CTL08A 2_CTL08A_493.ANDROGEN_AGONIST CTL08A 3_CTL04E_493.ANDROGEN_AGONIST CTL04E 3_CTL08A_493.ANDROGEN_AGONIST CTL08A 2_CTL04E_493.ANDROGEN_AGONIST CTL04E 1_CTL04E_493.ANDROGEN_AGONIST CTL04E 1_CTL08A_493.ANDROGEN_AGONIST CTL08A 2_CTL08A_494.ANDROGEN_ANTAGONIST CTL08A 2_CTL04E_494.ANDROGEN_ANTAGONIST CTL04E 1_CTL04E_494.ANDROGEN_ANTAGONIST CTL04E 3_CTL04E_494.ANDROGEN_ANTAGONIST CTL04E 3_CTL08A_494.ANDROGEN_ANTAGONIST CTL08A 1_CTL08A_494.ANDROGEN_ANTAGONIST CTL08A 1_CTL04E_495.ARYL_HYD_AGONIST CTL04E 2_CTL04E_495.ARYL_HYD_AGONIST CTL04E 3_CTL08A_495.ARYL_HYD_AGONIST CTL08A 3_CTL04E_495.ARYL_HYD_AGONIST CTL04E 1_CTL08A_495.ARYL_HYD_AGONIST CTL08A 2_CTL08A_495.ARYL_HYD_AGONIST CTL08A 2_CTL04E_496.ARYL_HYD_ANTAGONIST CTL04E 2_CTL08A_496.ARYL_HYD_ANTAGONIST CTL08A 1_CTL08A_496.ARYL_HYD_ANTAGONIST CTL08A 3_CTL04E_496.ARYL_HYD_ANTAGONIST CTL04E 1_CTL04E_496.ARYL_HYD_ANTAGONIST CTL04E 3_CTL08A_496.ARYL_HYD_ANTAGONIST CTL08A 3_CTL04E_497.LIVER-X_AGONIST CTL04E 2_CTL04E_497.LIVER-X_AGONIST CTL04E 3_CTL08A_497.LIVER-X_AGONIST CTL08A 2_CTL08A_497.LIVER-X_AGONIST CTL08A 1_CTL04E_497.LIVER-X_AGONIST CTL04E 1_CTL08A_497.LIVER-X_AGONIST CTL08A 1_CTL04E_498.LIVER-X_ANTAGONIST CTL04E 3_CTL08A_498.LIVER-X_ANTAGONIST CTL08A 1_CTL08A_498.LIVER-X_ANTAGONIST CTL08A 3_CTL04E_498.LIVER-X_ANTAGONIST CTL04E 2_CTL04E_498.LIVER-X_ANTAGONIST CTL04E 2_CTL08A_498.LIVER-X_ANTAGONIST CTL08A 1_CTL08A_499.GLUCOCORT_AGONIST CTL08A 2_CTL04E_499.GLUCOCORT_AGONIST CTL04E 3_CTL04E_499.GLUCOCORT_AGONIST CTL04E 3_CTL08A_499.GLUCOCORT_AGONIST CTL08A 1_CTL04E_499.GLUCOCORT_AGONIST CTL04E 2_CTL08A_499.GLUCOCORT_AGONIST CTL08A 2_CTL04E_500.GLUCOCORT_ANTAGONIST CTL04E 1_CTL04E_500.GLUCOCORT_ANTAGONIST CTL04E 3_CTL04E_500.GLUCOCORT_ANTAGONIST CTL04E 2_CTL08A_500.GLUCOCORT_ANTAGONIST CTL08A 1_CTL08A_500.GLUCOCORT_ANTAGONIST CTL08A 3_CTL08A_500.GLUCOCORT_ANTAGONIST CTL08A 3_CTL08A_501.THYROID_AGONIST CTL08A 3_CTL04E_501.THYROID_AGONIST CTL04E 1_CTL08A_501.THYROID_AGONIST CTL08A 2_CTL04E_501.THYROID_AGONIST CTL04E 1_CTL04E_501.THYROID_AGONIST CTL04E 2_CTL08A_501.THYROID_AGONIST CTL08A 3_CTL04E_502.THYROID_ANTAGONIST CTL04E 3_CTL08A_502.THYROID_ANTAGONIST CTL08A 1_CTL04E_502.THYROID_ANTAGONIST CTL04E 1_CTL08A_502.THYROID_ANTAGONIST CTL08A 2_CTL08A_502.THYROID_ANTAGONIST CTL08A 2_CTL04E_502.THYROID_ANTAGONIST CTL04E 2_CTL08A_505.DMSO CTL08A 2_CTL04E_505.DMSO CTL04E 1_CTL08A_505.DMSO CTL08A 3_CTL04E_505.DMSO CTL04E 1_CTL04E_505.DMSO CTL04E 3_CTL08A_505.DMSO CTL08A 1_CTL08A_508.RET_AGONIST CTL08A 1_CTL04E_508.RET_AGONIST CTL04E 1_CTL08A_509.RET_ANTAGONIST CTL08A 3_CTL04E_509.RET_ANTAGONIST CTL04E 1_CTL04E_509.RET_ANTAGONIST CTL04E 2_CTL04E_509.RET_ANTAGONIST CTL04E 3_CTL08A_509.RET_ANTAGONIST CTL08A
adataPbulk.obsm["MDS"] = np.vstack([MDS["x"],MDS["y"]]).T
sc.pl.embedding(adataPbulk, basis="MDS", color=["line","condition","nCells"], wspace=.6, size=400)
%%R -o SharedConditionVsDmso
AllGenes <- dim(Counts)[1]
SharedConditionVsDmso <- list()
for (condition in setdiff(unique(MD[["condition"]]), "DMSO")) {
# Construct the contrast expression
contrastExpression <- paste("CondFactor", condition, sep = "")
print(contrastExpression)
# Use the correct argument name 'contrasts' in makeContrasts
mycontrast <- makeContrasts(contrasts = contrastExpression, levels = mm)
# Remove the extraneous comma and correctly pass all parameters to glmQLFTest
SharedConditionVsDmso[[paste(condition, ".VS.DMSO",sep = "") ]] <- topTags(glmQLFTest(fit, contrast = mycontrast), adjust.method = "fdr", n = AllGenes, sort.by = "logFC")$table
# Optionally, print or store the results
}
[1] "CondFactorESTROGENAGONIST" [1] "CondFactorESTROGENANTAGONIST" [1] "CondFactorANDROGENAGONIST" [1] "CondFactorANDROGENANTAGONIST" [1] "CondFactorARYLHYDAGONIST" [1] "CondFactorARYLHYDANTAGONIST" [1] "CondFactorLIVERXAGONIST" [1] "CondFactorLIVERXANTAGONIST" [1] "CondFactorGLUCOCORTAGONIST" [1] "CondFactorGLUCOCORTANTAGONIST" [1] "CondFactorTHYROIDAGONIST" [1] "CondFactorTHYROIDANTAGONIST" [1] "CondFactorRETAGONIST" [1] "CondFactorRETANTAGONIST"
%%R -o SharedAgonistVsAntagonist
AllGenes <- dim(Counts)[1]
SharedAgonistVsAntagonist <- list()
for (condition in setdiff(unique(MD[["condition"]]), "DMSO")) {
print(condition)
# Only process conditions containing "AGONIST"
if (!grepl("ANTAGONIST", condition)) {
# Replace "AGONIST" with "ANTAGONIST" in the condition string
condAntagonist <- gsub("AGONIST", "ANTAGONIST", condition)
# Construct the contrast expression; adjust the separator as needed
contrastExpression <- paste("CondFactor", condition, "-", "CondFactor", condAntagonist, sep = "")
# Create the contrast using makeContrasts (ensure mm is defined)
mycontrast <- makeContrasts(contrasts = contrastExpression, levels = mm)
# Perform the glmQLFTest and extract the topTags table;
# ensure AllGenes is defined as the number of genes to report
SharedAgonistVsAntagonist[[paste(condition, ".VS.",condAntagonist,"LineShared",sep = "") ]] <- topTags(glmQLFTest(fit, contrast = mycontrast),
adjust.method = "fdr",
n = AllGenes,
sort.by = "logFC")$table
}
}
[1] "ESTROGENAGONIST" [1] "ESTROGENANTAGONIST" [1] "ANDROGENAGONIST" [1] "ANDROGENANTAGONIST" [1] "ARYLHYDAGONIST" [1] "ARYLHYDANTAGONIST" [1] "LIVERXAGONIST" [1] "LIVERXANTAGONIST" [1] "GLUCOCORTAGONIST" [1] "GLUCOCORTANTAGONIST" [1] "THYROIDAGONIST" [1] "THYROIDANTAGONIST" [1] "RETAGONIST" [1] "RETANTAGONIST"
%%R -i MD -i Counts -o MDS
Group <- factor(paste(MD$line,MD$condition,sep="."))
mm <-model.matrix(~0+Group, data=Group)
colnames(mm)<-levels(Group)
rownames(mm)<-rownames(MD)
dds <- DGEList(Counts, group = Group, genes = rownames(Counts))
keep.genes<-filterByExpr(dds,design=mm)
dds <- dds[keep.genes,, keep.lib.sizes=FALSE]
dds <- calcNormFactors(dds)
dds <- estimateGLMRobustDisp(dds, mm)
fit <- glmQLFit(dds, mm)
# Store MDS
png(tempfile(fileext = ".png"))
MDS <- plotMDS(dds, pch=16, main="MDS")
dev.off()
print(dds$samples)
plotBCV(dds)
group lib.size 3_CTL04E_491.ESTROGEN_AGONIST CTL04E.ESTROGENAGONIST 4507572 2_CTL08A_491.ESTROGEN_AGONIST CTL08A.ESTROGENAGONIST 7882953 3_CTL08A_491.ESTROGEN_AGONIST CTL08A.ESTROGENAGONIST 10574899 2_CTL04E_491.ESTROGEN_AGONIST CTL04E.ESTROGENAGONIST 6637449 1_CTL08A_491.ESTROGEN_AGONIST CTL08A.ESTROGENAGONIST 10078775 1_CTL04E_491.ESTROGEN_AGONIST CTL04E.ESTROGENAGONIST 6612690 3_CTL04E_492.ESTROGEN_ANTAGONIST CTL04E.ESTROGENANTAGONIST 6474589 1_CTL08A_492.ESTROGEN_ANTAGONIST CTL08A.ESTROGENANTAGONIST 8316210 1_CTL04E_492.ESTROGEN_ANTAGONIST CTL04E.ESTROGENANTAGONIST 5279470 2_CTL04E_492.ESTROGEN_ANTAGONIST CTL04E.ESTROGENANTAGONIST 6586853 2_CTL08A_492.ESTROGEN_ANTAGONIST CTL08A.ESTROGENANTAGONIST 5923504 3_CTL08A_492.ESTROGEN_ANTAGONIST CTL08A.ESTROGENANTAGONIST 9515096 2_CTL08A_493.ANDROGEN_AGONIST CTL08A.ANDROGENAGONIST 5891373 3_CTL04E_493.ANDROGEN_AGONIST CTL04E.ANDROGENAGONIST 4197851 3_CTL08A_493.ANDROGEN_AGONIST CTL08A.ANDROGENAGONIST 10384185 2_CTL04E_493.ANDROGEN_AGONIST CTL04E.ANDROGENAGONIST 3588783 1_CTL04E_493.ANDROGEN_AGONIST CTL04E.ANDROGENAGONIST 5270309 1_CTL08A_493.ANDROGEN_AGONIST CTL08A.ANDROGENAGONIST 8126510 2_CTL08A_494.ANDROGEN_ANTAGONIST CTL08A.ANDROGENANTAGONIST 7311044 2_CTL04E_494.ANDROGEN_ANTAGONIST CTL04E.ANDROGENANTAGONIST 8143612 1_CTL04E_494.ANDROGEN_ANTAGONIST CTL04E.ANDROGENANTAGONIST 6838932 3_CTL04E_494.ANDROGEN_ANTAGONIST CTL04E.ANDROGENANTAGONIST 6164523 3_CTL08A_494.ANDROGEN_ANTAGONIST CTL08A.ANDROGENANTAGONIST 11246639 1_CTL08A_494.ANDROGEN_ANTAGONIST CTL08A.ANDROGENANTAGONIST 11440904 1_CTL04E_495.ARYL_HYD_AGONIST CTL04E.ARYLHYDAGONIST 7961702 2_CTL04E_495.ARYL_HYD_AGONIST CTL04E.ARYLHYDAGONIST 7320284 3_CTL08A_495.ARYL_HYD_AGONIST CTL08A.ARYLHYDAGONIST 9931432 3_CTL04E_495.ARYL_HYD_AGONIST CTL04E.ARYLHYDAGONIST 7404226 1_CTL08A_495.ARYL_HYD_AGONIST CTL08A.ARYLHYDAGONIST 10766239 2_CTL08A_495.ARYL_HYD_AGONIST CTL08A.ARYLHYDAGONIST 9836854 2_CTL04E_496.ARYL_HYD_ANTAGONIST CTL04E.ARYLHYDANTAGONIST 10445586 2_CTL08A_496.ARYL_HYD_ANTAGONIST CTL08A.ARYLHYDANTAGONIST 18311649 1_CTL08A_496.ARYL_HYD_ANTAGONIST CTL08A.ARYLHYDANTAGONIST 17371527 3_CTL04E_496.ARYL_HYD_ANTAGONIST CTL04E.ARYLHYDANTAGONIST 10891755 1_CTL04E_496.ARYL_HYD_ANTAGONIST CTL04E.ARYLHYDANTAGONIST 9157150 3_CTL08A_496.ARYL_HYD_ANTAGONIST CTL08A.ARYLHYDANTAGONIST 17250384 3_CTL04E_497.LIVER-X_AGONIST CTL04E.LIVERXAGONIST 8770420 2_CTL04E_497.LIVER-X_AGONIST CTL04E.LIVERXAGONIST 9405482 3_CTL08A_497.LIVER-X_AGONIST CTL08A.LIVERXAGONIST 10873305 2_CTL08A_497.LIVER-X_AGONIST CTL08A.LIVERXAGONIST 11366499 1_CTL04E_497.LIVER-X_AGONIST CTL04E.LIVERXAGONIST 8496579 1_CTL08A_497.LIVER-X_AGONIST CTL08A.LIVERXAGONIST 11420634 1_CTL04E_498.LIVER-X_ANTAGONIST CTL04E.LIVERXANTAGONIST 6548666 3_CTL08A_498.LIVER-X_ANTAGONIST CTL08A.LIVERXANTAGONIST 12994627 1_CTL08A_498.LIVER-X_ANTAGONIST CTL08A.LIVERXANTAGONIST 9456556 3_CTL04E_498.LIVER-X_ANTAGONIST CTL04E.LIVERXANTAGONIST 6275308 2_CTL04E_498.LIVER-X_ANTAGONIST CTL04E.LIVERXANTAGONIST 5328041 2_CTL08A_498.LIVER-X_ANTAGONIST CTL08A.LIVERXANTAGONIST 12357385 1_CTL08A_499.GLUCOCORT_AGONIST CTL08A.GLUCOCORTAGONIST 8005353 2_CTL04E_499.GLUCOCORT_AGONIST CTL04E.GLUCOCORTAGONIST 4899582 3_CTL04E_499.GLUCOCORT_AGONIST CTL04E.GLUCOCORTAGONIST 4274187 3_CTL08A_499.GLUCOCORT_AGONIST CTL08A.GLUCOCORTAGONIST 9562181 1_CTL04E_499.GLUCOCORT_AGONIST CTL04E.GLUCOCORTAGONIST 4890989 2_CTL08A_499.GLUCOCORT_AGONIST CTL08A.GLUCOCORTAGONIST 6905806 2_CTL04E_500.GLUCOCORT_ANTAGONIST CTL04E.GLUCOCORTANTAGONIST 6182072 1_CTL04E_500.GLUCOCORT_ANTAGONIST CTL04E.GLUCOCORTANTAGONIST 5958742 3_CTL04E_500.GLUCOCORT_ANTAGONIST CTL04E.GLUCOCORTANTAGONIST 4634932 2_CTL08A_500.GLUCOCORT_ANTAGONIST CTL08A.GLUCOCORTANTAGONIST 6025228 1_CTL08A_500.GLUCOCORT_ANTAGONIST CTL08A.GLUCOCORTANTAGONIST 8252109 3_CTL08A_500.GLUCOCORT_ANTAGONIST CTL08A.GLUCOCORTANTAGONIST 7439342 3_CTL08A_501.THYROID_AGONIST CTL08A.THYROIDAGONIST 8639810 3_CTL04E_501.THYROID_AGONIST CTL04E.THYROIDAGONIST 4466026 1_CTL08A_501.THYROID_AGONIST CTL08A.THYROIDAGONIST 8860278 2_CTL04E_501.THYROID_AGONIST CTL04E.THYROIDAGONIST 5412927 1_CTL04E_501.THYROID_AGONIST CTL04E.THYROIDAGONIST 5930588 2_CTL08A_501.THYROID_AGONIST CTL08A.THYROIDAGONIST 8491023 3_CTL04E_502.THYROID_ANTAGONIST CTL04E.THYROIDANTAGONIST 5450621 3_CTL08A_502.THYROID_ANTAGONIST CTL08A.THYROIDANTAGONIST 9849883 1_CTL04E_502.THYROID_ANTAGONIST CTL04E.THYROIDANTAGONIST 2477807 1_CTL08A_502.THYROID_ANTAGONIST CTL08A.THYROIDANTAGONIST 9097493 2_CTL08A_502.THYROID_ANTAGONIST CTL08A.THYROIDANTAGONIST 7936874 2_CTL04E_502.THYROID_ANTAGONIST CTL04E.THYROIDANTAGONIST 5330888 2_CTL08A_505.DMSO CTL08A.DMSO 10466916 2_CTL04E_505.DMSO CTL04E.DMSO 4724773 1_CTL08A_505.DMSO CTL08A.DMSO 8797791 3_CTL04E_505.DMSO CTL04E.DMSO 4855880 1_CTL04E_505.DMSO CTL04E.DMSO 3664568 3_CTL08A_505.DMSO CTL08A.DMSO 7577513 1_CTL08A_508.RET_AGONIST CTL08A.RETAGONIST 17568712 1_CTL04E_508.RET_AGONIST CTL04E.RETAGONIST 16140576 1_CTL08A_509.RET_ANTAGONIST CTL08A.RETANTAGONIST 9980971 3_CTL04E_509.RET_ANTAGONIST CTL04E.RETANTAGONIST 3756860 1_CTL04E_509.RET_ANTAGONIST CTL04E.RETANTAGONIST 4637133 2_CTL04E_509.RET_ANTAGONIST CTL04E.RETANTAGONIST 3841238 3_CTL08A_509.RET_ANTAGONIST CTL08A.RETANTAGONIST 9690941 norm.factors 3_CTL04E_491.ESTROGEN_AGONIST 1.0116313 2_CTL08A_491.ESTROGEN_AGONIST 1.0367609 3_CTL08A_491.ESTROGEN_AGONIST 0.9977125 2_CTL04E_491.ESTROGEN_AGONIST 0.9780278 1_CTL08A_491.ESTROGEN_AGONIST 1.0098223 1_CTL04E_491.ESTROGEN_AGONIST 0.9830482 3_CTL04E_492.ESTROGEN_ANTAGONIST 1.0848373 1_CTL08A_492.ESTROGEN_ANTAGONIST 1.0634887 1_CTL04E_492.ESTROGEN_ANTAGONIST 1.0349239 2_CTL04E_492.ESTROGEN_ANTAGONIST 1.0422559 2_CTL08A_492.ESTROGEN_ANTAGONIST 1.0638383 3_CTL08A_492.ESTROGEN_ANTAGONIST 1.0538380 2_CTL08A_493.ANDROGEN_AGONIST 1.0641311 3_CTL04E_493.ANDROGEN_AGONIST 1.0232572 3_CTL08A_493.ANDROGEN_AGONIST 1.0415615 2_CTL04E_493.ANDROGEN_AGONIST 1.0251596 1_CTL04E_493.ANDROGEN_AGONIST 1.0255222 1_CTL08A_493.ANDROGEN_AGONIST 1.0494780 2_CTL08A_494.ANDROGEN_ANTAGONIST 1.0441893 2_CTL04E_494.ANDROGEN_ANTAGONIST 1.0037397 1_CTL04E_494.ANDROGEN_ANTAGONIST 1.0339053 3_CTL04E_494.ANDROGEN_ANTAGONIST 1.0515291 3_CTL08A_494.ANDROGEN_ANTAGONIST 1.0359530 1_CTL08A_494.ANDROGEN_ANTAGONIST 1.0622477 1_CTL04E_495.ARYL_HYD_AGONIST 0.9507289 2_CTL04E_495.ARYL_HYD_AGONIST 0.9655621 3_CTL08A_495.ARYL_HYD_AGONIST 1.0153127 3_CTL04E_495.ARYL_HYD_AGONIST 0.9537987 1_CTL08A_495.ARYL_HYD_AGONIST 0.9948123 2_CTL08A_495.ARYL_HYD_AGONIST 1.0037303 2_CTL04E_496.ARYL_HYD_ANTAGONIST 0.9753806 2_CTL08A_496.ARYL_HYD_ANTAGONIST 1.0341837 1_CTL08A_496.ARYL_HYD_ANTAGONIST 1.0479335 3_CTL04E_496.ARYL_HYD_ANTAGONIST 0.9754279 1_CTL04E_496.ARYL_HYD_ANTAGONIST 0.9762744 3_CTL08A_496.ARYL_HYD_ANTAGONIST 1.0585913 3_CTL04E_497.LIVER-X_AGONIST 0.9592887 2_CTL04E_497.LIVER-X_AGONIST 0.9398888 3_CTL08A_497.LIVER-X_AGONIST 0.9643908 2_CTL08A_497.LIVER-X_AGONIST 0.9798928 1_CTL04E_497.LIVER-X_AGONIST 0.9617859 1_CTL08A_497.LIVER-X_AGONIST 0.9515573 1_CTL04E_498.LIVER-X_ANTAGONIST 1.0236481 3_CTL08A_498.LIVER-X_ANTAGONIST 1.0455333 1_CTL08A_498.LIVER-X_ANTAGONIST 1.0247475 3_CTL04E_498.LIVER-X_ANTAGONIST 1.0211916 2_CTL04E_498.LIVER-X_ANTAGONIST 1.0197096 2_CTL08A_498.LIVER-X_ANTAGONIST 1.0125297 1_CTL08A_499.GLUCOCORT_AGONIST 0.9587351 2_CTL04E_499.GLUCOCORT_AGONIST 0.9522264 3_CTL04E_499.GLUCOCORT_AGONIST 0.9727543 3_CTL08A_499.GLUCOCORT_AGONIST 0.9769958 1_CTL04E_499.GLUCOCORT_AGONIST 0.9487468 2_CTL08A_499.GLUCOCORT_AGONIST 1.0055538 2_CTL04E_500.GLUCOCORT_ANTAGONIST 0.9049248 1_CTL04E_500.GLUCOCORT_ANTAGONIST 0.9472611 3_CTL04E_500.GLUCOCORT_ANTAGONIST 0.9546171 2_CTL08A_500.GLUCOCORT_ANTAGONIST 0.9583504 1_CTL08A_500.GLUCOCORT_ANTAGONIST 0.9270345 3_CTL08A_500.GLUCOCORT_ANTAGONIST 0.9481311 3_CTL08A_501.THYROID_AGONIST 1.0056152 3_CTL04E_501.THYROID_AGONIST 0.9804307 1_CTL08A_501.THYROID_AGONIST 0.9975044 2_CTL04E_501.THYROID_AGONIST 0.9761977 1_CTL04E_501.THYROID_AGONIST 0.9798706 2_CTL08A_501.THYROID_AGONIST 1.0080354 3_CTL04E_502.THYROID_ANTAGONIST 0.9643569 3_CTL08A_502.THYROID_ANTAGONIST 0.9943966 1_CTL04E_502.THYROID_ANTAGONIST 0.9822730 1_CTL08A_502.THYROID_ANTAGONIST 0.9789649 2_CTL08A_502.THYROID_ANTAGONIST 1.0103410 2_CTL04E_502.THYROID_ANTAGONIST 0.9710954 2_CTL08A_505.DMSO 0.9718366 2_CTL04E_505.DMSO 0.9683724 1_CTL08A_505.DMSO 0.9647210 3_CTL04E_505.DMSO 0.9792731 1_CTL04E_505.DMSO 0.9711313 3_CTL08A_505.DMSO 0.9600512 1_CTL08A_508.RET_AGONIST 1.1578227 1_CTL04E_508.RET_AGONIST 1.1664596 1_CTL08A_509.RET_ANTAGONIST 1.0102882 3_CTL04E_509.RET_ANTAGONIST 0.9815613 1_CTL04E_509.RET_ANTAGONIST 0.9914735 2_CTL04E_509.RET_ANTAGONIST 0.9785729 3_CTL08A_509.RET_ANTAGONIST 0.9997802
%%R -o univserse
univserse <- rownames(dds)
pd.Series(list(univserse)).to_excel("./{}_{}_DEGs/UNIVERSE_DEGsPerLineAgonistVsAntagonist.xlsx".format(DS, celltype))
adataPbulk.obsm["MDS"] = np.vstack([MDS["x"],MDS["y"]]).T
sc.pl.embedding(adataPbulk, basis="MDS", color=["line","condition","nCells"], wspace=.6, size=400)
%%R
print(head(mm))
print(colnames(mm))
CTL04E.ANDROGENAGONIST CTL04E.ANDROGENANTAGONIST 3_CTL04E_491.ESTROGEN_AGONIST 0 0 2_CTL08A_491.ESTROGEN_AGONIST 0 0 3_CTL08A_491.ESTROGEN_AGONIST 0 0 2_CTL04E_491.ESTROGEN_AGONIST 0 0 1_CTL08A_491.ESTROGEN_AGONIST 0 0 1_CTL04E_491.ESTROGEN_AGONIST 0 0 CTL04E.ARYLHYDAGONIST CTL04E.ARYLHYDANTAGONIST 3_CTL04E_491.ESTROGEN_AGONIST 0 0 2_CTL08A_491.ESTROGEN_AGONIST 0 0 3_CTL08A_491.ESTROGEN_AGONIST 0 0 2_CTL04E_491.ESTROGEN_AGONIST 0 0 1_CTL08A_491.ESTROGEN_AGONIST 0 0 1_CTL04E_491.ESTROGEN_AGONIST 0 0 CTL04E.DMSO CTL04E.ESTROGENAGONIST 3_CTL04E_491.ESTROGEN_AGONIST 0 1 2_CTL08A_491.ESTROGEN_AGONIST 0 0 3_CTL08A_491.ESTROGEN_AGONIST 0 0 2_CTL04E_491.ESTROGEN_AGONIST 0 1 1_CTL08A_491.ESTROGEN_AGONIST 0 0 1_CTL04E_491.ESTROGEN_AGONIST 0 1 CTL04E.ESTROGENANTAGONIST CTL04E.GLUCOCORTAGONIST 3_CTL04E_491.ESTROGEN_AGONIST 0 0 2_CTL08A_491.ESTROGEN_AGONIST 0 0 3_CTL08A_491.ESTROGEN_AGONIST 0 0 2_CTL04E_491.ESTROGEN_AGONIST 0 0 1_CTL08A_491.ESTROGEN_AGONIST 0 0 1_CTL04E_491.ESTROGEN_AGONIST 0 0 CTL04E.GLUCOCORTANTAGONIST CTL04E.LIVERXAGONIST 3_CTL04E_491.ESTROGEN_AGONIST 0 0 2_CTL08A_491.ESTROGEN_AGONIST 0 0 3_CTL08A_491.ESTROGEN_AGONIST 0 0 2_CTL04E_491.ESTROGEN_AGONIST 0 0 1_CTL08A_491.ESTROGEN_AGONIST 0 0 1_CTL04E_491.ESTROGEN_AGONIST 0 0 CTL04E.LIVERXANTAGONIST CTL04E.RETAGONIST 3_CTL04E_491.ESTROGEN_AGONIST 0 0 2_CTL08A_491.ESTROGEN_AGONIST 0 0 3_CTL08A_491.ESTROGEN_AGONIST 0 0 2_CTL04E_491.ESTROGEN_AGONIST 0 0 1_CTL08A_491.ESTROGEN_AGONIST 0 0 1_CTL04E_491.ESTROGEN_AGONIST 0 0 CTL04E.RETANTAGONIST CTL04E.THYROIDAGONIST 3_CTL04E_491.ESTROGEN_AGONIST 0 0 2_CTL08A_491.ESTROGEN_AGONIST 0 0 3_CTL08A_491.ESTROGEN_AGONIST 0 0 2_CTL04E_491.ESTROGEN_AGONIST 0 0 1_CTL08A_491.ESTROGEN_AGONIST 0 0 1_CTL04E_491.ESTROGEN_AGONIST 0 0 CTL04E.THYROIDANTAGONIST CTL08A.ANDROGENAGONIST 3_CTL04E_491.ESTROGEN_AGONIST 0 0 2_CTL08A_491.ESTROGEN_AGONIST 0 0 3_CTL08A_491.ESTROGEN_AGONIST 0 0 2_CTL04E_491.ESTROGEN_AGONIST 0 0 1_CTL08A_491.ESTROGEN_AGONIST 0 0 1_CTL04E_491.ESTROGEN_AGONIST 0 0 CTL08A.ANDROGENANTAGONIST CTL08A.ARYLHYDAGONIST 3_CTL04E_491.ESTROGEN_AGONIST 0 0 2_CTL08A_491.ESTROGEN_AGONIST 0 0 3_CTL08A_491.ESTROGEN_AGONIST 0 0 2_CTL04E_491.ESTROGEN_AGONIST 0 0 1_CTL08A_491.ESTROGEN_AGONIST 0 0 1_CTL04E_491.ESTROGEN_AGONIST 0 0 CTL08A.ARYLHYDANTAGONIST CTL08A.DMSO 3_CTL04E_491.ESTROGEN_AGONIST 0 0 2_CTL08A_491.ESTROGEN_AGONIST 0 0 3_CTL08A_491.ESTROGEN_AGONIST 0 0 2_CTL04E_491.ESTROGEN_AGONIST 0 0 1_CTL08A_491.ESTROGEN_AGONIST 0 0 1_CTL04E_491.ESTROGEN_AGONIST 0 0 CTL08A.ESTROGENAGONIST CTL08A.ESTROGENANTAGONIST 3_CTL04E_491.ESTROGEN_AGONIST 0 0 2_CTL08A_491.ESTROGEN_AGONIST 1 0 3_CTL08A_491.ESTROGEN_AGONIST 1 0 2_CTL04E_491.ESTROGEN_AGONIST 0 0 1_CTL08A_491.ESTROGEN_AGONIST 1 0 1_CTL04E_491.ESTROGEN_AGONIST 0 0 CTL08A.GLUCOCORTAGONIST 3_CTL04E_491.ESTROGEN_AGONIST 0 2_CTL08A_491.ESTROGEN_AGONIST 0 3_CTL08A_491.ESTROGEN_AGONIST 0 2_CTL04E_491.ESTROGEN_AGONIST 0 1_CTL08A_491.ESTROGEN_AGONIST 0 1_CTL04E_491.ESTROGEN_AGONIST 0 CTL08A.GLUCOCORTANTAGONIST CTL08A.LIVERXAGONIST 3_CTL04E_491.ESTROGEN_AGONIST 0 0 2_CTL08A_491.ESTROGEN_AGONIST 0 0 3_CTL08A_491.ESTROGEN_AGONIST 0 0 2_CTL04E_491.ESTROGEN_AGONIST 0 0 1_CTL08A_491.ESTROGEN_AGONIST 0 0 1_CTL04E_491.ESTROGEN_AGONIST 0 0 CTL08A.LIVERXANTAGONIST CTL08A.RETAGONIST 3_CTL04E_491.ESTROGEN_AGONIST 0 0 2_CTL08A_491.ESTROGEN_AGONIST 0 0 3_CTL08A_491.ESTROGEN_AGONIST 0 0 2_CTL04E_491.ESTROGEN_AGONIST 0 0 1_CTL08A_491.ESTROGEN_AGONIST 0 0 1_CTL04E_491.ESTROGEN_AGONIST 0 0 CTL08A.RETANTAGONIST CTL08A.THYROIDAGONIST 3_CTL04E_491.ESTROGEN_AGONIST 0 0 2_CTL08A_491.ESTROGEN_AGONIST 0 0 3_CTL08A_491.ESTROGEN_AGONIST 0 0 2_CTL04E_491.ESTROGEN_AGONIST 0 0 1_CTL08A_491.ESTROGEN_AGONIST 0 0 1_CTL04E_491.ESTROGEN_AGONIST 0 0 CTL08A.THYROIDANTAGONIST 3_CTL04E_491.ESTROGEN_AGONIST 0 2_CTL08A_491.ESTROGEN_AGONIST 0 3_CTL08A_491.ESTROGEN_AGONIST 0 2_CTL04E_491.ESTROGEN_AGONIST 0 1_CTL08A_491.ESTROGEN_AGONIST 0 1_CTL04E_491.ESTROGEN_AGONIST 0 [1] "CTL04E.ANDROGENAGONIST" "CTL04E.ANDROGENANTAGONIST" [3] "CTL04E.ARYLHYDAGONIST" "CTL04E.ARYLHYDANTAGONIST" [5] "CTL04E.DMSO" "CTL04E.ESTROGENAGONIST" [7] "CTL04E.ESTROGENANTAGONIST" "CTL04E.GLUCOCORTAGONIST" [9] "CTL04E.GLUCOCORTANTAGONIST" "CTL04E.LIVERXAGONIST" [11] "CTL04E.LIVERXANTAGONIST" "CTL04E.RETAGONIST" [13] "CTL04E.RETANTAGONIST" "CTL04E.THYROIDAGONIST" [15] "CTL04E.THYROIDANTAGONIST" "CTL08A.ANDROGENAGONIST" [17] "CTL08A.ANDROGENANTAGONIST" "CTL08A.ARYLHYDAGONIST" [19] "CTL08A.ARYLHYDANTAGONIST" "CTL08A.DMSO" [21] "CTL08A.ESTROGENAGONIST" "CTL08A.ESTROGENANTAGONIST" [23] "CTL08A.GLUCOCORTAGONIST" "CTL08A.GLUCOCORTANTAGONIST" [25] "CTL08A.LIVERXAGONIST" "CTL08A.LIVERXANTAGONIST" [27] "CTL08A.RETAGONIST" "CTL08A.RETANTAGONIST" [29] "CTL08A.THYROIDAGONIST" "CTL08A.THYROIDANTAGONIST"
%%R -o DifferentialEffectAgonistVsAntagonist
AllGenes <- dim(Counts)[1]
DifferentialEffectAgonistVsAntagonist <- list()
for (condition in setdiff(unique(MD[["condition"]]), "DMSO")) {
# Only process conditions containing "AGONIST"
if (!grepl("ANTAGONIST", condition)) {
# Replace "AGONIST" with "ANTAGONIST" in the condition string
condAntagonist <- gsub("AGONIST", "ANTAGONIST", condition)
contrastExpression <- paste("(CTL04E.", condition, "-", "CTL04E.", condAntagonist,
")-(CTL08A.", condition, "-", "CTL08A.", condAntagonist, ")",
sep = "")
print(contrastExpression)
mycontrast <- makeContrasts(contrasts = contrastExpression, levels = mm)
DifferentialEffectAgonistVsAntagonist[[paste(condition, ".VS.",condAntagonist,"DifferentialCTL04E-CTL08A",sep = "") ]] <- topTags(glmQLFTest(fit, contrast = mycontrast), adjust.method = "fdr", n = AllGenes, sort.by = "PValue")$table
}
}
[1] "(CTL04E.ESTROGENAGONIST-CTL04E.ESTROGENANTAGONIST)-(CTL08A.ESTROGENAGONIST-CTL08A.ESTROGENANTAGONIST)" [1] "(CTL04E.ANDROGENAGONIST-CTL04E.ANDROGENANTAGONIST)-(CTL08A.ANDROGENAGONIST-CTL08A.ANDROGENANTAGONIST)" [1] "(CTL04E.ARYLHYDAGONIST-CTL04E.ARYLHYDANTAGONIST)-(CTL08A.ARYLHYDAGONIST-CTL08A.ARYLHYDANTAGONIST)" [1] "(CTL04E.LIVERXAGONIST-CTL04E.LIVERXANTAGONIST)-(CTL08A.LIVERXAGONIST-CTL08A.LIVERXANTAGONIST)" [1] "(CTL04E.GLUCOCORTAGONIST-CTL04E.GLUCOCORTANTAGONIST)-(CTL08A.GLUCOCORTAGONIST-CTL08A.GLUCOCORTANTAGONIST)" [1] "(CTL04E.THYROIDAGONIST-CTL04E.THYROIDANTAGONIST)-(CTL08A.THYROIDAGONIST-CTL08A.THYROIDANTAGONIST)" [1] "(CTL04E.RETAGONIST-CTL04E.RETANTAGONIST)-(CTL08A.RETAGONIST-CTL08A.RETANTAGONIST)"
%%R -o PerLineAgonistVsAntagonist
AllGenes <- dim(Counts)[1]
PerLineAgonistVsAntagonist <- list()
for (condition in setdiff(unique(MD[["condition"]]), "DMSO")) {
# Only process conditions containing "AGONIST"
if (!grepl("ANTAGONIST", condition)) {
# Replace "AGONIST" with "ANTAGONIST" in the condition string
condAntagonist <- gsub("AGONIST", "ANTAGONIST", condition)
contrastExpressionCTL04E <- paste("CTL04E.", condition, "-", "CTL04E.", condAntagonist,sep = "")
print(contrastExpressionCTL04E)
contrastExpressionCTL04E <- makeContrasts(contrasts = contrastExpressionCTL04E, levels = mm)
PerLineAgonistVsAntagonist[[paste(condition, ".VS.",condAntagonist,"_CTL04E",sep = "") ]] <- topTags(glmQLFTest(fit, contrast = contrastExpressionCTL04E), adjust.method = "fdr", n = AllGenes, sort.by = "PValue")$table
contrastExpressionCTL08A <- paste("CTL08A.", condition, "-", "CTL08A.", condAntagonist,sep = "")
print(contrastExpressionCTL08A)
contrastExpressionCTL08A <- makeContrasts(contrasts = contrastExpressionCTL08A, levels = mm)
PerLineAgonistVsAntagonist[[paste(condition, ".VS.",condAntagonist,"_CTL08A",sep = "") ]] <- topTags(glmQLFTest(fit, contrast = contrastExpressionCTL08A), adjust.method = "fdr", n = AllGenes, sort.by = "PValue")$table
}
}
[1] "CTL04E.ESTROGENAGONIST-CTL04E.ESTROGENANTAGONIST" [1] "CTL08A.ESTROGENAGONIST-CTL08A.ESTROGENANTAGONIST" [1] "CTL04E.ANDROGENAGONIST-CTL04E.ANDROGENANTAGONIST" [1] "CTL08A.ANDROGENAGONIST-CTL08A.ANDROGENANTAGONIST" [1] "CTL04E.ARYLHYDAGONIST-CTL04E.ARYLHYDANTAGONIST" [1] "CTL08A.ARYLHYDAGONIST-CTL08A.ARYLHYDANTAGONIST" [1] "CTL04E.LIVERXAGONIST-CTL04E.LIVERXANTAGONIST" [1] "CTL08A.LIVERXAGONIST-CTL08A.LIVERXANTAGONIST" [1] "CTL04E.GLUCOCORTAGONIST-CTL04E.GLUCOCORTANTAGONIST" [1] "CTL08A.GLUCOCORTAGONIST-CTL08A.GLUCOCORTANTAGONIST" [1] "CTL04E.THYROIDAGONIST-CTL04E.THYROIDANTAGONIST" [1] "CTL08A.THYROIDAGONIST-CTL08A.THYROIDANTAGONIST" [1] "CTL04E.RETAGONIST-CTL04E.RETANTAGONIST" [1] "CTL08A.RETAGONIST-CTL08A.RETANTAGONIST"
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
# Prepare compounds list
CompoundsList = sorted(set("_".join(i.split("_")[:-1]) for i in PerLineAgonistVsAntagonist.keys()))
# Set seaborn style
sns.set(style="whitegrid", context="talk")
# Calculate subplot grid size
n = len(CompoundsList)
cols = 4
rows = int(np.ceil(n / cols))
# Create figure and axes
fig, axes = plt.subplots(rows, cols, figsize=(cols * 5, rows * 5))
fig.suptitle(f"Agonist vs Antagonist logFC Comparison Across lines ({DS}, {celltype})", fontsize=20, y=1.02)
axes = axes.flatten()
# Loop through compounds and plot
for idx, i in enumerate(CompoundsList):
ax = axes[idx]
try:
CTL04Eres = PerLineAgonistVsAntagonist[f"{i}_CTL04E"].copy()
CTL08Ares = PerLineAgonistVsAntagonist[f"{i}_CTL08A"].copy()
CTL04Eres = CTL04Eres[["logFC"]].rename(columns={"logFC":"CTL04E logFC"})
CTL08Ares = CTL08Ares[["logFC"]].rename(columns={"logFC":"CTL08A logFC"})
merged = pd.concat([CTL04Eres, CTL08Ares], axis=1)
# Scatterplot
sns.scatterplot(data=merged, x="CTL04E logFC", y="CTL08A logFC", ax=ax, s=20, alpha=0.7)
# Diagonal line
lims = [
np.min([merged["CTL04E logFC"].min(), merged["CTL08A logFC"].min()]),
np.max([merged["CTL04E logFC"].max(), merged["CTL08A logFC"].max()]),
]
ax.plot(lims, lims, "--", color="gray", linewidth=1)
ax.set_xlim(lims)
ax.set_ylim(lims)
# Titles and labels
ax.set_title(i, fontsize=14)
ax.set_xlabel("CTL04E logFC")
ax.set_ylabel("CTL08A logFC")
except KeyError as e:
ax.set_visible(False) # Skip if data is missing
# Hide any unused subplots
for j in range(idx + 1, len(axes)):
axes[j].set_visible(False)
plt.tight_layout()
plt.show()
%%R -o PerLineConditionVsDMSO
AllGenes <- dim(Counts)[1]
PerLineConditionVsDMSO <- list()
for (condition in setdiff(unique(MD[["condition"]]), "DMSO")) {
# Only process conditions containing "AGONIST"
contrastExpressionCTL04E <- paste("CTL04E.", condition, "-", "CTL04E.DMSO",sep = "")
print(contrastExpressionCTL04E)
contrastExpressionCTL04E <- makeContrasts(contrasts = contrastExpressionCTL04E, levels = mm)
PerLineConditionVsDMSO[[paste(condition, ".VS.DMSO_CTL04E",sep = "") ]] <- topTags(glmQLFTest(fit, contrast = contrastExpressionCTL04E), adjust.method = "fdr", n = AllGenes, sort.by = "PValue")$table
contrastExpressionCTL08A <- paste("CTL08A.", condition, "-", "CTL08A.DMSO",sep = "")
print(contrastExpressionCTL08A)
contrastExpressionCTL08A <- makeContrasts(contrasts = contrastExpressionCTL08A, levels = mm)
PerLineConditionVsDMSO[[paste(condition, ".VS.DMSO_CTL08A",sep = "") ]] <- topTags(glmQLFTest(fit, contrast = contrastExpressionCTL08A), adjust.method = "fdr", n = AllGenes, sort.by = "PValue")$table
}
[1] "CTL04E.ESTROGENAGONIST-CTL04E.DMSO" [1] "CTL08A.ESTROGENAGONIST-CTL08A.DMSO" [1] "CTL04E.ESTROGENANTAGONIST-CTL04E.DMSO" [1] "CTL08A.ESTROGENANTAGONIST-CTL08A.DMSO" [1] "CTL04E.ANDROGENAGONIST-CTL04E.DMSO" [1] "CTL08A.ANDROGENAGONIST-CTL08A.DMSO" [1] "CTL04E.ANDROGENANTAGONIST-CTL04E.DMSO" [1] "CTL08A.ANDROGENANTAGONIST-CTL08A.DMSO" [1] "CTL04E.ARYLHYDAGONIST-CTL04E.DMSO" [1] "CTL08A.ARYLHYDAGONIST-CTL08A.DMSO" [1] "CTL04E.ARYLHYDANTAGONIST-CTL04E.DMSO" [1] "CTL08A.ARYLHYDANTAGONIST-CTL08A.DMSO" [1] "CTL04E.LIVERXAGONIST-CTL04E.DMSO" [1] "CTL08A.LIVERXAGONIST-CTL08A.DMSO" [1] "CTL04E.LIVERXANTAGONIST-CTL04E.DMSO" [1] "CTL08A.LIVERXANTAGONIST-CTL08A.DMSO" [1] "CTL04E.GLUCOCORTAGONIST-CTL04E.DMSO" [1] "CTL08A.GLUCOCORTAGONIST-CTL08A.DMSO" [1] "CTL04E.GLUCOCORTANTAGONIST-CTL04E.DMSO" [1] "CTL08A.GLUCOCORTANTAGONIST-CTL08A.DMSO" [1] "CTL04E.THYROIDAGONIST-CTL04E.DMSO" [1] "CTL08A.THYROIDAGONIST-CTL08A.DMSO" [1] "CTL04E.THYROIDANTAGONIST-CTL04E.DMSO" [1] "CTL08A.THYROIDANTAGONIST-CTL08A.DMSO" [1] "CTL04E.RETAGONIST-CTL04E.DMSO" [1] "CTL08A.RETAGONIST-CTL08A.DMSO" [1] "CTL04E.RETANTAGONIST-CTL04E.DMSO" [1] "CTL08A.RETANTAGONIST-CTL08A.DMSO"
CompoundsList = sorted(set("_".join(i.split("_")[:-1]) for i in PerLineConditionVsDMSO.keys()))
# Set seaborn style
sns.set(style="whitegrid", context="talk")
# Calculate subplot grid size
n = len(CompoundsList)
cols = 6
rows = int(np.ceil(n / cols))
# Create figure and axes
fig, axes = plt.subplots(rows, cols, figsize=(cols * 5, rows * 5))
fig.suptitle(f"Compound vs DMSO logFC Comparison Across lines ({DS}, {celltype})", fontsize=20, y=1.02)
axes = axes.flatten()
# Loop through compounds and plot
for idx, i in enumerate(CompoundsList):
ax = axes[idx]
try:
CTL04Eres = PerLineConditionVsDMSO[f"{i}_CTL04E"].copy()
CTL08Ares = PerLineConditionVsDMSO[f"{i}_CTL08A"].copy()
CTL04Eres = CTL04Eres[["logFC"]].rename(columns={"logFC":"CTL04E logFC"})
CTL08Ares = CTL08Ares[["logFC"]].rename(columns={"logFC":"CTL08A logFC"})
merged = pd.concat([CTL04Eres, CTL08Ares], axis=1)
# Scatterplot
sns.scatterplot(data=merged, x="CTL04E logFC", y="CTL08A logFC", ax=ax, s=20, alpha=0.7)
# Diagonal line
lims = [
np.min([merged["CTL04E logFC"].min(), merged["CTL08A logFC"].min()]),
np.max([merged["CTL04E logFC"].max(), merged["CTL08A logFC"].max()]),
]
ax.plot(lims, lims, "--", color="gray", linewidth=1)
ax.set_xlim(lims)
ax.set_ylim(lims)
# Titles and labels
ax.set_title(i, fontsize=14)
ax.set_xlabel("CTL04E logFC")
ax.set_ylabel("CTL08A logFC")
except KeyError as e:
ax.set_visible(False) # Skip if data is missing
# Hide any unused subplots
for j in range(idx + 1, len(axes)):
axes[j].set_visible(False)
plt.tight_layout()
plt.show()
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
# Prepare compounds list
CompoundsList = sorted(set("_".join(i.split("_")[:-1]) for i in PerLineAgonistVsAntagonist.keys()))
# Set seaborn style
sns.set(style="whitegrid", context="talk")
# Calculate subplot grid size
n = len(CompoundsList)
cols = 4
rows = int(np.ceil(n / cols))
# Create figure and axes
fig, axes = plt.subplots(rows, cols, figsize=(cols * 5, rows * 5))
fig.suptitle(f"Agonist vs Antagonist logFC Comparison Across lines ({DS}, {celltype})", fontsize=20, y=1.02)
axes = axes.flatten()
# Loop through compounds and plot
for idx, i in enumerate(CompoundsList):
ax = axes[idx]
try:
CTL04Eres = PerLineAgonistVsAntagonist[f"{i}_CTL04E"].copy()
CTL08Ares = PerLineAgonistVsAntagonist[f"{i}_CTL08A"].copy()
CTL04Eres = CTL04Eres[["logFC"]].rename(columns={"logFC":"CTL04E logFC"})
CTL08Ares = CTL08Ares[["logFC"]].rename(columns={"logFC":"CTL08A logFC"})
merged = pd.concat([CTL04Eres, CTL08Ares], axis=1)
# Scatterplot
sns.scatterplot(data=merged, x="CTL04E logFC", y="CTL08A logFC", ax=ax, s=20, alpha=0.7)
# Diagonal line
lims = [
np.min([merged["CTL04E logFC"].min(), merged["CTL08A logFC"].min()]),
np.max([merged["CTL04E logFC"].max(), merged["CTL08A logFC"].max()]),
]
ax.plot(lims, lims, "--", color="gray", linewidth=1)
ax.set_xlim(lims)
ax.set_ylim(lims)
# Titles and labels
ax.set_title(i, fontsize=14)
ax.set_xlabel("CTL04E logFC")
ax.set_ylabel("CTL08A logFC")
except KeyError as e:
ax.set_visible(False) # Skip if data is missing
# Hide any unused subplots
for j in range(idx + 1, len(axes)):
axes[j].set_visible(False)
plt.tight_layout()
plt.show()
%%R -o DifferentialEffectCompoundVsDMSO
AllGenes <- dim(Counts)[1]
DifferentialEffectCompoundVsDMSO <- list()
for (condition in setdiff(unique(MD[["condition"]]), "DMSO")) {
# Only process conditions containing "AGONIST"
# Replace "AGONIST" with "ANTAGONIST" in the condition string
condAntagonist <- gsub("AGONIST", "ANTAGONIST", condition)
contrastExpression <- paste("(CTL04E.", condition, "-", "CTL04E.DMSO)-(CTL08A.", condition, "-", "CTL08A.DMSO)",
sep = "")
print(contrastExpression)
mycontrast <- makeContrasts(contrasts = contrastExpression, levels = mm)
DifferentialEffectCompoundVsDMSO[[paste(condition, ".VS.DMSO.DifferentialCTL04E-CTL08A",sep = "") ]] <- topTags(glmQLFTest(fit, contrast = mycontrast), adjust.method = "fdr", n = AllGenes, sort.by = "PValue")$table
}
[1] "(CTL04E.ESTROGENAGONIST-CTL04E.DMSO)-(CTL08A.ESTROGENAGONIST-CTL08A.DMSO)" [1] "(CTL04E.ESTROGENANTAGONIST-CTL04E.DMSO)-(CTL08A.ESTROGENANTAGONIST-CTL08A.DMSO)" [1] "(CTL04E.ANDROGENAGONIST-CTL04E.DMSO)-(CTL08A.ANDROGENAGONIST-CTL08A.DMSO)" [1] "(CTL04E.ANDROGENANTAGONIST-CTL04E.DMSO)-(CTL08A.ANDROGENANTAGONIST-CTL08A.DMSO)" [1] "(CTL04E.ARYLHYDAGONIST-CTL04E.DMSO)-(CTL08A.ARYLHYDAGONIST-CTL08A.DMSO)" [1] "(CTL04E.ARYLHYDANTAGONIST-CTL04E.DMSO)-(CTL08A.ARYLHYDANTAGONIST-CTL08A.DMSO)" [1] "(CTL04E.LIVERXAGONIST-CTL04E.DMSO)-(CTL08A.LIVERXAGONIST-CTL08A.DMSO)" [1] "(CTL04E.LIVERXANTAGONIST-CTL04E.DMSO)-(CTL08A.LIVERXANTAGONIST-CTL08A.DMSO)" [1] "(CTL04E.GLUCOCORTAGONIST-CTL04E.DMSO)-(CTL08A.GLUCOCORTAGONIST-CTL08A.DMSO)" [1] "(CTL04E.GLUCOCORTANTAGONIST-CTL04E.DMSO)-(CTL08A.GLUCOCORTANTAGONIST-CTL08A.DMSO)" [1] "(CTL04E.THYROIDAGONIST-CTL04E.DMSO)-(CTL08A.THYROIDAGONIST-CTL08A.DMSO)" [1] "(CTL04E.THYROIDANTAGONIST-CTL04E.DMSO)-(CTL08A.THYROIDANTAGONIST-CTL08A.DMSO)" [1] "(CTL04E.RETAGONIST-CTL04E.DMSO)-(CTL08A.RETAGONIST-CTL08A.DMSO)" [1] "(CTL04E.RETANTAGONIST-CTL04E.DMSO)-(CTL08A.RETANTAGONIST-CTL08A.DMSO)"
FDR = 0.05
dir_path = "./{}_{}_DEGs".format(DS, celltype)
if not os.path.exists(dir_path):
os.makedirs(dir_path)
SharedConditionVsDmso_filtered = {}
for k in list(SharedConditionVsDmso.keys()):
DEGs = SharedConditionVsDmso[k]
DEGs[DEGs["FDR"] < FDR].to_excel("./{}_{}_DEGs/SharedCompoundVsDmso.{}_{}_{}.FDR{}.xlsx".format(DS, celltype,k, celltype, DS,FDR))
SharedConditionVsDmso_filtered[k] = DEGs[DEGs["FDR"] < FDR]
SharedAgonistVsAntagonist_filtered = {}
for k in list(SharedAgonistVsAntagonist.keys()):
DEGs = SharedAgonistVsAntagonist[k]
DEGs[DEGs["FDR"] < FDR].to_excel("./{}_{}_DEGs/SharedAgonistVsAntagonist.{}_{}_{}.FDR{}.xlsx".format(DS, celltype,k, celltype, DS,FDR))
SharedAgonistVsAntagonist_filtered[k] = DEGs[DEGs["FDR"] < FDR]
DifferentialEffectCompoundVsDMSO_filtered = {}
for k in list(DifferentialEffectCompoundVsDMSO.keys()):
DEGs = DifferentialEffectCompoundVsDMSO[k]
DEGs[DEGs["FDR"] < FDR].to_excel("./{}_{}_DEGs/DifferentialEffectCompoundVsDMSO.{}_{}_{}.FDR{}.xlsx".format(DS, celltype, k, celltype, DS,FDR))
DifferentialEffectCompoundVsDMSO_filtered[k] = DEGs[DEGs["FDR"] < FDR]
DifferentialEffectAgonistVsAntagonist_filtered = {}
for k in list(DifferentialEffectAgonistVsAntagonist.keys()):
DEGs = DifferentialEffectAgonistVsAntagonist[k]
DEGs[DEGs["FDR"] < FDR].to_excel("./{}_{}_DEGs/DifferentialEffectAgonistVsAntagonist.{}_{}_{}.FDR{}.xlsx".format(DS, celltype, k, celltype, DS,FDR))
DifferentialEffectAgonistVsAntagonist_filtered[k] = DEGs[DEGs["FDR"] < FDR]
PerLineAgonistVsAntagonist_filtered = {}
for k in list(PerLineAgonistVsAntagonist.keys()):
DEGs = PerLineAgonistVsAntagonist[k]
DEGs[DEGs["FDR"] < FDR].to_excel("./{}_{}_DEGs/DEGsPerLineAgonistVsAntagonist.{}_{}_{}.FDR{}.xlsx".format(DS, celltype,k, celltype, DS,FDR))
PerLineAgonistVsAntagonist_filtered["UP_{}".format(k)] = DEGs[(DEGs["FDR"] < FDR) & (DEGs["logFC"] > 2)]
PerLineAgonistVsAntagonist_filtered["DOWN_{}".format(k)] = DEGs[(DEGs["FDR"] < FDR) & (DEGs["logFC"] < -2)]
PerLineConditionVsDMSO_filtered = {}
for k in list(PerLineConditionVsDMSO.keys()):
DEGs = PerLineConditionVsDMSO[k]
DEGs[DEGs["FDR"] < FDR].to_excel("./{}_{}_DEGs/DEGsPerLineCompoundVsDMSO.{}_{}_{}.FDR{}.xlsx".format(DS, celltype,k, celltype, DS,FDR))
PerLineConditionVsDMSO_filtered["UP_{}".format(k)] = DEGs[(DEGs["FDR"] < FDR) & (DEGs["logFC"] > 2)]
PerLineConditionVsDMSO_filtered["DOWN_{}".format(k)] = DEGs[(DEGs["FDR"] < FDR) & (DEGs["logFC"] < -2)]
### AGONIST VS ANTAGONIST
### AGONIST VS ANTAGONIST
### AGONIST VS ANTAGONIST
### AGONIST VS ANTAGONIST
JaccardDF = pd.DataFrame(index = list(PerLineAgonistVsAntagonist_filtered.keys()), columns = list(PerLineAgonistVsAntagonist_filtered.keys()))
for combination in list(itertools.combinations(list(PerLineAgonistVsAntagonist_filtered.keys()),2)):
set0 = set(PerLineAgonistVsAntagonist_filtered[combination[0]].genes.tolist())
set1 = set(PerLineAgonistVsAntagonist_filtered[combination[1]].genes.tolist())
Int = set0.intersection(set1)
Union = set0.union(set1)
Jaccard = len(Int) / len(Union)
JaccardDF.loc[combination[0],combination[1]] = Jaccard
JaccardDF.loc[combination[1],combination[0]] = Jaccard
JaccardDF = JaccardDF.fillna(1)
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
# Assuming JaccardDF is your DataFrame of Jaccard similarities
# Convert similarity to distance: distance = 1 - similarity
distance_matrix = 1 - JaccardDF.astype(float)
# Optionally, sort the DataFrame for a canonical order
distance_matrix = distance_matrix.sort_index().sort_index(axis=1)
# Create a clustermap using the 'bwr' colormap with improved aesthetics.
g = sns.clustermap(
distance_matrix,
metric="euclidean", # or 'cityblock', 'cosine', etc.
method="average", # clustering method: single, complete, ward, etc.
cmap="bwr_r", # blue-white-red palette
figsize=(15, 15),
cbar_pos=(0.02, 0.8, 0.05, 0.18) # adjust colorbar position if needed
)
g.ax_row_dendrogram.set_visible(False)
g.ax_col_dendrogram.set_visible(False)
# The clustermap reorders rows/columns; get the reordered DataFrame.
reordered_data = distance_matrix.iloc[
g.dendrogram_row.reordered_ind, g.dendrogram_col.reordered_ind
]
# Annotate each cell with the corresponding Jaccard similarity value.
# Note: similarity = 1 - distance
for i, row in enumerate(reordered_data.index):
for j, col in enumerate(reordered_data.columns):
d = reordered_data.loc[row, col]
jaccard_sim = 1 - d # Convert back to similarity
g.ax_heatmap.text(
j + 0.5, i + 0.5,
f"{jaccard_sim:.2f}",
ha='center', va='center',
color='black', fontsize=8
)
# Remove grid lines from the heatmap
g.ax_heatmap.grid(False)
# Improve overall aesthetics
g.fig.suptitle("DEGs Jaccard similarity Agonist vs Antagonist", fontsize=16, y=1.05)
plt.setp(g.ax_heatmap.xaxis.get_majorticklabels(), rotation=90, fontsize=8)
plt.setp(g.ax_heatmap.yaxis.get_majorticklabels(), rotation=0, fontsize=8)
g.ax_heatmap.tick_params(axis='both', which='both', length=0)
g.cax.set_position([.92, .2, .03, .45]) # adjust the colorbar position if needed
plt.show()
### CONDITION VS DMSO
### CONDITION VS DMSO
### CONDITION VS DMSO
### CONDITION VS DMSO
JaccardDF = pd.DataFrame(index = list(PerLineConditionVsDMSO_filtered.keys()), columns = list(PerLineConditionVsDMSO_filtered.keys()))
for combination in list(itertools.combinations(list(PerLineConditionVsDMSO_filtered.keys()),2)):
set0 = set(PerLineConditionVsDMSO_filtered[combination[0]].genes.tolist())
set1 = set(PerLineConditionVsDMSO_filtered[combination[1]].genes.tolist())
Int = set0.intersection(set1)
Union = set0.union(set1)
Jaccard = len(Int) / len(Union)
JaccardDF.loc[combination[0],combination[1]] = Jaccard
JaccardDF.loc[combination[1],combination[0]] = Jaccard
JaccardDF = JaccardDF.fillna(1)
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
# Assuming JaccardDF is your DataFrame of Jaccard similarities
# Convert similarity to distance: distance = 1 - similarity
distance_matrix = 1 - JaccardDF.astype(float)
# Optionally, sort the DataFrame for a canonical order
distance_matrix = distance_matrix.sort_index().sort_index(axis=1)
# Create a clustermap using the 'bwr' colormap with improved aesthetics.
g = sns.clustermap(
distance_matrix,
metric="euclidean", # or 'cityblock', 'cosine', etc.
method="average", # clustering method: single, complete, ward, etc.
cmap="bwr_r", # blue-white-red palette
figsize=(20, 20),
cbar_pos=(0.02, 0.8, 0.05, 0.18) # adjust colorbar position if needed
)
# Hide the dendrograms (do not display them)
g.ax_row_dendrogram.set_visible(False)
g.ax_col_dendrogram.set_visible(False)
# The clustermap reorders rows/columns; get the reordered DataFrame.
reordered_data = distance_matrix.iloc[
g.dendrogram_row.reordered_ind, g.dendrogram_col.reordered_ind
]
# Annotate each cell with the corresponding Jaccard similarity value.
# Note: similarity = 1 - distance
for i, row in enumerate(reordered_data.index):
for j, col in enumerate(reordered_data.columns):
d = reordered_data.loc[row, col]
jaccard_sim = 1 - d # Convert back to similarity
g.ax_heatmap.text(
j + 0.5, i + 0.5,
f"{jaccard_sim:.2f}",
ha='center', va='center',
color='black', fontsize=5
)
# Remove grid lines from the heatmap
g.ax_heatmap.grid(False)
# Improve overall aesthetics
g.fig.suptitle("DEGs Jaccard similarity Compound vs DMSO", fontsize=16, y=1.05)
plt.setp(g.ax_heatmap.xaxis.get_majorticklabels(), rotation=90, fontsize=8)
plt.setp(g.ax_heatmap.yaxis.get_majorticklabels(), rotation=0, fontsize=8)
g.ax_heatmap.tick_params(axis='both', which='both', length=0)
g.cax.set_position([.92, .2, .03, .45]) # adjust the colorbar position if needed
plt.show()
CTL04Eresults = pd.concat([PerLineAgonistVsAntagonist_filtered[k].assign(Line="CTL04E").assign(Group=k) for k in list(PerLineAgonistVsAntagonist_filtered.keys()) if "_CTL04E" in k], ignore_index=True)
CTL04Eresults["Direction"] = CTL04Eresults.Group.str.split("_", expand=True)[0]
CTL04Eresults["Contrast"] = CTL04Eresults.Group.str.split("_", expand=True)[1].str.split(".", expand=True)[0]
CTL04Eresults.groupby(["Contrast","Direction"], as_index=False).size()
CTL04Eresults = CTL04Eresults.groupby('Contrast').apply(lambda group: pd.Series({
'Up': (group['Direction'] == 'UP').sum(),
'Down': (group['Direction'] == 'DOWN').sum(),
'Total': group.shape[0]
})).reset_index()
CTL08Aresults = pd.concat([PerLineAgonistVsAntagonist_filtered[k].assign(Line="CTL08A").assign(Group=k) for k in list(PerLineAgonistVsAntagonist_filtered.keys()) if "_CTL08A" in k], ignore_index=True)
CTL08Aresults["Direction"] = CTL08Aresults.Group.str.split("_", expand=True)[0]
CTL08Aresults["Contrast"] = CTL08Aresults.Group.str.split("_", expand=True)[1].str.split(".", expand=True)[0]
CTL08Aresults.groupby(["Contrast","Direction"], as_index=False).size()
CTL08Aresults = CTL08Aresults.groupby('Contrast').apply(lambda group: pd.Series({
'Up': (group['Direction'] == 'UP').sum(),
'Down': (group['Direction'] == 'DOWN').sum(),
'Total': group.shape[0]
})).reset_index()
import importlib
import matplotlib.pyplot as plt
import seaborn as sns
# Reload matplotlib.pyplot to restore its original state
# Prepare your summary DataFrame indexed by 'Contrast'
summary_results = {}
summary_results["CTL04E"] = CTL04Eresults.set_index('Contrast')[['Up', 'Down', 'Total']]
summary_results["CTL08A"] = CTL08Aresults.set_index('Contrast')[['Up', 'Down', 'Total']]
# Compute global minimum and maximum values across both DataFrames for uniform scaling
global_min = min(df.min().min() for df in summary_results.values())
global_max = max(df.max().max() for df in summary_results.values())
# Create a figure with 1 row and 2 columns of subplots
fig, axes = plt.subplots(1, 2, figsize=(20, 10))
# Loop through the dictionary keys and plot each heatmap on its corresponding axis
for n, k in enumerate(["CTL08A", "CTL04E"]):
ax = axes[n] # get the current axis
show_cbar = (n == 1)
sns.heatmap(
summary_results[k],
annot=True,
annot_kws={"size": 20}, # Increase text size here
fmt="d",
cmap='YlOrBr',
linewidths=0,
linecolor='none',
vmin=global_min, # set uniform minimum
vmax=global_max, # set uniform maximum
ax=ax,
cbar=show_cbar,
cbar_kws={'label': 'Count'} if show_cbar else None,
)
# Remove any visible grid lines by setting the QuadMesh edge color to the face color
ax.collections[0].set_edgecolor("face")
ax.grid(False)
# Set the axis labels and title using the axis methods
ax.tick_params(axis='x', labelsize=20)
ax.tick_params(axis='y', labelsize=20)
ax.set_xlabel(None)
ax.set_ylabel(None)
ax.set_title(k, fontdict={'size': 25})
# Adjust the layout and display the figure
plt.tight_layout()
plt.show()
CTL04Eresults = pd.concat([PerLineConditionVsDMSO_filtered[k].assign(Line="CTL04E").assign(Group=k) for k in list(PerLineConditionVsDMSO_filtered.keys()) if "_CTL04E" in k], ignore_index=True)
CTL04Eresults["Direction"] = CTL04Eresults.Group.str.split("_", expand=True)[0]
CTL04Eresults["Contrast"] = CTL04Eresults.Group.str.split("_", expand=True)[1].str.split(".", expand=True)[0]
CTL04Eresults.groupby(["Contrast","Direction"], as_index=False).size()
CTL04Eresults = CTL04Eresults.groupby('Contrast').apply(lambda group: pd.Series({
'Up': (group['Direction'] == 'UP').sum(),
'Down': (group['Direction'] == 'DOWN').sum(),
'Total': group.shape[0]
})).reset_index()
CTL08Aresults = pd.concat([PerLineConditionVsDMSO_filtered[k].assign(Line="CTL08A").assign(Group=k) for k in list(PerLineConditionVsDMSO_filtered.keys()) if "_CTL08A" in k], ignore_index=True)
CTL08Aresults["Direction"] = CTL08Aresults.Group.str.split("_", expand=True)[0]
CTL08Aresults["Contrast"] = CTL08Aresults.Group.str.split("_", expand=True)[1].str.split(".", expand=True)[0]
CTL08Aresults.groupby(["Contrast","Direction"], as_index=False).size()
CTL08Aresults = CTL08Aresults.groupby('Contrast').apply(lambda group: pd.Series({
'Up': (group['Direction'] == 'UP').sum(),
'Down': (group['Direction'] == 'DOWN').sum(),
'Total': group.shape[0]
})).reset_index()
# Reload matplotlib.pyplot to restore its original state
importlib.reload(plt)
# Prepare your summary DataFrame indexed by 'Contrast'
summary_results = {}
summary_results["CTL04E"] = CTL04Eresults.set_index('Contrast')[['Up', 'Down', 'Total']]
summary_results["CTL08A"] = CTL08Aresults.set_index('Contrast')[['Up', 'Down', 'Total']]
# Compute global minimum and maximum values across both DataFrames for uniform scaling
global_min = min(df.min().min() for df in summary_results.values())
global_max = max(df.max().max() for df in summary_results.values())
# Create a figure with 1 row and 2 columns of subplots
fig, axes = plt.subplots(1, 2, figsize=(20, 20))
# Loop through the dictionary keys and plot each heatmap on its corresponding axis
for n, k in enumerate(["CTL08A", "CTL04E"]):
ax = axes[n] # get the current axis
show_cbar = (n == 1)
sns.heatmap(
summary_results[k],
annot=True,
annot_kws={"size": 20}, # Increase text size here
fmt="d",
cmap='YlOrBr',
linewidths=0,
linecolor='none',
vmin=global_min, # set uniform minimum
vmax=global_max, # set uniform maximum
ax=ax,
cbar=show_cbar,
cbar_kws={'label': 'Count'} if show_cbar else None,
)
# Remove any visible grid lines by setting the QuadMesh edge color to the face color
ax.collections[0].set_edgecolor("face")
ax.grid(False)
# Set the axis labels and title using the axis methods
ax.tick_params(axis='x', labelsize=20)
ax.tick_params(axis='y', labelsize=20)
ax.set_xlabel(None)
ax.set_ylabel(None)
ax.set_title(k, fontdict={'size': 25})
# Adjust the layout and display the figure
plt.tight_layout()
plt.show()
ScatterploDict = {}
for k in list(PerLineConditionVsDMSO.keys()):
Compound = k.split(".")[0]
if Compound not in list(ScatterploDict.keys()):
ScatterploDict[Compound] = pd.DataFrame()
Line = k.split("_")[-1]
DF = PerLineConditionVsDMSO[k].assign(line=Line)
DifferentiallyResponding = DifferentialEffectCompoundVsDMSO_filtered[f"{Compound}.VS.DMSO.DifferentialCTL04E-CTL08A"]
DifferentiallyResponding = DifferentiallyResponding[abs(DifferentiallyResponding["logFC"]) >= 1.5]
DifferentiallyResponding = DifferentiallyResponding[DifferentiallyResponding["FDR"] <= 0.05].genes.tolist()
DF = DF[DF.genes.isin(DifferentiallyResponding)]
ScatterploDict[Compound] = pd.concat([ScatterploDict[Compound], DF], ignore_index=True)
ScatterploDict[Compound]
for Compound in list(ScatterploDict.keys()):
# Assuming ScatterploDict[Compound] is your DataFrame
df = ScatterploDict[Compound]
# Pivot the DataFrame so that each gene is a row and you have one column per line
df_pivot = df.pivot(index='genes', columns='line', values='logFC')
# Optionally, drop genes that don't have logFC values for both lines
df_pivot = df_pivot.fillna(0)
if df_pivot.shape[0] == 0:
print(f"No genes differentially regulated found for {Compound} according to selected filters")
continue
df_pivot["Delta"] = abs(df_pivot["CTL04E"] - df_pivot["CTL08A"])
plt.figure(figsize=(10, 10))
# Optionally scale the marker sizes (e.g., multiply Delta by a constant factor)
marker_sizes = df_pivot["Delta"] * 100 # Adjust scaling factor as needed
# Plot the scatter points using the correct parameter "s" for sizes
plt.scatter(df_pivot['CTL08A'], df_pivot['CTL04E'], alpha=0.2, color='blue', s=marker_sizes)
# Annotate each point with its gene name
for gene in df_pivot.index:
x = df_pivot.loc[gene, 'CTL08A']
y = df_pivot.loc[gene, 'CTL04E']
plt.text(x, y, gene, fontsize=8, alpha=0.8)
# Draw reference lines at 0 (for 0-centered values)
plt.axvline(0, color='gray', linestyle='--')
plt.axhline(0, color='gray', linestyle='--')
plt.xlabel('CTL08A logFC ', fontsize=25)
plt.ylabel('CTL04E logFC ', fontsize=25)
plt.title(Compound, fontsize=25)
plt.tight_layout()
plt.show()