import scanpy as sc
import pandas as pd
from matplotlib import pylab
import random
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import os
import itertools
import plotly.express as px
import yaml
import numpy as np
import random
import anndata as ad
from scipy.sparse import csr_matrix, issparse
from scipy import sparse
from matplotlib.colors import TwoSlopeNorm
import scanpy.external as sce
import sys
import scvelo as scv
import anndata
import rapids_singlecell as rsc
import anndata
from pathlib import Path
try:
nb_fname = ipynbname.name()
except:
nb_fname = os.path.basename(globals()["__vsc_ipynb_file__"])
print(nb_fname)
10.2_Hormones_substudy_LineMagnification_Progenitors.ipynb
sc.settings.set_figure_params(dpi=100, facecolor='white', dpi_save=500)
pylab.rcParams['figure.figsize'] = (9, 9)
homeDir = os.getenv("HOME")
sys.path.insert(1, homeDir+"/utils/")
from PlotPCA_components import *
from AtlasClasses import *
DS="hormones_substudy"
celltype="Progenitors"
ReferenceTissue="cortex"
with open(homeDir+"/utils/ReferenceDict.yaml", 'r') as file:
ReferencePaths = yaml.safe_load(file)
for k in list(ReferencePaths.keys()):
ReferencePaths[k]["adataPath"] = "/group/testa/Users/davide.castaldi/Polaroids_spinoff"+ReferencePaths[k]["adataPath"]
#ReferencePaths[k]["signaturePath"] = homeDir+ReferencePaths[k]["signaturePath"]
ReferencePaths[k]["signaturePath"] = "/group/testa/Users/davide.castaldi/Polaroids_spinoff"+ReferencePaths[k]["signaturePath"]
ReferencePaths[k]["signaturePurityPath"] = "/group/testa/Users/davide.castaldi/Polaroids_spinoff"+ReferencePaths[k]["signaturePurityPath"]
sc.settings.set_figure_params(dpi=80, facecolor='white', dpi_save=500)
pylab.rcParams['figure.figsize'] = (6, 6)
dir_path = homeDir+"/adatas/{}_ByMacroCelltype".format(DS)
pcs=15
n_neighbors=30
seed = 437
adata = sc.read_h5ad(dir_path+"/{}.{}.h5ad".format(DS,celltype))
adata
AnnData object with n_obs × n_vars = 43164 × 36601
obs: 'tags', 'cmo', 'condition', 'replicate', 'condition_clean', 'line', 'run', 'sample_id', 'groupCov', 'harmonizedRegion', 'tissue', 'PCW', 'Singlet', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'total_counts_mt', 'log1p_total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'log1p_total_counts_ribo', 'pct_counts_ribo', 'total_counts_hb', 'log1p_total_counts_hb', 'pct_counts_hb', 'log_pct_counts_hb', 'log_total_counts', 'study', 'hormones_substudy', 'EDCs_substudy', 'controls_substudy', 'region', 'study_Annotation', 'ingestdLabels', 'scANVILabels', 'harmonyLabels', 'phase', 'S_score', 'G2M_score', 'Consensus_nIntersection', 'Consensus', 'Consensus_call', 'PassedQCfilt_ScoreSignature_Hypoxia', 'PassedQCfilt_ScoreSignature_ER_stress', 'PassedQCfilt_ScoreSignature_necroptotic_process', 'PassedQCfilt_ScoreSignature_programmed_necrotic_cell_death', 'Leiden_1', 'MacroCelltype'
var: 'gene_ids', 'feature_types', 'genome', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts', 'mean', 'std', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'highly_variable_nbatches', 'highly_variable_intersection', 'highly_variable_seurat_overlap'
uns: 'AtlasTile_registry', 'Consensus_call_colors', 'Leiden_1_colors', 'PassedQCfilt_ScoreSignature_ER_stress_colors', 'PassedQCfilt_ScoreSignature_Hypoxia_colors', 'PassedQCfilt_ScoreSignature_necroptotic_process_colors', 'PassedQCfilt_ScoreSignature_programmed_necrotic_cell_death_colors', 'condition_colors', 'hvg', 'ingestdLabels_colors', 'leiden', 'line_colors', 'log1p', 'neighbors', 'pca', 'phase_colors', 'rank_genes_groups', 'umap'
obsm: 'X_pca', 'X_pca_harmony', 'X_umap'
varm: 'PCs'
layers: 'counts'
obsp: 'connectivities', 'distances'
# Clean ribo, mito anmd sec genes
# mitochondrial genes, "MT-" for human, "Mt-" for mouse
adata = adata[:,~adata.var_names.str.startswith("MT-")].copy()
# ribosomal genes
adata = adata[:,~adata.var_names.str.startswith(("RPS", "RPL"))].copy()
# Sex genes
print(adata)
adata = adata[:,~adata.var_names.isin(pd.read_csv(homeDir+"/resources/X.genes.tsv", header=None)[0].tolist())].copy()
adata = adata[:,~adata.var_names.isin(pd.read_csv(homeDir+"/resources/Y.genes.tsv", header=None)[0].tolist())].copy()
print(adata)
# Re-normalize
adata.X = adata.layers["counts"].copy()
sc.pp.normalize_total(adata, 20000)
sc.pp.log1p(adata)
adata.layers["logNorm"] = adata.X.copy()
AnnData object with n_obs × n_vars = 43164 × 36485
obs: 'tags', 'cmo', 'condition', 'replicate', 'condition_clean', 'line', 'run', 'sample_id', 'groupCov', 'harmonizedRegion', 'tissue', 'PCW', 'Singlet', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'total_counts_mt', 'log1p_total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'log1p_total_counts_ribo', 'pct_counts_ribo', 'total_counts_hb', 'log1p_total_counts_hb', 'pct_counts_hb', 'log_pct_counts_hb', 'log_total_counts', 'study', 'hormones_substudy', 'EDCs_substudy', 'controls_substudy', 'region', 'study_Annotation', 'ingestdLabels', 'scANVILabels', 'harmonyLabels', 'phase', 'S_score', 'G2M_score', 'Consensus_nIntersection', 'Consensus', 'Consensus_call', 'PassedQCfilt_ScoreSignature_Hypoxia', 'PassedQCfilt_ScoreSignature_ER_stress', 'PassedQCfilt_ScoreSignature_necroptotic_process', 'PassedQCfilt_ScoreSignature_programmed_necrotic_cell_death', 'Leiden_1', 'MacroCelltype'
var: 'gene_ids', 'feature_types', 'genome', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts', 'mean', 'std', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'highly_variable_nbatches', 'highly_variable_intersection', 'highly_variable_seurat_overlap'
uns: 'AtlasTile_registry', 'Consensus_call_colors', 'Leiden_1_colors', 'PassedQCfilt_ScoreSignature_ER_stress_colors', 'PassedQCfilt_ScoreSignature_Hypoxia_colors', 'PassedQCfilt_ScoreSignature_necroptotic_process_colors', 'PassedQCfilt_ScoreSignature_programmed_necrotic_cell_death_colors', 'condition_colors', 'hvg', 'ingestdLabels_colors', 'leiden', 'line_colors', 'log1p', 'neighbors', 'pca', 'phase_colors', 'rank_genes_groups', 'umap'
obsm: 'X_pca', 'X_pca_harmony', 'X_umap'
varm: 'PCs'
layers: 'counts'
obsp: 'connectivities', 'distances'
AnnData object with n_obs × n_vars = 43164 × 35477
obs: 'tags', 'cmo', 'condition', 'replicate', 'condition_clean', 'line', 'run', 'sample_id', 'groupCov', 'harmonizedRegion', 'tissue', 'PCW', 'Singlet', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'total_counts_mt', 'log1p_total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'log1p_total_counts_ribo', 'pct_counts_ribo', 'total_counts_hb', 'log1p_total_counts_hb', 'pct_counts_hb', 'log_pct_counts_hb', 'log_total_counts', 'study', 'hormones_substudy', 'EDCs_substudy', 'controls_substudy', 'region', 'study_Annotation', 'ingestdLabels', 'scANVILabels', 'harmonyLabels', 'phase', 'S_score', 'G2M_score', 'Consensus_nIntersection', 'Consensus', 'Consensus_call', 'PassedQCfilt_ScoreSignature_Hypoxia', 'PassedQCfilt_ScoreSignature_ER_stress', 'PassedQCfilt_ScoreSignature_necroptotic_process', 'PassedQCfilt_ScoreSignature_programmed_necrotic_cell_death', 'Leiden_1', 'MacroCelltype'
var: 'gene_ids', 'feature_types', 'genome', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts', 'mean', 'std', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'highly_variable_nbatches', 'highly_variable_intersection', 'highly_variable_seurat_overlap'
uns: 'AtlasTile_registry', 'Consensus_call_colors', 'Leiden_1_colors', 'PassedQCfilt_ScoreSignature_ER_stress_colors', 'PassedQCfilt_ScoreSignature_Hypoxia_colors', 'PassedQCfilt_ScoreSignature_necroptotic_process_colors', 'PassedQCfilt_ScoreSignature_programmed_necrotic_cell_death_colors', 'condition_colors', 'hvg', 'ingestdLabels_colors', 'leiden', 'line_colors', 'log1p', 'neighbors', 'pca', 'phase_colors', 'rank_genes_groups', 'umap'
obsm: 'X_pca', 'X_pca_harmony', 'X_umap'
varm: 'PCs'
layers: 'counts'
obsp: 'connectivities', 'distances'
WARNING: adata.X seems to be already log-transformed.
sc.pl.umap(adata, color=["line","condition"])
adata.obs["condition"].value_counts()
condition ARYL_HYD_ANTAGONIST 4979 ANDROGEN_ANTAGONIST 4155 LIVER-X_ANTAGONIST 4091 ESTROGEN_AGONIST 3920 RET_AGONIST 3884 ESTROGEN_ANTAGONIST 3514 THYROID_ANTAGONIST 2952 ANDROGEN_AGONIST 2464 DMSO 2333 ARYL_HYD_AGONIST 2201 RET_ANTAGONIST 1841 GLUCOCORT_AGONIST 1738 GLUCOCORT_ANTAGONIST 1729 LIVER-X_AGONIST 1704 THYROID_AGONIST 1659 Name: count, dtype: int64
HVGSCondict = {}
for line in adata.obs["line"].unique().tolist():
adataGenotype = adata[adata.obs["line"] == line]
for condition in adataGenotype.obs["condition"].unique().tolist():
adataCOnd = adataGenotype[adataGenotype.obs["condition"] == condition].copy()
if condition not in list(HVGSCondict.keys()):
HVGSCondict[condition] = []
#Print Nsamples
nsamples = len(adataCOnd.obs["sample_id"].unique().tolist())
print(f"Found {nsamples} repliacates for {line} {condition}")
if nsamples < 3:
minOverlap = nsamples
else:
minOverlap = nsamples-1
print(f"Only HVGs common to {minOverlap} replicates will be kept")
sc.pp.highly_variable_genes(adataCOnd, batch_key="sample_id", flavor="seurat", n_top_genes=1000)
CommonHVGs = set(adataCOnd.var_names[adataCOnd.var["highly_variable_nbatches"] >= minOverlap])
nHVGs = len(CommonHVGs)
print(f"Found {nHVGs} common HVGs across {minOverlap} replicates for line {line} exposure {condition} ")
print("\n")
HVGSCondict[condition].append(CommonHVGs)
Found 3 repliacates for CTL04E ESTROGEN_AGONIST Only HVGs common to 2 replicates will be kept Found 558 common HVGs across 2 replicates for line CTL04E exposure ESTROGEN_AGONIST Found 3 repliacates for CTL04E ESTROGEN_ANTAGONIST Only HVGs common to 2 replicates will be kept Found 559 common HVGs across 2 replicates for line CTL04E exposure ESTROGEN_ANTAGONIST Found 3 repliacates for CTL04E ANDROGEN_AGONIST Only HVGs common to 2 replicates will be kept Found 506 common HVGs across 2 replicates for line CTL04E exposure ANDROGEN_AGONIST Found 3 repliacates for CTL04E ANDROGEN_ANTAGONIST Only HVGs common to 2 replicates will be kept Found 576 common HVGs across 2 replicates for line CTL04E exposure ANDROGEN_ANTAGONIST Found 3 repliacates for CTL04E ARYL_HYD_AGONIST Only HVGs common to 2 replicates will be kept Found 555 common HVGs across 2 replicates for line CTL04E exposure ARYL_HYD_AGONIST Found 3 repliacates for CTL04E ARYL_HYD_ANTAGONIST Only HVGs common to 2 replicates will be kept Found 648 common HVGs across 2 replicates for line CTL04E exposure ARYL_HYD_ANTAGONIST Found 3 repliacates for CTL04E LIVER-X_AGONIST Only HVGs common to 2 replicates will be kept Found 517 common HVGs across 2 replicates for line CTL04E exposure LIVER-X_AGONIST Found 3 repliacates for CTL04E LIVER-X_ANTAGONIST Only HVGs common to 2 replicates will be kept Found 691 common HVGs across 2 replicates for line CTL04E exposure LIVER-X_ANTAGONIST Found 3 repliacates for CTL04E GLUCOCORT_AGONIST Only HVGs common to 2 replicates will be kept Found 524 common HVGs across 2 replicates for line CTL04E exposure GLUCOCORT_AGONIST Found 3 repliacates for CTL04E GLUCOCORT_ANTAGONIST Only HVGs common to 2 replicates will be kept Found 515 common HVGs across 2 replicates for line CTL04E exposure GLUCOCORT_ANTAGONIST Found 3 repliacates for CTL04E THYROID_AGONIST Only HVGs common to 2 replicates will be kept Found 516 common HVGs across 2 replicates for line CTL04E exposure THYROID_AGONIST Found 3 repliacates for CTL04E THYROID_ANTAGONIST Only HVGs common to 2 replicates will be kept Found 511 common HVGs across 2 replicates for line CTL04E exposure THYROID_ANTAGONIST Found 3 repliacates for CTL04E DMSO Only HVGs common to 2 replicates will be kept Found 524 common HVGs across 2 replicates for line CTL04E exposure DMSO Found 1 repliacates for CTL04E RET_AGONIST Only HVGs common to 1 replicates will be kept Found 1001 common HVGs across 1 replicates for line CTL04E exposure RET_AGONIST Found 3 repliacates for CTL04E RET_ANTAGONIST Only HVGs common to 2 replicates will be kept Found 529 common HVGs across 2 replicates for line CTL04E exposure RET_ANTAGONIST Found 3 repliacates for CTL08A ESTROGEN_AGONIST Only HVGs common to 2 replicates will be kept Found 493 common HVGs across 2 replicates for line CTL08A exposure ESTROGEN_AGONIST Found 3 repliacates for CTL08A ESTROGEN_ANTAGONIST Only HVGs common to 2 replicates will be kept Found 573 common HVGs across 2 replicates for line CTL08A exposure ESTROGEN_ANTAGONIST Found 3 repliacates for CTL08A ANDROGEN_AGONIST Only HVGs common to 2 replicates will be kept Found 460 common HVGs across 2 replicates for line CTL08A exposure ANDROGEN_AGONIST Found 3 repliacates for CTL08A ANDROGEN_ANTAGONIST Only HVGs common to 2 replicates will be kept Found 537 common HVGs across 2 replicates for line CTL08A exposure ANDROGEN_ANTAGONIST Found 3 repliacates for CTL08A ARYL_HYD_AGONIST Only HVGs common to 2 replicates will be kept Found 520 common HVGs across 2 replicates for line CTL08A exposure ARYL_HYD_AGONIST Found 3 repliacates for CTL08A ARYL_HYD_ANTAGONIST Only HVGs common to 2 replicates will be kept Found 642 common HVGs across 2 replicates for line CTL08A exposure ARYL_HYD_ANTAGONIST Found 3 repliacates for CTL08A LIVER-X_AGONIST Only HVGs common to 2 replicates will be kept Found 580 common HVGs across 2 replicates for line CTL08A exposure LIVER-X_AGONIST Found 3 repliacates for CTL08A LIVER-X_ANTAGONIST Only HVGs common to 2 replicates will be kept Found 692 common HVGs across 2 replicates for line CTL08A exposure LIVER-X_ANTAGONIST Found 3 repliacates for CTL08A GLUCOCORT_AGONIST Only HVGs common to 2 replicates will be kept Found 498 common HVGs across 2 replicates for line CTL08A exposure GLUCOCORT_AGONIST Found 3 repliacates for CTL08A GLUCOCORT_ANTAGONIST Only HVGs common to 2 replicates will be kept Found 507 common HVGs across 2 replicates for line CTL08A exposure GLUCOCORT_ANTAGONIST Found 3 repliacates for CTL08A THYROID_AGONIST Only HVGs common to 2 replicates will be kept Found 523 common HVGs across 2 replicates for line CTL08A exposure THYROID_AGONIST Found 3 repliacates for CTL08A THYROID_ANTAGONIST Only HVGs common to 2 replicates will be kept Found 497 common HVGs across 2 replicates for line CTL08A exposure THYROID_ANTAGONIST Found 3 repliacates for CTL08A DMSO Only HVGs common to 2 replicates will be kept Found 556 common HVGs across 2 replicates for line CTL08A exposure DMSO Found 1 repliacates for CTL08A RET_AGONIST Only HVGs common to 1 replicates will be kept Found 1000 common HVGs across 1 replicates for line CTL08A exposure RET_AGONIST Found 2 repliacates for CTL08A RET_ANTAGONIST Only HVGs common to 2 replicates will be kept Found 388 common HVGs across 2 replicates for line CTL08A exposure RET_ANTAGONIST
sc.pp.highly_variable_genes(adata, batch_key="line", flavor="seurat", n_top_genes=1000)
HVGSLine = set(adata.var_names[adata.var["highly_variable_nbatches"] >=1].tolist())
nHVGSLine = len(HVGSLine)
print(f"Found {nHVGSLine} common pan-condition HVGs across both lines ")
Found 1521 common pan-condition HVGs across both lines
##### First HVGs set
##### First HVGs set
IntersectionDEGs = []
for k in list(HVGSCondict.keys()):
IntersectionDEGsCond = list(set.union(*HVGSCondict[k]))
nIntersectionDEGsCond = len(IntersectionDEGsCond)
#print(f"Found {nIntersectionDEGsCond} HVGs from the intersection of both genotypes replicates for exposure {condition} ")
IntersectionDEGs.extend(IntersectionDEGsCond)
IntersectionDEGs = set(IntersectionDEGs)
nIntersectionDEGsCond = len(IntersectionDEGs)
print(f"Found {nIntersectionDEGsCond} HVGs from the union of both genotypes replicates for exposure {condition} ")
##### Second HVGs set
##### Second HVGs set
FinalHVGs = list(IntersectionDEGs.union(HVGSLine))
nFinalHVGs = len(FinalHVGs)
print(f"Found {nFinalHVGs} HVGs from the union of both genotypes replicates and across exposures ")
Found 3957 HVGs from the union of both genotypes replicates for exposure RET_ANTAGONIST Found 4430 HVGs from the union of both genotypes replicates and across exposures
adata
AnnData object with n_obs × n_vars = 43164 × 35477
obs: 'tags', 'cmo', 'condition', 'replicate', 'condition_clean', 'line', 'run', 'sample_id', 'groupCov', 'harmonizedRegion', 'tissue', 'PCW', 'Singlet', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'total_counts_mt', 'log1p_total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'log1p_total_counts_ribo', 'pct_counts_ribo', 'total_counts_hb', 'log1p_total_counts_hb', 'pct_counts_hb', 'log_pct_counts_hb', 'log_total_counts', 'study', 'hormones_substudy', 'EDCs_substudy', 'controls_substudy', 'region', 'study_Annotation', 'ingestdLabels', 'scANVILabels', 'harmonyLabels', 'phase', 'S_score', 'G2M_score', 'Consensus_nIntersection', 'Consensus', 'Consensus_call', 'PassedQCfilt_ScoreSignature_Hypoxia', 'PassedQCfilt_ScoreSignature_ER_stress', 'PassedQCfilt_ScoreSignature_necroptotic_process', 'PassedQCfilt_ScoreSignature_programmed_necrotic_cell_death', 'Leiden_1', 'MacroCelltype'
var: 'gene_ids', 'feature_types', 'genome', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts', 'mean', 'std', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'highly_variable_nbatches', 'highly_variable_intersection', 'highly_variable_seurat_overlap'
uns: 'AtlasTile_registry', 'Consensus_call_colors', 'Leiden_1_colors', 'PassedQCfilt_ScoreSignature_ER_stress_colors', 'PassedQCfilt_ScoreSignature_Hypoxia_colors', 'PassedQCfilt_ScoreSignature_necroptotic_process_colors', 'PassedQCfilt_ScoreSignature_programmed_necrotic_cell_death_colors', 'condition_colors', 'hvg', 'ingestdLabels_colors', 'leiden', 'line_colors', 'log1p', 'neighbors', 'pca', 'phase_colors', 'rank_genes_groups', 'umap'
obsm: 'X_pca', 'X_pca_harmony', 'X_umap'
varm: 'PCs'
layers: 'counts', 'logNorm'
obsp: 'connectivities', 'distances'
adata.var["highly_variable_seurat_overlap"] = adata.var_names.isin(FinalHVGs)
print(adata.var["highly_variable_seurat_overlap"].sum())
adata.var["highly_variable"] = adata.var["highly_variable_seurat_overlap"]
sc.pp.scale(adata)
adata.layers["scaled"] = adata.X.copy()
sc.pp.pca(adata, use_highly_variable=True)
sc.pp.neighbors(adata, n_pcs=pcs, n_neighbors=n_neighbors, random_state=seed)
adata.X = adata.layers["logNorm"].copy()
4430
sc.tl.umap(adata, random_state=seed)
sc.tl.draw_graph(adata)
sc.pl.umap(adata, color=["line","condition","Consensus_call"], size=10, vmin='p1', vmax='p99', wspace=.2, ncols=2)
sc.pl.draw_graph(adata, color=["line","condition","Consensus_call"], size=10, vmin='p1', vmax='p99', wspace=.2, ncols=2)
print(adata.X.max())
print(adata.X.min())
9.056654 0.0
del adata.layers["scaled"]
del adata.layers["logNorm"]
# Store results
adata.write_h5ad(dir_path+"/{}.{}.LineMagnification.h5ad".format(DS,celltype))
adata = sc.read_h5ad(dir_path+"/{}.{}.LineMagnification.h5ad".format(DS,celltype))
adataPbulk = adata.copy()
adataPbulk.obs["line_condition"] = adataPbulk.obs["line"].astype(str)+"_"+ adataPbulk.obs["condition"].astype(str)
import hashlib Xtab = pd.crosstab(adataPbulk.obs["condition"],adataPbulk.obs["line"]) print(Xtab) minCells = Xtab.min().min() Compositionhash = hashlib.sha256(Xtab.sort_index(axis=1).to_csv(index=True).encode('utf-8')).hexdigest()
SelectedCells = [] SelectedCellsFile = "./{}.{}.selectedBCs.txt".format(nb_fname,Compositionhash) if os.path.isfile(SelectedCellsFile): print("Loading existing cells subset") SelectedCells = pd.read_csv(SelectedCellsFile)["0"].tolist() else: print("File not found or compositions are changed, performing downsampling to balance conditions/lines") for line in adataPbulk.obs["line_condition"].unique(): pbulkCond = adataPbulk[(adataPbulk.obs["line_condition"] == line)]
pbulkCondCells = random.sample(pbulkCond.obs_names.tolist(), minCells)
SelectedCells.extend(pbulkCondCells)
pd.Series(SelectedCells).to_csv(SelectedCellsFile)
adataPbulk = adataPbulk[SelectedCells] pd.crosstab(adataPbulk.obs["condition"],adataPbulk.obs["line"])
adataPbulk = AtlasTile(adataPbulk, regionObs="harmonizedRegion", tissueObs="tissue", ageObs="PCW", study=DS,scalevar=True, groupCov="groupCov")
adataPbulk.AggregateCounts(method="pseudoBulk", groupcov="sample_id", layer="counts", cellStateObs="condition", prenormalize=False, postnormalize=False)
adataPbulk = adataPbulk.uns["pseudoBulkadata.layer_counts.by_sample_id_condition"].copy()
Pseudobulking counts for sample 3_CTL04E_491 by condition obs Pseudobulking counts for sample 2_CTL08A_491 by condition obs Pseudobulking counts for sample 3_CTL08A_491 by condition obs Pseudobulking counts for sample 2_CTL04E_491 by condition obs Pseudobulking counts for sample 1_CTL08A_491 by condition obs Pseudobulking counts for sample 1_CTL04E_491 by condition obs Pseudobulking counts for sample 3_CTL04E_492 by condition obs Pseudobulking counts for sample 1_CTL08A_492 by condition obs Pseudobulking counts for sample 1_CTL04E_492 by condition obs Pseudobulking counts for sample 2_CTL04E_492 by condition obs Pseudobulking counts for sample 2_CTL08A_492 by condition obs Pseudobulking counts for sample 3_CTL08A_492 by condition obs Pseudobulking counts for sample 2_CTL08A_493 by condition obs Pseudobulking counts for sample 3_CTL04E_493 by condition obs Pseudobulking counts for sample 3_CTL08A_493 by condition obs Pseudobulking counts for sample 2_CTL04E_493 by condition obs Pseudobulking counts for sample 1_CTL04E_493 by condition obs Pseudobulking counts for sample 1_CTL08A_493 by condition obs Pseudobulking counts for sample 2_CTL08A_494 by condition obs Pseudobulking counts for sample 2_CTL04E_494 by condition obs Pseudobulking counts for sample 1_CTL04E_494 by condition obs Pseudobulking counts for sample 3_CTL04E_494 by condition obs Pseudobulking counts for sample 3_CTL08A_494 by condition obs Pseudobulking counts for sample 1_CTL08A_494 by condition obs Pseudobulking counts for sample 1_CTL04E_495 by condition obs Pseudobulking counts for sample 2_CTL04E_495 by condition obs Pseudobulking counts for sample 3_CTL08A_495 by condition obs Pseudobulking counts for sample 3_CTL04E_495 by condition obs Pseudobulking counts for sample 1_CTL08A_495 by condition obs Pseudobulking counts for sample 2_CTL08A_495 by condition obs Pseudobulking counts for sample 2_CTL04E_496 by condition obs Pseudobulking counts for sample 2_CTL08A_496 by condition obs Pseudobulking counts for sample 1_CTL08A_496 by condition obs Pseudobulking counts for sample 3_CTL04E_496 by condition obs Pseudobulking counts for sample 1_CTL04E_496 by condition obs Pseudobulking counts for sample 3_CTL08A_496 by condition obs Pseudobulking counts for sample 3_CTL04E_497 by condition obs Pseudobulking counts for sample 2_CTL04E_497 by condition obs Pseudobulking counts for sample 3_CTL08A_497 by condition obs Pseudobulking counts for sample 2_CTL08A_497 by condition obs Pseudobulking counts for sample 1_CTL04E_497 by condition obs Pseudobulking counts for sample 1_CTL08A_497 by condition obs Pseudobulking counts for sample 1_CTL04E_498 by condition obs Pseudobulking counts for sample 3_CTL08A_498 by condition obs Pseudobulking counts for sample 1_CTL08A_498 by condition obs Pseudobulking counts for sample 3_CTL04E_498 by condition obs Pseudobulking counts for sample 2_CTL04E_498 by condition obs Pseudobulking counts for sample 2_CTL08A_498 by condition obs Pseudobulking counts for sample 1_CTL08A_499 by condition obs Pseudobulking counts for sample 2_CTL04E_499 by condition obs Pseudobulking counts for sample 3_CTL04E_499 by condition obs Pseudobulking counts for sample 3_CTL08A_499 by condition obs Pseudobulking counts for sample 1_CTL04E_499 by condition obs Pseudobulking counts for sample 2_CTL08A_499 by condition obs Pseudobulking counts for sample 2_CTL04E_500 by condition obs Pseudobulking counts for sample 1_CTL04E_500 by condition obs Pseudobulking counts for sample 3_CTL04E_500 by condition obs Pseudobulking counts for sample 2_CTL08A_500 by condition obs Pseudobulking counts for sample 1_CTL08A_500 by condition obs Pseudobulking counts for sample 3_CTL08A_500 by condition obs Pseudobulking counts for sample 3_CTL08A_501 by condition obs Pseudobulking counts for sample 3_CTL04E_501 by condition obs Pseudobulking counts for sample 1_CTL08A_501 by condition obs Pseudobulking counts for sample 2_CTL04E_501 by condition obs Pseudobulking counts for sample 1_CTL04E_501 by condition obs Pseudobulking counts for sample 2_CTL08A_501 by condition obs Pseudobulking counts for sample 3_CTL04E_502 by condition obs Pseudobulking counts for sample 3_CTL08A_502 by condition obs Pseudobulking counts for sample 1_CTL04E_502 by condition obs Pseudobulking counts for sample 1_CTL08A_502 by condition obs Pseudobulking counts for sample 2_CTL08A_502 by condition obs Pseudobulking counts for sample 2_CTL04E_502 by condition obs Pseudobulking counts for sample 2_CTL08A_505 by condition obs Pseudobulking counts for sample 2_CTL04E_505 by condition obs Pseudobulking counts for sample 1_CTL08A_505 by condition obs Pseudobulking counts for sample 3_CTL04E_505 by condition obs Pseudobulking counts for sample 1_CTL04E_505 by condition obs Pseudobulking counts for sample 3_CTL08A_505 by condition obs Pseudobulking counts for sample 1_CTL08A_508 by condition obs Pseudobulking counts for sample 1_CTL04E_508 by condition obs Pseudobulking counts for sample 1_CTL08A_509 by condition obs Pseudobulking counts for sample 3_CTL04E_509 by condition obs Pseudobulking counts for sample 1_CTL04E_509 by condition obs Pseudobulking counts for sample 2_CTL04E_509 by condition obs Pseudobulking counts for sample 3_CTL08A_509 by condition obs
def sanitize_strings(value):
if isinstance(value, str): # Only process string values
return (
value.replace('_', '') # Replace underscores with spaces
.replace('-', '') # Replace hyphens with spaces
.replace('@', '') # Example: Remove "@" character
)
return value # Leave non-strings untouched
Counts = adataPbulk.to_df(layer="pseudoBulk_summedCounts").T
MD = adataPbulk.obs[["condition","line"]]
MD["sample"] = MD.index.tolist()
MD = MD.applymap(sanitize_strings).loc[MD.index]
MD
| condition | line | sample | |
|---|---|---|---|
| 3_CTL04E_491.ESTROGEN_AGONIST | ESTROGENAGONIST | CTL04E | 3CTL04E491.ESTROGENAGONIST |
| 2_CTL08A_491.ESTROGEN_AGONIST | ESTROGENAGONIST | CTL08A | 2CTL08A491.ESTROGENAGONIST |
| 3_CTL08A_491.ESTROGEN_AGONIST | ESTROGENAGONIST | CTL08A | 3CTL08A491.ESTROGENAGONIST |
| 2_CTL04E_491.ESTROGEN_AGONIST | ESTROGENAGONIST | CTL04E | 2CTL04E491.ESTROGENAGONIST |
| 1_CTL08A_491.ESTROGEN_AGONIST | ESTROGENAGONIST | CTL08A | 1CTL08A491.ESTROGENAGONIST |
| ... | ... | ... | ... |
| 1_CTL08A_509.RET_ANTAGONIST | RETANTAGONIST | CTL08A | 1CTL08A509.RETANTAGONIST |
| 3_CTL04E_509.RET_ANTAGONIST | RETANTAGONIST | CTL04E | 3CTL04E509.RETANTAGONIST |
| 1_CTL04E_509.RET_ANTAGONIST | RETANTAGONIST | CTL04E | 1CTL04E509.RETANTAGONIST |
| 2_CTL04E_509.RET_ANTAGONIST | RETANTAGONIST | CTL04E | 2CTL04E509.RETANTAGONIST |
| 3_CTL08A_509.RET_ANTAGONIST | RETANTAGONIST | CTL08A | 3CTL08A509.RETANTAGONIST |
85 rows × 3 columns
%load_ext rpy2.ipython
pd.DataFrame.iteritems = pd.DataFrame.items
The rpy2.ipython extension is already loaded. To reload it, use: %reload_ext rpy2.ipython
%%R -i MD -i Counts
library(edgeR)
library(org.Hs.eg.db)
library(AnnotationDbi)
library(stats)
library(topGO)
R[write to console]: Loading required package: limma
R[write to console]: Loading required package: AnnotationDbi
R[write to console]: Loading required package: stats4
R[write to console]: Loading required package: BiocGenerics
R[write to console]:
Attaching package: ‘BiocGenerics’
R[write to console]: The following object is masked from ‘package:limma’:
plotMA
R[write to console]: The following objects are masked from ‘package:stats’:
IQR, mad, sd, var, xtabs
R[write to console]: The following objects are masked from ‘package:base’:
anyDuplicated, aperm, append, as.data.frame, basename, cbind,
colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
table, tapply, union, unique, unsplit, which.max, which.min
R[write to console]: Loading required package: Biobase
R[write to console]: Welcome to Bioconductor
Vignettes contain introductory material; view with
'browseVignettes()'. To cite Bioconductor, see
'citation("Biobase")', and for packages 'citation("pkgname")'.
R[write to console]: Loading required package: IRanges
R[write to console]: Loading required package: S4Vectors
R[write to console]:
Attaching package: ‘S4Vectors’
R[write to console]: The following object is masked from ‘package:utils’:
findMatches
R[write to console]: The following objects are masked from ‘package:base’:
expand.grid, I, unname
R[write to console]:
R[write to console]: Loading required package: graph
R[write to console]: Loading required package: GO.db
R[write to console]:
R[write to console]: Loading required package: SparseM
R[write to console]:
Attaching package: ‘SparseM’
R[write to console]: The following object is masked from ‘package:base’:
backsolve
R[write to console]:
groupGOTerms: GOBPTerm, GOMFTerm, GOCCTerm environments built.
R[write to console]:
Attaching package: ‘topGO’
R[write to console]: The following object is masked from ‘package:IRanges’:
members
MD
| condition | line | sample | |
|---|---|---|---|
| 3_CTL04E_491.ESTROGEN_AGONIST | ESTROGENAGONIST | CTL04E | 3CTL04E491.ESTROGENAGONIST |
| 2_CTL08A_491.ESTROGEN_AGONIST | ESTROGENAGONIST | CTL08A | 2CTL08A491.ESTROGENAGONIST |
| 3_CTL08A_491.ESTROGEN_AGONIST | ESTROGENAGONIST | CTL08A | 3CTL08A491.ESTROGENAGONIST |
| 2_CTL04E_491.ESTROGEN_AGONIST | ESTROGENAGONIST | CTL04E | 2CTL04E491.ESTROGENAGONIST |
| 1_CTL08A_491.ESTROGEN_AGONIST | ESTROGENAGONIST | CTL08A | 1CTL08A491.ESTROGENAGONIST |
| ... | ... | ... | ... |
| 1_CTL08A_509.RET_ANTAGONIST | RETANTAGONIST | CTL08A | 1CTL08A509.RETANTAGONIST |
| 3_CTL04E_509.RET_ANTAGONIST | RETANTAGONIST | CTL04E | 3CTL04E509.RETANTAGONIST |
| 1_CTL04E_509.RET_ANTAGONIST | RETANTAGONIST | CTL04E | 1CTL04E509.RETANTAGONIST |
| 2_CTL04E_509.RET_ANTAGONIST | RETANTAGONIST | CTL04E | 2CTL04E509.RETANTAGONIST |
| 3_CTL08A_509.RET_ANTAGONIST | RETANTAGONIST | CTL08A | 3CTL08A509.RETANTAGONIST |
85 rows × 3 columns
%%R -i MD -i Counts -o MDS
CondFactor <- factor(MD[["condition"]],
levels = c("DMSO", setdiff(unique(MD[["condition"]]), "DMSO")))
lineFactor <- factor(MD[["line"]],
levels = c("CTL08A","CTL04E"))
mm <- model.matrix(~0+lineFactor+CondFactor )
rownames(mm) <- colnames(Counts)
dds <- DGEList(Counts, group = CondFactor, genes = rownames(Counts))
# Add metadata to dge
dds$samples$line <- lineFactor
dds$samples$nCells <- MD[["nCells"]]
keep.genes<-filterByExpr(dds,design=mm)
print(table(keep.genes))
dds <- dds[keep.genes,, keep.lib.sizes=FALSE]
dds <- calcNormFactors(dds)
# Store MDS
png(tempfile(fileext = ".png"))
MDS <- plotMDS(dds, pch=16, main="MDS")
dev.off()
#Fit
dds <- estimateGLMRobustDisp(dds, mm)
fit <- glmQLFit(dds, mm)
print(dds$samples)
plotBCV(dds)
plotQLDisp(fit)
keep.genes
FALSE TRUE
18378 17099
group lib.size norm.factors
3_CTL04E_491.ESTROGEN_AGONIST ESTROGENAGONIST 4423473 0.9938465
2_CTL08A_491.ESTROGEN_AGONIST ESTROGENAGONIST 2512090 1.0077688
3_CTL08A_491.ESTROGEN_AGONIST ESTROGENAGONIST 3439236 0.9917460
2_CTL04E_491.ESTROGEN_AGONIST ESTROGENAGONIST 4830660 0.9745080
1_CTL08A_491.ESTROGEN_AGONIST ESTROGENAGONIST 3474053 1.0004240
1_CTL04E_491.ESTROGEN_AGONIST ESTROGENAGONIST 5166547 0.9756648
3_CTL04E_492.ESTROGEN_ANTAGONIST ESTROGENANTAGONIST 2962526 1.0960258
1_CTL08A_492.ESTROGEN_ANTAGONIST ESTROGENANTAGONIST 4417441 1.0425709
1_CTL04E_492.ESTROGEN_ANTAGONIST ESTROGENANTAGONIST 2874090 1.0402190
2_CTL04E_492.ESTROGEN_ANTAGONIST ESTROGENANTAGONIST 3369016 1.0545351
2_CTL08A_492.ESTROGEN_ANTAGONIST ESTROGENANTAGONIST 3763823 1.0345933
3_CTL08A_492.ESTROGEN_ANTAGONIST ESTROGENANTAGONIST 4184552 1.0368455
2_CTL08A_493.ANDROGEN_AGONIST ANDROGENAGONIST 1988692 1.0437305
3_CTL04E_493.ANDROGEN_AGONIST ANDROGENAGONIST 2565500 1.0215710
3_CTL08A_493.ANDROGEN_AGONIST ANDROGENAGONIST 2812934 1.0295907
2_CTL04E_493.ANDROGEN_AGONIST ANDROGENAGONIST 2004050 1.0085290
1_CTL04E_493.ANDROGEN_AGONIST ANDROGENAGONIST 2851940 1.0200451
1_CTL08A_493.ANDROGEN_AGONIST ANDROGENAGONIST 2195680 1.0378173
2_CTL08A_494.ANDROGEN_ANTAGONIST ANDROGENANTAGONIST 2462589 1.0309499
2_CTL04E_494.ANDROGEN_ANTAGONIST ANDROGENANTAGONIST 5144448 1.0124633
1_CTL04E_494.ANDROGEN_ANTAGONIST ANDROGENANTAGONIST 5778573 1.0306182
3_CTL04E_494.ANDROGEN_ANTAGONIST ANDROGENANTAGONIST 4445737 1.0346039
3_CTL08A_494.ANDROGEN_ANTAGONIST ANDROGENANTAGONIST 1801990 1.0154079
1_CTL08A_494.ANDROGEN_ANTAGONIST ANDROGENANTAGONIST 3716041 1.0332832
1_CTL04E_495.ARYL_HYD_AGONIST ARYLHYDAGONIST 3630508 0.9754919
2_CTL04E_495.ARYL_HYD_AGONIST ARYLHYDAGONIST 3058652 0.9684283
3_CTL08A_495.ARYL_HYD_AGONIST ARYLHYDAGONIST 1604786 1.0063004
3_CTL04E_495.ARYL_HYD_AGONIST ARYLHYDAGONIST 3498771 0.9728194
1_CTL08A_495.ARYL_HYD_AGONIST ARYLHYDAGONIST 2242548 0.9994667
2_CTL08A_495.ARYL_HYD_AGONIST ARYLHYDAGONIST 1478612 1.0041808
2_CTL04E_496.ARYL_HYD_ANTAGONIST ARYLHYDANTAGONIST 8510035 0.9813685
2_CTL08A_496.ARYL_HYD_ANTAGONIST ARYLHYDANTAGONIST 3858791 0.9973786
1_CTL08A_496.ARYL_HYD_ANTAGONIST ARYLHYDANTAGONIST 3995416 1.0015983
3_CTL04E_496.ARYL_HYD_ANTAGONIST ARYLHYDANTAGONIST 10650102 0.9774394
1_CTL04E_496.ARYL_HYD_ANTAGONIST ARYLHYDANTAGONIST 9317474 0.9729097
3_CTL08A_496.ARYL_HYD_ANTAGONIST ARYLHYDANTAGONIST 2736227 1.0074878
3_CTL04E_497.LIVER-X_AGONIST LIVERXAGONIST 2345504 0.9732396
2_CTL04E_497.LIVER-X_AGONIST LIVERXAGONIST 1922886 0.9890393
3_CTL08A_497.LIVER-X_AGONIST LIVERXAGONIST 1919090 0.9861535
2_CTL08A_497.LIVER-X_AGONIST LIVERXAGONIST 2016639 0.9836470
1_CTL04E_497.LIVER-X_AGONIST LIVERXAGONIST 2105208 0.9730657
1_CTL08A_497.LIVER-X_AGONIST LIVERXAGONIST 1377734 0.9790765
1_CTL04E_498.LIVER-X_ANTAGONIST LIVERXANTAGONIST 7299989 1.0204161
3_CTL08A_498.LIVER-X_ANTAGONIST LIVERXANTAGONIST 4438532 1.0450472
1_CTL08A_498.LIVER-X_ANTAGONIST LIVERXANTAGONIST 3652151 1.0318353
3_CTL04E_498.LIVER-X_ANTAGONIST LIVERXANTAGONIST 7999084 1.0261994
2_CTL04E_498.LIVER-X_ANTAGONIST LIVERXANTAGONIST 6835637 1.0224048
2_CTL08A_498.LIVER-X_ANTAGONIST LIVERXANTAGONIST 3512888 1.0252983
1_CTL08A_499.GLUCOCORT_AGONIST GLUCOCORTAGONIST 1803835 0.9487867
2_CTL04E_499.GLUCOCORT_AGONIST GLUCOCORTAGONIST 2167339 0.9768888
3_CTL04E_499.GLUCOCORT_AGONIST GLUCOCORTAGONIST 1887427 0.9775628
3_CTL08A_499.GLUCOCORT_AGONIST GLUCOCORTAGONIST 2303271 0.9500966
1_CTL04E_499.GLUCOCORT_AGONIST GLUCOCORTAGONIST 1757962 0.9676751
2_CTL08A_499.GLUCOCORT_AGONIST GLUCOCORTAGONIST 1209493 0.9844379
2_CTL04E_500.GLUCOCORT_ANTAGONIST GLUCOCORTANTAGONIST 1561222 0.9389632
1_CTL04E_500.GLUCOCORT_ANTAGONIST GLUCOCORTANTAGONIST 2347530 0.9500315
3_CTL04E_500.GLUCOCORT_ANTAGONIST GLUCOCORTANTAGONIST 1632353 0.9758253
2_CTL08A_500.GLUCOCORT_ANTAGONIST GLUCOCORTANTAGONIST 1618093 0.9438703
1_CTL08A_500.GLUCOCORT_ANTAGONIST GLUCOCORTANTAGONIST 2201478 0.9207906
3_CTL08A_500.GLUCOCORT_ANTAGONIST GLUCOCORTANTAGONIST 2073223 0.9430869
3_CTL08A_501.THYROID_AGONIST THYROIDAGONIST 1761744 1.0176930
3_CTL04E_501.THYROID_AGONIST THYROIDAGONIST 2369575 1.0028793
1_CTL08A_501.THYROID_AGONIST THYROIDAGONIST 2332264 1.0130421
2_CTL04E_501.THYROID_AGONIST THYROIDAGONIST 2421023 0.9880429
1_CTL04E_501.THYROID_AGONIST THYROIDAGONIST 3088771 0.9961016
2_CTL08A_501.THYROID_AGONIST THYROIDAGONIST 1066907 1.0149836
3_CTL04E_502.THYROID_ANTAGONIST THYROIDANTAGONIST 3964137 0.9951326
3_CTL08A_502.THYROID_ANTAGONIST THYROIDANTAGONIST 2392829 0.9989727
1_CTL04E_502.THYROID_ANTAGONIST THYROIDANTAGONIST 2163874 0.9923870
1_CTL08A_502.THYROID_ANTAGONIST THYROIDANTAGONIST 2515493 0.9975653
2_CTL08A_502.THYROID_ANTAGONIST THYROIDANTAGONIST 1892818 1.0070596
2_CTL04E_502.THYROID_ANTAGONIST THYROIDANTAGONIST 4000189 0.9892277
2_CTL08A_505.DMSO DMSO 2403268 0.9840651
2_CTL04E_505.DMSO DMSO 3462783 0.9993232
1_CTL08A_505.DMSO DMSO 2584191 0.9935466
3_CTL04E_505.DMSO DMSO 4186498 1.0098125
1_CTL04E_505.DMSO DMSO 2587872 1.0083204
3_CTL08A_505.DMSO DMSO 2106415 0.9859619
1_CTL08A_508.RET_AGONIST RETAGONIST 9216400 1.0074486
1_CTL04E_508.RET_AGONIST RETAGONIST 8571922 1.0037813
1_CTL08A_509.RET_ANTAGONIST RETANTAGONIST 2021938 1.0098215
3_CTL04E_509.RET_ANTAGONIST RETANTAGONIST 2978142 1.0178903
1_CTL04E_509.RET_ANTAGONIST RETANTAGONIST 3194628 1.0261241
2_CTL04E_509.RET_ANTAGONIST RETANTAGONIST 2778720 1.0113240
3_CTL08A_509.RET_ANTAGONIST RETANTAGONIST 2383531 0.9994200
line
3_CTL04E_491.ESTROGEN_AGONIST CTL04E
2_CTL08A_491.ESTROGEN_AGONIST CTL08A
3_CTL08A_491.ESTROGEN_AGONIST CTL08A
2_CTL04E_491.ESTROGEN_AGONIST CTL04E
1_CTL08A_491.ESTROGEN_AGONIST CTL08A
1_CTL04E_491.ESTROGEN_AGONIST CTL04E
3_CTL04E_492.ESTROGEN_ANTAGONIST CTL04E
1_CTL08A_492.ESTROGEN_ANTAGONIST CTL08A
1_CTL04E_492.ESTROGEN_ANTAGONIST CTL04E
2_CTL04E_492.ESTROGEN_ANTAGONIST CTL04E
2_CTL08A_492.ESTROGEN_ANTAGONIST CTL08A
3_CTL08A_492.ESTROGEN_ANTAGONIST CTL08A
2_CTL08A_493.ANDROGEN_AGONIST CTL08A
3_CTL04E_493.ANDROGEN_AGONIST CTL04E
3_CTL08A_493.ANDROGEN_AGONIST CTL08A
2_CTL04E_493.ANDROGEN_AGONIST CTL04E
1_CTL04E_493.ANDROGEN_AGONIST CTL04E
1_CTL08A_493.ANDROGEN_AGONIST CTL08A
2_CTL08A_494.ANDROGEN_ANTAGONIST CTL08A
2_CTL04E_494.ANDROGEN_ANTAGONIST CTL04E
1_CTL04E_494.ANDROGEN_ANTAGONIST CTL04E
3_CTL04E_494.ANDROGEN_ANTAGONIST CTL04E
3_CTL08A_494.ANDROGEN_ANTAGONIST CTL08A
1_CTL08A_494.ANDROGEN_ANTAGONIST CTL08A
1_CTL04E_495.ARYL_HYD_AGONIST CTL04E
2_CTL04E_495.ARYL_HYD_AGONIST CTL04E
3_CTL08A_495.ARYL_HYD_AGONIST CTL08A
3_CTL04E_495.ARYL_HYD_AGONIST CTL04E
1_CTL08A_495.ARYL_HYD_AGONIST CTL08A
2_CTL08A_495.ARYL_HYD_AGONIST CTL08A
2_CTL04E_496.ARYL_HYD_ANTAGONIST CTL04E
2_CTL08A_496.ARYL_HYD_ANTAGONIST CTL08A
1_CTL08A_496.ARYL_HYD_ANTAGONIST CTL08A
3_CTL04E_496.ARYL_HYD_ANTAGONIST CTL04E
1_CTL04E_496.ARYL_HYD_ANTAGONIST CTL04E
3_CTL08A_496.ARYL_HYD_ANTAGONIST CTL08A
3_CTL04E_497.LIVER-X_AGONIST CTL04E
2_CTL04E_497.LIVER-X_AGONIST CTL04E
3_CTL08A_497.LIVER-X_AGONIST CTL08A
2_CTL08A_497.LIVER-X_AGONIST CTL08A
1_CTL04E_497.LIVER-X_AGONIST CTL04E
1_CTL08A_497.LIVER-X_AGONIST CTL08A
1_CTL04E_498.LIVER-X_ANTAGONIST CTL04E
3_CTL08A_498.LIVER-X_ANTAGONIST CTL08A
1_CTL08A_498.LIVER-X_ANTAGONIST CTL08A
3_CTL04E_498.LIVER-X_ANTAGONIST CTL04E
2_CTL04E_498.LIVER-X_ANTAGONIST CTL04E
2_CTL08A_498.LIVER-X_ANTAGONIST CTL08A
1_CTL08A_499.GLUCOCORT_AGONIST CTL08A
2_CTL04E_499.GLUCOCORT_AGONIST CTL04E
3_CTL04E_499.GLUCOCORT_AGONIST CTL04E
3_CTL08A_499.GLUCOCORT_AGONIST CTL08A
1_CTL04E_499.GLUCOCORT_AGONIST CTL04E
2_CTL08A_499.GLUCOCORT_AGONIST CTL08A
2_CTL04E_500.GLUCOCORT_ANTAGONIST CTL04E
1_CTL04E_500.GLUCOCORT_ANTAGONIST CTL04E
3_CTL04E_500.GLUCOCORT_ANTAGONIST CTL04E
2_CTL08A_500.GLUCOCORT_ANTAGONIST CTL08A
1_CTL08A_500.GLUCOCORT_ANTAGONIST CTL08A
3_CTL08A_500.GLUCOCORT_ANTAGONIST CTL08A
3_CTL08A_501.THYROID_AGONIST CTL08A
3_CTL04E_501.THYROID_AGONIST CTL04E
1_CTL08A_501.THYROID_AGONIST CTL08A
2_CTL04E_501.THYROID_AGONIST CTL04E
1_CTL04E_501.THYROID_AGONIST CTL04E
2_CTL08A_501.THYROID_AGONIST CTL08A
3_CTL04E_502.THYROID_ANTAGONIST CTL04E
3_CTL08A_502.THYROID_ANTAGONIST CTL08A
1_CTL04E_502.THYROID_ANTAGONIST CTL04E
1_CTL08A_502.THYROID_ANTAGONIST CTL08A
2_CTL08A_502.THYROID_ANTAGONIST CTL08A
2_CTL04E_502.THYROID_ANTAGONIST CTL04E
2_CTL08A_505.DMSO CTL08A
2_CTL04E_505.DMSO CTL04E
1_CTL08A_505.DMSO CTL08A
3_CTL04E_505.DMSO CTL04E
1_CTL04E_505.DMSO CTL04E
3_CTL08A_505.DMSO CTL08A
1_CTL08A_508.RET_AGONIST CTL08A
1_CTL04E_508.RET_AGONIST CTL04E
1_CTL08A_509.RET_ANTAGONIST CTL08A
3_CTL04E_509.RET_ANTAGONIST CTL04E
1_CTL04E_509.RET_ANTAGONIST CTL04E
2_CTL04E_509.RET_ANTAGONIST CTL04E
3_CTL08A_509.RET_ANTAGONIST CTL08A
adataPbulk.obsm["MDS"] = np.vstack([MDS["x"],MDS["y"]]).T
sc.pl.embedding(adataPbulk, basis="MDS", color=["line","condition","nCells"], wspace=.6, size=400)
%%R -o SharedConditionVsDmso
AllGenes <- dim(Counts)[1]
SharedConditionVsDmso <- list()
for (condition in setdiff(unique(MD[["condition"]]), "DMSO")) {
# Construct the contrast expression
contrastExpression <- paste("CondFactor", condition, sep = "")
print(contrastExpression)
# Use the correct argument name 'contrasts' in makeContrasts
mycontrast <- makeContrasts(contrasts = contrastExpression, levels = mm)
# Remove the extraneous comma and correctly pass all parameters to glmQLFTest
SharedConditionVsDmso[[paste(condition, ".VS.DMSO",sep = "") ]] <- topTags(glmQLFTest(fit, contrast = mycontrast), adjust.method = "fdr", n = AllGenes, sort.by = "logFC")$table
# Optionally, print or store the results
}
[1] "CondFactorESTROGENAGONIST" [1] "CondFactorESTROGENANTAGONIST" [1] "CondFactorANDROGENAGONIST" [1] "CondFactorANDROGENANTAGONIST" [1] "CondFactorARYLHYDAGONIST" [1] "CondFactorARYLHYDANTAGONIST" [1] "CondFactorLIVERXAGONIST" [1] "CondFactorLIVERXANTAGONIST" [1] "CondFactorGLUCOCORTAGONIST" [1] "CondFactorGLUCOCORTANTAGONIST" [1] "CondFactorTHYROIDAGONIST" [1] "CondFactorTHYROIDANTAGONIST" [1] "CondFactorRETAGONIST" [1] "CondFactorRETANTAGONIST"
%%R -o SharedAgonistVsAntagonist
AllGenes <- dim(Counts)[1]
SharedAgonistVsAntagonist <- list()
for (condition in setdiff(unique(MD[["condition"]]), "DMSO")) {
print(condition)
# Only process conditions containing "AGONIST"
if (!grepl("ANTAGONIST", condition)) {
# Replace "AGONIST" with "ANTAGONIST" in the condition string
condAntagonist <- gsub("AGONIST", "ANTAGONIST", condition)
# Construct the contrast expression; adjust the separator as needed
contrastExpression <- paste("CondFactor", condition, "-", "CondFactor", condAntagonist, sep = "")
# Create the contrast using makeContrasts (ensure mm is defined)
mycontrast <- makeContrasts(contrasts = contrastExpression, levels = mm)
# Perform the glmQLFTest and extract the topTags table;
# ensure AllGenes is defined as the number of genes to report
SharedAgonistVsAntagonist[[paste(condition, ".VS.",condAntagonist,"LineShared",sep = "") ]] <- topTags(glmQLFTest(fit, contrast = mycontrast),
adjust.method = "fdr",
n = AllGenes,
sort.by = "logFC")$table
}
}
[1] "ESTROGENAGONIST" [1] "ESTROGENANTAGONIST" [1] "ANDROGENAGONIST" [1] "ANDROGENANTAGONIST" [1] "ARYLHYDAGONIST" [1] "ARYLHYDANTAGONIST" [1] "LIVERXAGONIST" [1] "LIVERXANTAGONIST" [1] "GLUCOCORTAGONIST" [1] "GLUCOCORTANTAGONIST" [1] "THYROIDAGONIST" [1] "THYROIDANTAGONIST" [1] "RETAGONIST" [1] "RETANTAGONIST"
%%R -i MD -i Counts -o MDS
Group <- factor(paste(MD$line,MD$condition,sep="."))
mm <-model.matrix(~0+Group, data=Group)
colnames(mm)<-levels(Group)
rownames(mm)<-rownames(MD)
dds <- DGEList(Counts, group = Group, genes = rownames(Counts))
keep.genes<-filterByExpr(dds,design=mm)
dds <- dds[keep.genes,, keep.lib.sizes=FALSE]
dds <- calcNormFactors(dds)
dds <- estimateGLMRobustDisp(dds, mm)
fit <- glmQLFit(dds, mm)
# Store MDS
png(tempfile(fileext = ".png"))
MDS <- plotMDS(dds, pch=16, main="MDS")
dev.off()
print(dds$samples)
plotBCV(dds)
group lib.size
3_CTL04E_491.ESTROGEN_AGONIST CTL04E.ESTROGENAGONIST 4427367
2_CTL08A_491.ESTROGEN_AGONIST CTL08A.ESTROGENAGONIST 2514436
3_CTL08A_491.ESTROGEN_AGONIST CTL08A.ESTROGENAGONIST 3442351
2_CTL04E_491.ESTROGEN_AGONIST CTL04E.ESTROGENAGONIST 4834386
1_CTL08A_491.ESTROGEN_AGONIST CTL08A.ESTROGENAGONIST 3476933
1_CTL04E_491.ESTROGEN_AGONIST CTL04E.ESTROGENAGONIST 5170759
3_CTL04E_492.ESTROGEN_ANTAGONIST CTL04E.ESTROGENANTAGONIST 2965808
1_CTL08A_492.ESTROGEN_ANTAGONIST CTL08A.ESTROGENANTAGONIST 4421506
1_CTL04E_492.ESTROGEN_ANTAGONIST CTL04E.ESTROGENANTAGONIST 2876663
2_CTL04E_492.ESTROGEN_ANTAGONIST CTL04E.ESTROGENANTAGONIST 3372338
2_CTL08A_492.ESTROGEN_ANTAGONIST CTL08A.ESTROGENANTAGONIST 3767400
3_CTL08A_492.ESTROGEN_ANTAGONIST CTL08A.ESTROGENANTAGONIST 4188254
2_CTL08A_493.ANDROGEN_AGONIST CTL08A.ANDROGENAGONIST 1990877
3_CTL04E_493.ANDROGEN_AGONIST CTL04E.ANDROGENAGONIST 2567998
3_CTL08A_493.ANDROGEN_AGONIST CTL08A.ANDROGENAGONIST 2816289
2_CTL04E_493.ANDROGEN_AGONIST CTL04E.ANDROGENAGONIST 2005829
1_CTL04E_493.ANDROGEN_AGONIST CTL04E.ANDROGENAGONIST 2854669
1_CTL08A_493.ANDROGEN_AGONIST CTL08A.ANDROGENAGONIST 2197971
2_CTL08A_494.ANDROGEN_ANTAGONIST CTL08A.ANDROGENANTAGONIST 2464967
2_CTL04E_494.ANDROGEN_ANTAGONIST CTL04E.ANDROGENANTAGONIST 5149391
1_CTL04E_494.ANDROGEN_ANTAGONIST CTL04E.ANDROGENANTAGONIST 5784351
3_CTL04E_494.ANDROGEN_ANTAGONIST CTL04E.ANDROGENANTAGONIST 4450075
3_CTL08A_494.ANDROGEN_ANTAGONIST CTL08A.ANDROGENANTAGONIST 1803613
1_CTL08A_494.ANDROGEN_ANTAGONIST CTL08A.ANDROGENANTAGONIST 3720011
1_CTL04E_495.ARYL_HYD_AGONIST CTL04E.ARYLHYDAGONIST 3633031
2_CTL04E_495.ARYL_HYD_AGONIST CTL04E.ARYLHYDAGONIST 3060783
3_CTL08A_495.ARYL_HYD_AGONIST CTL08A.ARYLHYDAGONIST 1606040
3_CTL04E_495.ARYL_HYD_AGONIST CTL04E.ARYLHYDAGONIST 3501067
1_CTL08A_495.ARYL_HYD_AGONIST CTL08A.ARYLHYDAGONIST 2244137
2_CTL08A_495.ARYL_HYD_AGONIST CTL08A.ARYLHYDAGONIST 1479713
2_CTL04E_496.ARYL_HYD_ANTAGONIST CTL04E.ARYLHYDANTAGONIST 8516203
2_CTL08A_496.ARYL_HYD_ANTAGONIST CTL08A.ARYLHYDANTAGONIST 3861848
1_CTL08A_496.ARYL_HYD_ANTAGONIST CTL08A.ARYLHYDANTAGONIST 3999145
3_CTL04E_496.ARYL_HYD_ANTAGONIST CTL04E.ARYLHYDANTAGONIST 10657828
1_CTL04E_496.ARYL_HYD_ANTAGONIST CTL04E.ARYLHYDANTAGONIST 9323790
3_CTL08A_496.ARYL_HYD_ANTAGONIST CTL08A.ARYLHYDANTAGONIST 2738449
3_CTL04E_497.LIVER-X_AGONIST CTL04E.LIVERXAGONIST 2347256
2_CTL04E_497.LIVER-X_AGONIST CTL04E.LIVERXAGONIST 1925638
3_CTL08A_497.LIVER-X_AGONIST CTL08A.LIVERXAGONIST 1920433
2_CTL08A_497.LIVER-X_AGONIST CTL08A.LIVERXAGONIST 2018195
1_CTL04E_497.LIVER-X_AGONIST CTL04E.LIVERXAGONIST 2106748
1_CTL08A_497.LIVER-X_AGONIST CTL08A.LIVERXAGONIST 1378708
1_CTL04E_498.LIVER-X_ANTAGONIST CTL04E.LIVERXANTAGONIST 7305800
3_CTL08A_498.LIVER-X_ANTAGONIST CTL08A.LIVERXANTAGONIST 4442417
1_CTL08A_498.LIVER-X_ANTAGONIST CTL08A.LIVERXANTAGONIST 3655092
3_CTL04E_498.LIVER-X_ANTAGONIST CTL04E.LIVERXANTAGONIST 8005354
2_CTL04E_498.LIVER-X_ANTAGONIST CTL04E.LIVERXANTAGONIST 6840843
2_CTL08A_498.LIVER-X_ANTAGONIST CTL08A.LIVERXANTAGONIST 3515767
1_CTL08A_499.GLUCOCORT_AGONIST CTL08A.GLUCOCORTAGONIST 1805590
2_CTL04E_499.GLUCOCORT_AGONIST CTL04E.GLUCOCORTAGONIST 2169085
3_CTL04E_499.GLUCOCORT_AGONIST CTL04E.GLUCOCORTAGONIST 1889182
3_CTL08A_499.GLUCOCORT_AGONIST CTL08A.GLUCOCORTAGONIST 2305049
1_CTL04E_499.GLUCOCORT_AGONIST CTL04E.GLUCOCORTAGONIST 1759638
2_CTL08A_499.GLUCOCORT_AGONIST CTL08A.GLUCOCORTAGONIST 1210958
2_CTL04E_500.GLUCOCORT_ANTAGONIST CTL04E.GLUCOCORTANTAGONIST 1562322
1_CTL04E_500.GLUCOCORT_ANTAGONIST CTL04E.GLUCOCORTANTAGONIST 2349427
3_CTL04E_500.GLUCOCORT_ANTAGONIST CTL04E.GLUCOCORTANTAGONIST 1633786
2_CTL08A_500.GLUCOCORT_ANTAGONIST CTL08A.GLUCOCORTANTAGONIST 1619307
1_CTL08A_500.GLUCOCORT_ANTAGONIST CTL08A.GLUCOCORTANTAGONIST 2203131
3_CTL08A_500.GLUCOCORT_ANTAGONIST CTL08A.GLUCOCORTANTAGONIST 2074793
3_CTL08A_501.THYROID_AGONIST CTL08A.THYROIDAGONIST 1763337
3_CTL04E_501.THYROID_AGONIST CTL04E.THYROIDAGONIST 2371491
1_CTL08A_501.THYROID_AGONIST CTL08A.THYROIDAGONIST 2334211
2_CTL04E_501.THYROID_AGONIST CTL04E.THYROIDAGONIST 2422919
1_CTL04E_501.THYROID_AGONIST CTL04E.THYROIDAGONIST 3091258
2_CTL08A_501.THYROID_AGONIST CTL08A.THYROIDAGONIST 1067833
3_CTL04E_502.THYROID_ANTAGONIST CTL04E.THYROIDANTAGONIST 3967397
3_CTL08A_502.THYROID_ANTAGONIST CTL08A.THYROIDANTAGONIST 2394795
1_CTL04E_502.THYROID_ANTAGONIST CTL04E.THYROIDANTAGONIST 2165687
1_CTL08A_502.THYROID_ANTAGONIST CTL08A.THYROIDANTAGONIST 2517516
2_CTL08A_502.THYROID_ANTAGONIST CTL08A.THYROIDANTAGONIST 1894532
2_CTL04E_502.THYROID_ANTAGONIST CTL04E.THYROIDANTAGONIST 4003403
2_CTL08A_505.DMSO CTL08A.DMSO 2405123
2_CTL04E_505.DMSO CTL04E.DMSO 3465587
1_CTL08A_505.DMSO CTL08A.DMSO 2586260
3_CTL04E_505.DMSO CTL04E.DMSO 4190069
1_CTL04E_505.DMSO CTL04E.DMSO 2590098
3_CTL08A_505.DMSO CTL08A.DMSO 2107954
1_CTL08A_508.RET_AGONIST CTL08A.RETAGONIST 9229327
1_CTL04E_508.RET_AGONIST CTL04E.RETAGONIST 8584248
1_CTL08A_509.RET_ANTAGONIST CTL08A.RETANTAGONIST 2023842
3_CTL04E_509.RET_ANTAGONIST CTL04E.RETANTAGONIST 2980842
1_CTL04E_509.RET_ANTAGONIST CTL04E.RETANTAGONIST 3197554
2_CTL04E_509.RET_ANTAGONIST CTL04E.RETANTAGONIST 2781065
3_CTL08A_509.RET_ANTAGONIST CTL08A.RETANTAGONIST 2385323
norm.factors
3_CTL04E_491.ESTROGEN_AGONIST 0.9999694
2_CTL08A_491.ESTROGEN_AGONIST 1.0172450
3_CTL08A_491.ESTROGEN_AGONIST 1.0035874
2_CTL04E_491.ESTROGEN_AGONIST 0.9909131
1_CTL08A_491.ESTROGEN_AGONIST 1.0071580
1_CTL04E_491.ESTROGEN_AGONIST 0.9940378
3_CTL04E_492.ESTROGEN_ANTAGONIST 1.0868014
1_CTL08A_492.ESTROGEN_ANTAGONIST 1.0326826
1_CTL04E_492.ESTROGEN_ANTAGONIST 1.0431401
2_CTL04E_492.ESTROGEN_ANTAGONIST 1.0509193
2_CTL08A_492.ESTROGEN_ANTAGONIST 1.0272320
3_CTL08A_492.ESTROGEN_ANTAGONIST 1.0278696
2_CTL08A_493.ANDROGEN_AGONIST 1.0558711
3_CTL04E_493.ANDROGEN_AGONIST 1.0297472
3_CTL08A_493.ANDROGEN_AGONIST 1.0335419
2_CTL04E_493.ANDROGEN_AGONIST 1.0178997
1_CTL04E_493.ANDROGEN_AGONIST 1.0241940
1_CTL08A_493.ANDROGEN_AGONIST 1.0424441
2_CTL08A_494.ANDROGEN_ANTAGONIST 1.0364886
2_CTL04E_494.ANDROGEN_ANTAGONIST 1.0169741
1_CTL04E_494.ANDROGEN_ANTAGONIST 1.0316251
3_CTL04E_494.ANDROGEN_ANTAGONIST 1.0368018
3_CTL08A_494.ANDROGEN_ANTAGONIST 1.0159233
1_CTL08A_494.ANDROGEN_ANTAGONIST 1.0366874
1_CTL04E_495.ARYL_HYD_AGONIST 0.9666301
2_CTL04E_495.ARYL_HYD_AGONIST 0.9636368
3_CTL08A_495.ARYL_HYD_AGONIST 1.0061025
3_CTL04E_495.ARYL_HYD_AGONIST 0.9576880
1_CTL08A_495.ARYL_HYD_AGONIST 0.9856165
2_CTL08A_495.ARYL_HYD_AGONIST 1.0026297
2_CTL04E_496.ARYL_HYD_ANTAGONIST 0.9793711
2_CTL08A_496.ARYL_HYD_ANTAGONIST 0.9981406
1_CTL08A_496.ARYL_HYD_ANTAGONIST 1.0073763
3_CTL04E_496.ARYL_HYD_ANTAGONIST 0.9769057
1_CTL04E_496.ARYL_HYD_ANTAGONIST 0.9704311
3_CTL08A_496.ARYL_HYD_ANTAGONIST 1.0105092
3_CTL04E_497.LIVER-X_AGONIST 0.9628673
2_CTL04E_497.LIVER-X_AGONIST 0.9899891
3_CTL08A_497.LIVER-X_AGONIST 0.9829504
2_CTL08A_497.LIVER-X_AGONIST 0.9754438
1_CTL04E_497.LIVER-X_AGONIST 0.9671078
1_CTL08A_497.LIVER-X_AGONIST 0.9756540
1_CTL04E_498.LIVER-X_ANTAGONIST 1.0124261
3_CTL08A_498.LIVER-X_ANTAGONIST 1.0229033
1_CTL08A_498.LIVER-X_ANTAGONIST 1.0173400
3_CTL04E_498.LIVER-X_ANTAGONIST 1.0157928
2_CTL04E_498.LIVER-X_ANTAGONIST 1.0072546
2_CTL08A_498.LIVER-X_ANTAGONIST 1.0178636
1_CTL08A_499.GLUCOCORT_AGONIST 0.9704569
2_CTL04E_499.GLUCOCORT_AGONIST 0.9715258
3_CTL04E_499.GLUCOCORT_AGONIST 0.9927524
3_CTL08A_499.GLUCOCORT_AGONIST 0.9644021
1_CTL04E_499.GLUCOCORT_AGONIST 0.9813597
2_CTL08A_499.GLUCOCORT_AGONIST 1.0034942
2_CTL04E_500.GLUCOCORT_ANTAGONIST 0.9444915
1_CTL04E_500.GLUCOCORT_ANTAGONIST 0.9668535
3_CTL04E_500.GLUCOCORT_ANTAGONIST 0.9931692
2_CTL08A_500.GLUCOCORT_ANTAGONIST 0.9596988
1_CTL08A_500.GLUCOCORT_ANTAGONIST 0.9247260
3_CTL08A_500.GLUCOCORT_ANTAGONIST 0.9425308
3_CTL08A_501.THYROID_AGONIST 1.0209377
3_CTL04E_501.THYROID_AGONIST 0.9982616
1_CTL08A_501.THYROID_AGONIST 1.0092869
2_CTL04E_501.THYROID_AGONIST 0.9870428
1_CTL04E_501.THYROID_AGONIST 0.9924600
2_CTL08A_501.THYROID_AGONIST 1.0184360
3_CTL04E_502.THYROID_ANTAGONIST 0.9959688
3_CTL08A_502.THYROID_ANTAGONIST 0.9982748
1_CTL04E_502.THYROID_ANTAGONIST 0.9967005
1_CTL08A_502.THYROID_ANTAGONIST 0.9962978
2_CTL08A_502.THYROID_ANTAGONIST 1.0145049
2_CTL04E_502.THYROID_ANTAGONIST 0.9859290
2_CTL08A_505.DMSO 0.9783423
2_CTL04E_505.DMSO 0.9943559
1_CTL08A_505.DMSO 0.9819910
3_CTL04E_505.DMSO 1.0067293
1_CTL04E_505.DMSO 1.0026432
3_CTL08A_505.DMSO 0.9768475
1_CTL08A_508.RET_AGONIST 0.9785361
1_CTL04E_508.RET_AGONIST 0.9791249
1_CTL08A_509.RET_ANTAGONIST 1.0115447
3_CTL04E_509.RET_ANTAGONIST 1.0184315
1_CTL04E_509.RET_ANTAGONIST 1.0328630
2_CTL04E_509.RET_ANTAGONIST 1.0129691
3_CTL08A_509.RET_ANTAGONIST 0.9958377
%%R -o univserse
univserse <- rownames(dds)
pd.Series(list(univserse)).to_excel("./{}_{}_DEGs/UNIVERSE_DEGsPerLineAgonistVsAntagonist.xlsx".format(DS, celltype))
adataPbulk.obsm["MDS"] = np.vstack([MDS["x"],MDS["y"]]).T
sc.pl.embedding(adataPbulk, basis="MDS", color=["line","condition","nCells"], wspace=.6, size=400)
%%R
print(head(mm))
print(colnames(mm))
CTL04E.ANDROGENAGONIST CTL04E.ANDROGENANTAGONIST
3_CTL04E_491.ESTROGEN_AGONIST 0 0
2_CTL08A_491.ESTROGEN_AGONIST 0 0
3_CTL08A_491.ESTROGEN_AGONIST 0 0
2_CTL04E_491.ESTROGEN_AGONIST 0 0
1_CTL08A_491.ESTROGEN_AGONIST 0 0
1_CTL04E_491.ESTROGEN_AGONIST 0 0
CTL04E.ARYLHYDAGONIST CTL04E.ARYLHYDANTAGONIST
3_CTL04E_491.ESTROGEN_AGONIST 0 0
2_CTL08A_491.ESTROGEN_AGONIST 0 0
3_CTL08A_491.ESTROGEN_AGONIST 0 0
2_CTL04E_491.ESTROGEN_AGONIST 0 0
1_CTL08A_491.ESTROGEN_AGONIST 0 0
1_CTL04E_491.ESTROGEN_AGONIST 0 0
CTL04E.DMSO CTL04E.ESTROGENAGONIST
3_CTL04E_491.ESTROGEN_AGONIST 0 1
2_CTL08A_491.ESTROGEN_AGONIST 0 0
3_CTL08A_491.ESTROGEN_AGONIST 0 0
2_CTL04E_491.ESTROGEN_AGONIST 0 1
1_CTL08A_491.ESTROGEN_AGONIST 0 0
1_CTL04E_491.ESTROGEN_AGONIST 0 1
CTL04E.ESTROGENANTAGONIST CTL04E.GLUCOCORTAGONIST
3_CTL04E_491.ESTROGEN_AGONIST 0 0
2_CTL08A_491.ESTROGEN_AGONIST 0 0
3_CTL08A_491.ESTROGEN_AGONIST 0 0
2_CTL04E_491.ESTROGEN_AGONIST 0 0
1_CTL08A_491.ESTROGEN_AGONIST 0 0
1_CTL04E_491.ESTROGEN_AGONIST 0 0
CTL04E.GLUCOCORTANTAGONIST CTL04E.LIVERXAGONIST
3_CTL04E_491.ESTROGEN_AGONIST 0 0
2_CTL08A_491.ESTROGEN_AGONIST 0 0
3_CTL08A_491.ESTROGEN_AGONIST 0 0
2_CTL04E_491.ESTROGEN_AGONIST 0 0
1_CTL08A_491.ESTROGEN_AGONIST 0 0
1_CTL04E_491.ESTROGEN_AGONIST 0 0
CTL04E.LIVERXANTAGONIST CTL04E.RETAGONIST
3_CTL04E_491.ESTROGEN_AGONIST 0 0
2_CTL08A_491.ESTROGEN_AGONIST 0 0
3_CTL08A_491.ESTROGEN_AGONIST 0 0
2_CTL04E_491.ESTROGEN_AGONIST 0 0
1_CTL08A_491.ESTROGEN_AGONIST 0 0
1_CTL04E_491.ESTROGEN_AGONIST 0 0
CTL04E.RETANTAGONIST CTL04E.THYROIDAGONIST
3_CTL04E_491.ESTROGEN_AGONIST 0 0
2_CTL08A_491.ESTROGEN_AGONIST 0 0
3_CTL08A_491.ESTROGEN_AGONIST 0 0
2_CTL04E_491.ESTROGEN_AGONIST 0 0
1_CTL08A_491.ESTROGEN_AGONIST 0 0
1_CTL04E_491.ESTROGEN_AGONIST 0 0
CTL04E.THYROIDANTAGONIST CTL08A.ANDROGENAGONIST
3_CTL04E_491.ESTROGEN_AGONIST 0 0
2_CTL08A_491.ESTROGEN_AGONIST 0 0
3_CTL08A_491.ESTROGEN_AGONIST 0 0
2_CTL04E_491.ESTROGEN_AGONIST 0 0
1_CTL08A_491.ESTROGEN_AGONIST 0 0
1_CTL04E_491.ESTROGEN_AGONIST 0 0
CTL08A.ANDROGENANTAGONIST CTL08A.ARYLHYDAGONIST
3_CTL04E_491.ESTROGEN_AGONIST 0 0
2_CTL08A_491.ESTROGEN_AGONIST 0 0
3_CTL08A_491.ESTROGEN_AGONIST 0 0
2_CTL04E_491.ESTROGEN_AGONIST 0 0
1_CTL08A_491.ESTROGEN_AGONIST 0 0
1_CTL04E_491.ESTROGEN_AGONIST 0 0
CTL08A.ARYLHYDANTAGONIST CTL08A.DMSO
3_CTL04E_491.ESTROGEN_AGONIST 0 0
2_CTL08A_491.ESTROGEN_AGONIST 0 0
3_CTL08A_491.ESTROGEN_AGONIST 0 0
2_CTL04E_491.ESTROGEN_AGONIST 0 0
1_CTL08A_491.ESTROGEN_AGONIST 0 0
1_CTL04E_491.ESTROGEN_AGONIST 0 0
CTL08A.ESTROGENAGONIST CTL08A.ESTROGENANTAGONIST
3_CTL04E_491.ESTROGEN_AGONIST 0 0
2_CTL08A_491.ESTROGEN_AGONIST 1 0
3_CTL08A_491.ESTROGEN_AGONIST 1 0
2_CTL04E_491.ESTROGEN_AGONIST 0 0
1_CTL08A_491.ESTROGEN_AGONIST 1 0
1_CTL04E_491.ESTROGEN_AGONIST 0 0
CTL08A.GLUCOCORTAGONIST
3_CTL04E_491.ESTROGEN_AGONIST 0
2_CTL08A_491.ESTROGEN_AGONIST 0
3_CTL08A_491.ESTROGEN_AGONIST 0
2_CTL04E_491.ESTROGEN_AGONIST 0
1_CTL08A_491.ESTROGEN_AGONIST 0
1_CTL04E_491.ESTROGEN_AGONIST 0
CTL08A.GLUCOCORTANTAGONIST CTL08A.LIVERXAGONIST
3_CTL04E_491.ESTROGEN_AGONIST 0 0
2_CTL08A_491.ESTROGEN_AGONIST 0 0
3_CTL08A_491.ESTROGEN_AGONIST 0 0
2_CTL04E_491.ESTROGEN_AGONIST 0 0
1_CTL08A_491.ESTROGEN_AGONIST 0 0
1_CTL04E_491.ESTROGEN_AGONIST 0 0
CTL08A.LIVERXANTAGONIST CTL08A.RETAGONIST
3_CTL04E_491.ESTROGEN_AGONIST 0 0
2_CTL08A_491.ESTROGEN_AGONIST 0 0
3_CTL08A_491.ESTROGEN_AGONIST 0 0
2_CTL04E_491.ESTROGEN_AGONIST 0 0
1_CTL08A_491.ESTROGEN_AGONIST 0 0
1_CTL04E_491.ESTROGEN_AGONIST 0 0
CTL08A.RETANTAGONIST CTL08A.THYROIDAGONIST
3_CTL04E_491.ESTROGEN_AGONIST 0 0
2_CTL08A_491.ESTROGEN_AGONIST 0 0
3_CTL08A_491.ESTROGEN_AGONIST 0 0
2_CTL04E_491.ESTROGEN_AGONIST 0 0
1_CTL08A_491.ESTROGEN_AGONIST 0 0
1_CTL04E_491.ESTROGEN_AGONIST 0 0
CTL08A.THYROIDANTAGONIST
3_CTL04E_491.ESTROGEN_AGONIST 0
2_CTL08A_491.ESTROGEN_AGONIST 0
3_CTL08A_491.ESTROGEN_AGONIST 0
2_CTL04E_491.ESTROGEN_AGONIST 0
1_CTL08A_491.ESTROGEN_AGONIST 0
1_CTL04E_491.ESTROGEN_AGONIST 0
[1] "CTL04E.ANDROGENAGONIST" "CTL04E.ANDROGENANTAGONIST"
[3] "CTL04E.ARYLHYDAGONIST" "CTL04E.ARYLHYDANTAGONIST"
[5] "CTL04E.DMSO" "CTL04E.ESTROGENAGONIST"
[7] "CTL04E.ESTROGENANTAGONIST" "CTL04E.GLUCOCORTAGONIST"
[9] "CTL04E.GLUCOCORTANTAGONIST" "CTL04E.LIVERXAGONIST"
[11] "CTL04E.LIVERXANTAGONIST" "CTL04E.RETAGONIST"
[13] "CTL04E.RETANTAGONIST" "CTL04E.THYROIDAGONIST"
[15] "CTL04E.THYROIDANTAGONIST" "CTL08A.ANDROGENAGONIST"
[17] "CTL08A.ANDROGENANTAGONIST" "CTL08A.ARYLHYDAGONIST"
[19] "CTL08A.ARYLHYDANTAGONIST" "CTL08A.DMSO"
[21] "CTL08A.ESTROGENAGONIST" "CTL08A.ESTROGENANTAGONIST"
[23] "CTL08A.GLUCOCORTAGONIST" "CTL08A.GLUCOCORTANTAGONIST"
[25] "CTL08A.LIVERXAGONIST" "CTL08A.LIVERXANTAGONIST"
[27] "CTL08A.RETAGONIST" "CTL08A.RETANTAGONIST"
[29] "CTL08A.THYROIDAGONIST" "CTL08A.THYROIDANTAGONIST"
%%R -o DifferentialEffectAgonistVsAntagonist
AllGenes <- dim(Counts)[1]
DifferentialEffectAgonistVsAntagonist <- list()
for (condition in setdiff(unique(MD[["condition"]]), "DMSO")) {
# Only process conditions containing "AGONIST"
if (!grepl("ANTAGONIST", condition)) {
# Replace "AGONIST" with "ANTAGONIST" in the condition string
condAntagonist <- gsub("AGONIST", "ANTAGONIST", condition)
contrastExpression <- paste("(CTL04E.", condition, "-", "CTL04E.", condAntagonist,
")-(CTL08A.", condition, "-", "CTL08A.", condAntagonist, ")",
sep = "")
print(contrastExpression)
mycontrast <- makeContrasts(contrasts = contrastExpression, levels = mm)
DifferentialEffectAgonistVsAntagonist[[paste(condition, ".VS.",condAntagonist,"DifferentialCTL04E-CTL08A",sep = "") ]] <- topTags(glmQLFTest(fit, contrast = mycontrast), adjust.method = "fdr", n = AllGenes, sort.by = "PValue")$table
}
}
[1] "(CTL04E.ESTROGENAGONIST-CTL04E.ESTROGENANTAGONIST)-(CTL08A.ESTROGENAGONIST-CTL08A.ESTROGENANTAGONIST)" [1] "(CTL04E.ANDROGENAGONIST-CTL04E.ANDROGENANTAGONIST)-(CTL08A.ANDROGENAGONIST-CTL08A.ANDROGENANTAGONIST)" [1] "(CTL04E.ARYLHYDAGONIST-CTL04E.ARYLHYDANTAGONIST)-(CTL08A.ARYLHYDAGONIST-CTL08A.ARYLHYDANTAGONIST)" [1] "(CTL04E.LIVERXAGONIST-CTL04E.LIVERXANTAGONIST)-(CTL08A.LIVERXAGONIST-CTL08A.LIVERXANTAGONIST)" [1] "(CTL04E.GLUCOCORTAGONIST-CTL04E.GLUCOCORTANTAGONIST)-(CTL08A.GLUCOCORTAGONIST-CTL08A.GLUCOCORTANTAGONIST)" [1] "(CTL04E.THYROIDAGONIST-CTL04E.THYROIDANTAGONIST)-(CTL08A.THYROIDAGONIST-CTL08A.THYROIDANTAGONIST)" [1] "(CTL04E.RETAGONIST-CTL04E.RETANTAGONIST)-(CTL08A.RETAGONIST-CTL08A.RETANTAGONIST)"
%%R -o PerLineAgonistVsAntagonist
AllGenes <- dim(Counts)[1]
PerLineAgonistVsAntagonist <- list()
for (condition in setdiff(unique(MD[["condition"]]), "DMSO")) {
# Only process conditions containing "AGONIST"
if (!grepl("ANTAGONIST", condition)) {
# Replace "AGONIST" with "ANTAGONIST" in the condition string
condAntagonist <- gsub("AGONIST", "ANTAGONIST", condition)
contrastExpressionCTL04E <- paste("CTL04E.", condition, "-", "CTL04E.", condAntagonist,sep = "")
print(contrastExpressionCTL04E)
contrastExpressionCTL04E <- makeContrasts(contrasts = contrastExpressionCTL04E, levels = mm)
PerLineAgonistVsAntagonist[[paste(condition, ".VS.",condAntagonist,"_CTL04E",sep = "") ]] <- topTags(glmQLFTest(fit, contrast = contrastExpressionCTL04E), adjust.method = "fdr", n = AllGenes, sort.by = "PValue")$table
contrastExpressionCTL08A <- paste("CTL08A.", condition, "-", "CTL08A.", condAntagonist,sep = "")
print(contrastExpressionCTL08A)
contrastExpressionCTL08A <- makeContrasts(contrasts = contrastExpressionCTL08A, levels = mm)
PerLineAgonistVsAntagonist[[paste(condition, ".VS.",condAntagonist,"_CTL08A",sep = "") ]] <- topTags(glmQLFTest(fit, contrast = contrastExpressionCTL08A), adjust.method = "fdr", n = AllGenes, sort.by = "PValue")$table
}
}
[1] "CTL04E.ESTROGENAGONIST-CTL04E.ESTROGENANTAGONIST" [1] "CTL08A.ESTROGENAGONIST-CTL08A.ESTROGENANTAGONIST" [1] "CTL04E.ANDROGENAGONIST-CTL04E.ANDROGENANTAGONIST" [1] "CTL08A.ANDROGENAGONIST-CTL08A.ANDROGENANTAGONIST" [1] "CTL04E.ARYLHYDAGONIST-CTL04E.ARYLHYDANTAGONIST" [1] "CTL08A.ARYLHYDAGONIST-CTL08A.ARYLHYDANTAGONIST" [1] "CTL04E.LIVERXAGONIST-CTL04E.LIVERXANTAGONIST" [1] "CTL08A.LIVERXAGONIST-CTL08A.LIVERXANTAGONIST" [1] "CTL04E.GLUCOCORTAGONIST-CTL04E.GLUCOCORTANTAGONIST" [1] "CTL08A.GLUCOCORTAGONIST-CTL08A.GLUCOCORTANTAGONIST" [1] "CTL04E.THYROIDAGONIST-CTL04E.THYROIDANTAGONIST" [1] "CTL08A.THYROIDAGONIST-CTL08A.THYROIDANTAGONIST" [1] "CTL04E.RETAGONIST-CTL04E.RETANTAGONIST" [1] "CTL08A.RETAGONIST-CTL08A.RETANTAGONIST"
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
# Prepare compounds list
CompoundsList = sorted(set("_".join(i.split("_")[:-1]) for i in PerLineAgonistVsAntagonist.keys()))
# Set seaborn style
sns.set(style="whitegrid", context="talk")
# Calculate subplot grid size
n = len(CompoundsList)
cols = 4
rows = int(np.ceil(n / cols))
# Create figure and axes
fig, axes = plt.subplots(rows, cols, figsize=(cols * 5, rows * 5))
fig.suptitle(f"Agonist vs Antagonist logFC Comparison Across lines ({DS}, {celltype})", fontsize=20, y=1.02)
axes = axes.flatten()
# Loop through compounds and plot
for idx, i in enumerate(CompoundsList):
ax = axes[idx]
try:
CTL04Eres = PerLineAgonistVsAntagonist[f"{i}_CTL04E"].copy()
CTL08Ares = PerLineAgonistVsAntagonist[f"{i}_CTL08A"].copy()
CTL04Eres = CTL04Eres[["logFC"]].rename(columns={"logFC":"CTL04E logFC"})
CTL08Ares = CTL08Ares[["logFC"]].rename(columns={"logFC":"CTL08A logFC"})
merged = pd.concat([CTL04Eres, CTL08Ares], axis=1)
# Scatterplot
sns.scatterplot(data=merged, x="CTL04E logFC", y="CTL08A logFC", ax=ax, s=20, alpha=0.7)
# Diagonal line
lims = [
np.min([merged["CTL04E logFC"].min(), merged["CTL08A logFC"].min()]),
np.max([merged["CTL04E logFC"].max(), merged["CTL08A logFC"].max()]),
]
ax.plot(lims, lims, "--", color="gray", linewidth=1)
ax.set_xlim(lims)
ax.set_ylim(lims)
# Titles and labels
ax.set_title(i, fontsize=14)
ax.set_xlabel("CTL04E logFC")
ax.set_ylabel("CTL08A logFC")
except KeyError as e:
ax.set_visible(False) # Skip if data is missing
# Hide any unused subplots
for j in range(idx + 1, len(axes)):
axes[j].set_visible(False)
plt.tight_layout()
plt.show()
%%R -o PerLineConditionVsDMSO
AllGenes <- dim(Counts)[1]
PerLineConditionVsDMSO <- list()
for (condition in setdiff(unique(MD[["condition"]]), "DMSO")) {
# Only process conditions containing "AGONIST"
contrastExpressionCTL04E <- paste("CTL04E.", condition, "-", "CTL04E.DMSO",sep = "")
print(contrastExpressionCTL04E)
contrastExpressionCTL04E <- makeContrasts(contrasts = contrastExpressionCTL04E, levels = mm)
PerLineConditionVsDMSO[[paste(condition, ".VS.DMSO_CTL04E",sep = "") ]] <- topTags(glmQLFTest(fit, contrast = contrastExpressionCTL04E), adjust.method = "fdr", n = AllGenes, sort.by = "PValue")$table
contrastExpressionCTL08A <- paste("CTL08A.", condition, "-", "CTL08A.DMSO",sep = "")
print(contrastExpressionCTL08A)
contrastExpressionCTL08A <- makeContrasts(contrasts = contrastExpressionCTL08A, levels = mm)
PerLineConditionVsDMSO[[paste(condition, ".VS.DMSO_CTL08A",sep = "") ]] <- topTags(glmQLFTest(fit, contrast = contrastExpressionCTL08A), adjust.method = "fdr", n = AllGenes, sort.by = "PValue")$table
}
[1] "CTL04E.ESTROGENAGONIST-CTL04E.DMSO" [1] "CTL08A.ESTROGENAGONIST-CTL08A.DMSO" [1] "CTL04E.ESTROGENANTAGONIST-CTL04E.DMSO" [1] "CTL08A.ESTROGENANTAGONIST-CTL08A.DMSO" [1] "CTL04E.ANDROGENAGONIST-CTL04E.DMSO" [1] "CTL08A.ANDROGENAGONIST-CTL08A.DMSO" [1] "CTL04E.ANDROGENANTAGONIST-CTL04E.DMSO" [1] "CTL08A.ANDROGENANTAGONIST-CTL08A.DMSO" [1] "CTL04E.ARYLHYDAGONIST-CTL04E.DMSO" [1] "CTL08A.ARYLHYDAGONIST-CTL08A.DMSO" [1] "CTL04E.ARYLHYDANTAGONIST-CTL04E.DMSO" [1] "CTL08A.ARYLHYDANTAGONIST-CTL08A.DMSO" [1] "CTL04E.LIVERXAGONIST-CTL04E.DMSO" [1] "CTL08A.LIVERXAGONIST-CTL08A.DMSO" [1] "CTL04E.LIVERXANTAGONIST-CTL04E.DMSO" [1] "CTL08A.LIVERXANTAGONIST-CTL08A.DMSO" [1] "CTL04E.GLUCOCORTAGONIST-CTL04E.DMSO" [1] "CTL08A.GLUCOCORTAGONIST-CTL08A.DMSO" [1] "CTL04E.GLUCOCORTANTAGONIST-CTL04E.DMSO" [1] "CTL08A.GLUCOCORTANTAGONIST-CTL08A.DMSO" [1] "CTL04E.THYROIDAGONIST-CTL04E.DMSO" [1] "CTL08A.THYROIDAGONIST-CTL08A.DMSO" [1] "CTL04E.THYROIDANTAGONIST-CTL04E.DMSO" [1] "CTL08A.THYROIDANTAGONIST-CTL08A.DMSO" [1] "CTL04E.RETAGONIST-CTL04E.DMSO" [1] "CTL08A.RETAGONIST-CTL08A.DMSO" [1] "CTL04E.RETANTAGONIST-CTL04E.DMSO" [1] "CTL08A.RETANTAGONIST-CTL08A.DMSO"
CompoundsList = sorted(set("_".join(i.split("_")[:-1]) for i in PerLineConditionVsDMSO.keys()))
# Set seaborn style
sns.set(style="whitegrid", context="talk")
# Calculate subplot grid size
n = len(CompoundsList)
cols = 6
rows = int(np.ceil(n / cols))
# Create figure and axes
fig, axes = plt.subplots(rows, cols, figsize=(cols * 5, rows * 5))
fig.suptitle(f"Compound vs DMSO logFC Comparison Across lines ({DS}, {celltype})", fontsize=20, y=1.02)
axes = axes.flatten()
# Loop through compounds and plot
for idx, i in enumerate(CompoundsList):
ax = axes[idx]
try:
CTL04Eres = PerLineConditionVsDMSO[f"{i}_CTL04E"].copy()
CTL08Ares = PerLineConditionVsDMSO[f"{i}_CTL08A"].copy()
CTL04Eres = CTL04Eres[["logFC"]].rename(columns={"logFC":"CTL04E logFC"})
CTL08Ares = CTL08Ares[["logFC"]].rename(columns={"logFC":"CTL08A logFC"})
merged = pd.concat([CTL04Eres, CTL08Ares], axis=1)
# Scatterplot
sns.scatterplot(data=merged, x="CTL04E logFC", y="CTL08A logFC", ax=ax, s=20, alpha=0.7)
# Diagonal line
lims = [
np.min([merged["CTL04E logFC"].min(), merged["CTL08A logFC"].min()]),
np.max([merged["CTL04E logFC"].max(), merged["CTL08A logFC"].max()]),
]
ax.plot(lims, lims, "--", color="gray", linewidth=1)
ax.set_xlim(lims)
ax.set_ylim(lims)
# Titles and labels
ax.set_title(i, fontsize=14)
ax.set_xlabel("CTL04E logFC")
ax.set_ylabel("CTL08A logFC")
except KeyError as e:
ax.set_visible(False) # Skip if data is missing
# Hide any unused subplots
for j in range(idx + 1, len(axes)):
axes[j].set_visible(False)
plt.tight_layout()
plt.show()
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
# Prepare compounds list
CompoundsList = sorted(set("_".join(i.split("_")[:-1]) for i in PerLineAgonistVsAntagonist.keys()))
# Set seaborn style
sns.set(style="whitegrid", context="talk")
# Calculate subplot grid size
n = len(CompoundsList)
cols = 4
rows = int(np.ceil(n / cols))
# Create figure and axes
fig, axes = plt.subplots(rows, cols, figsize=(cols * 5, rows * 5))
fig.suptitle(f"Agonist vs Antagonist logFC Comparison Across lines ({DS}, {celltype})", fontsize=20, y=1.02)
axes = axes.flatten()
# Loop through compounds and plot
for idx, i in enumerate(CompoundsList):
ax = axes[idx]
try:
CTL04Eres = PerLineAgonistVsAntagonist[f"{i}_CTL04E"].copy()
CTL08Ares = PerLineAgonistVsAntagonist[f"{i}_CTL08A"].copy()
CTL04Eres = CTL04Eres[["logFC"]].rename(columns={"logFC":"CTL04E logFC"})
CTL08Ares = CTL08Ares[["logFC"]].rename(columns={"logFC":"CTL08A logFC"})
merged = pd.concat([CTL04Eres, CTL08Ares], axis=1)
# Scatterplot
sns.scatterplot(data=merged, x="CTL04E logFC", y="CTL08A logFC", ax=ax, s=20, alpha=0.7)
# Diagonal line
lims = [
np.min([merged["CTL04E logFC"].min(), merged["CTL08A logFC"].min()]),
np.max([merged["CTL04E logFC"].max(), merged["CTL08A logFC"].max()]),
]
ax.plot(lims, lims, "--", color="gray", linewidth=1)
ax.set_xlim(lims)
ax.set_ylim(lims)
# Titles and labels
ax.set_title(i, fontsize=14)
ax.set_xlabel("CTL04E logFC")
ax.set_ylabel("CTL08A logFC")
except KeyError as e:
ax.set_visible(False) # Skip if data is missing
# Hide any unused subplots
for j in range(idx + 1, len(axes)):
axes[j].set_visible(False)
plt.tight_layout()
plt.show()
%%R -o DifferentialEffectCompoundVsDMSO
AllGenes <- dim(Counts)[1]
DifferentialEffectCompoundVsDMSO <- list()
for (condition in setdiff(unique(MD[["condition"]]), "DMSO")) {
# Only process conditions containing "AGONIST"
# Replace "AGONIST" with "ANTAGONIST" in the condition string
condAntagonist <- gsub("AGONIST", "ANTAGONIST", condition)
contrastExpression <- paste("(CTL04E.", condition, "-", "CTL04E.DMSO)-(CTL08A.", condition, "-", "CTL08A.DMSO)",
sep = "")
print(contrastExpression)
mycontrast <- makeContrasts(contrasts = contrastExpression, levels = mm)
DifferentialEffectCompoundVsDMSO[[paste(condition, ".VS.DMSO.DifferentialCTL04E-CTL08A",sep = "") ]] <- topTags(glmQLFTest(fit, contrast = mycontrast), adjust.method = "fdr", n = AllGenes, sort.by = "PValue")$table
}
[1] "(CTL04E.ESTROGENAGONIST-CTL04E.DMSO)-(CTL08A.ESTROGENAGONIST-CTL08A.DMSO)" [1] "(CTL04E.ESTROGENANTAGONIST-CTL04E.DMSO)-(CTL08A.ESTROGENANTAGONIST-CTL08A.DMSO)" [1] "(CTL04E.ANDROGENAGONIST-CTL04E.DMSO)-(CTL08A.ANDROGENAGONIST-CTL08A.DMSO)" [1] "(CTL04E.ANDROGENANTAGONIST-CTL04E.DMSO)-(CTL08A.ANDROGENANTAGONIST-CTL08A.DMSO)" [1] "(CTL04E.ARYLHYDAGONIST-CTL04E.DMSO)-(CTL08A.ARYLHYDAGONIST-CTL08A.DMSO)" [1] "(CTL04E.ARYLHYDANTAGONIST-CTL04E.DMSO)-(CTL08A.ARYLHYDANTAGONIST-CTL08A.DMSO)" [1] "(CTL04E.LIVERXAGONIST-CTL04E.DMSO)-(CTL08A.LIVERXAGONIST-CTL08A.DMSO)" [1] "(CTL04E.LIVERXANTAGONIST-CTL04E.DMSO)-(CTL08A.LIVERXANTAGONIST-CTL08A.DMSO)" [1] "(CTL04E.GLUCOCORTAGONIST-CTL04E.DMSO)-(CTL08A.GLUCOCORTAGONIST-CTL08A.DMSO)" [1] "(CTL04E.GLUCOCORTANTAGONIST-CTL04E.DMSO)-(CTL08A.GLUCOCORTANTAGONIST-CTL08A.DMSO)" [1] "(CTL04E.THYROIDAGONIST-CTL04E.DMSO)-(CTL08A.THYROIDAGONIST-CTL08A.DMSO)" [1] "(CTL04E.THYROIDANTAGONIST-CTL04E.DMSO)-(CTL08A.THYROIDANTAGONIST-CTL08A.DMSO)" [1] "(CTL04E.RETAGONIST-CTL04E.DMSO)-(CTL08A.RETAGONIST-CTL08A.DMSO)" [1] "(CTL04E.RETANTAGONIST-CTL04E.DMSO)-(CTL08A.RETANTAGONIST-CTL08A.DMSO)"
FDR = 0.05
dir_path = "./{}_{}_DEGs".format(DS, celltype)
if not os.path.exists(dir_path):
os.makedirs(dir_path)
SharedConditionVsDmso_filtered = {}
for k in list(SharedConditionVsDmso.keys()):
DEGs = SharedConditionVsDmso[k]
DEGs[DEGs["FDR"] < FDR].to_excel("./{}_{}_DEGs/SharedCompoundVsDmso.{}_{}_{}.FDR{}.xlsx".format(DS, celltype,k, celltype, DS,FDR))
SharedConditionVsDmso_filtered[k] = DEGs[DEGs["FDR"] < FDR]
SharedAgonistVsAntagonist_filtered = {}
for k in list(SharedAgonistVsAntagonist.keys()):
DEGs = SharedAgonistVsAntagonist[k]
DEGs[DEGs["FDR"] < FDR].to_excel("./{}_{}_DEGs/SharedAgonistVsAntagonist.{}_{}_{}.FDR{}.xlsx".format(DS, celltype,k, celltype, DS,FDR))
SharedAgonistVsAntagonist_filtered[k] = DEGs[DEGs["FDR"] < FDR]
DifferentialEffectCompoundVsDMSO_filtered = {}
for k in list(DifferentialEffectCompoundVsDMSO.keys()):
DEGs = DifferentialEffectCompoundVsDMSO[k]
DEGs[DEGs["FDR"] < FDR].to_excel("./{}_{}_DEGs/DifferentialEffectCompoundVsDMSO.{}_{}_{}.FDR{}.xlsx".format(DS, celltype, k, celltype, DS,FDR))
DifferentialEffectCompoundVsDMSO_filtered[k] = DEGs[DEGs["FDR"] < FDR]
DifferentialEffectAgonistVsAntagonist_filtered = {}
for k in list(DifferentialEffectAgonistVsAntagonist.keys()):
DEGs = DifferentialEffectAgonistVsAntagonist[k]
DEGs[DEGs["FDR"] < FDR].to_excel("./{}_{}_DEGs/DifferentialEffectAgonistVsAntagonist.{}_{}_{}.FDR{}.xlsx".format(DS, celltype, k, celltype, DS,FDR))
DifferentialEffectAgonistVsAntagonist_filtered[k] = DEGs[DEGs["FDR"] < FDR]
PerLineAgonistVsAntagonist_filtered = {}
for k in list(PerLineAgonistVsAntagonist.keys()):
DEGs = PerLineAgonistVsAntagonist[k]
DEGs[DEGs["FDR"] < FDR].to_excel("./{}_{}_DEGs/DEGsPerLineAgonistVsAntagonist.{}_{}_{}.FDR{}.xlsx".format(DS, celltype,k, celltype, DS,FDR))
PerLineAgonistVsAntagonist_filtered["UP_{}".format(k)] = DEGs[(DEGs["FDR"] < FDR) & (DEGs["logFC"] > 2)]
PerLineAgonistVsAntagonist_filtered["DOWN_{}".format(k)] = DEGs[(DEGs["FDR"] < FDR) & (DEGs["logFC"] < -2)]
PerLineConditionVsDMSO_filtered = {}
for k in list(PerLineConditionVsDMSO.keys()):
DEGs = PerLineConditionVsDMSO[k]
DEGs[DEGs["FDR"] < FDR].to_excel("./{}_{}_DEGs/DEGsPerLineCompoundVsDMSO.{}_{}_{}.FDR{}.xlsx".format(DS, celltype,k, celltype, DS,FDR))
PerLineConditionVsDMSO_filtered["UP_{}".format(k)] = DEGs[(DEGs["FDR"] < FDR) & (DEGs["logFC"] > 2)]
PerLineConditionVsDMSO_filtered["DOWN_{}".format(k)] = DEGs[(DEGs["FDR"] < FDR) & (DEGs["logFC"] < -2)]
### AGONIST VS ANTAGONIST
### AGONIST VS ANTAGONIST
### AGONIST VS ANTAGONIST
### AGONIST VS ANTAGONIST
JaccardDF = pd.DataFrame(index = list(PerLineAgonistVsAntagonist_filtered.keys()), columns = list(PerLineAgonistVsAntagonist_filtered.keys()))
for combination in list(itertools.combinations(list(PerLineAgonistVsAntagonist_filtered.keys()),2)):
set0 = set(PerLineAgonistVsAntagonist_filtered[combination[0]].genes.tolist())
set1 = set(PerLineAgonistVsAntagonist_filtered[combination[1]].genes.tolist())
Int = set0.intersection(set1)
Union = set0.union(set1)
Jaccard = len(Int) / len(Union)
JaccardDF.loc[combination[0],combination[1]] = Jaccard
JaccardDF.loc[combination[1],combination[0]] = Jaccard
JaccardDF = JaccardDF.fillna(1)
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
# Assuming JaccardDF is your DataFrame of Jaccard similarities
# Convert similarity to distance: distance = 1 - similarity
distance_matrix = 1 - JaccardDF.astype(float)
# Optionally, sort the DataFrame for a canonical order
distance_matrix = distance_matrix.sort_index().sort_index(axis=1)
# Create a clustermap using the 'bwr' colormap with improved aesthetics.
g = sns.clustermap(
distance_matrix,
metric="euclidean", # or 'cityblock', 'cosine', etc.
method="average", # clustering method: single, complete, ward, etc.
cmap="bwr_r", # blue-white-red palette
figsize=(15, 15),
cbar_pos=(0.02, 0.8, 0.05, 0.18) # adjust colorbar position if needed
)
g.ax_row_dendrogram.set_visible(False)
g.ax_col_dendrogram.set_visible(False)
# The clustermap reorders rows/columns; get the reordered DataFrame.
reordered_data = distance_matrix.iloc[
g.dendrogram_row.reordered_ind, g.dendrogram_col.reordered_ind
]
# Annotate each cell with the corresponding Jaccard similarity value.
# Note: similarity = 1 - distance
for i, row in enumerate(reordered_data.index):
for j, col in enumerate(reordered_data.columns):
d = reordered_data.loc[row, col]
jaccard_sim = 1 - d # Convert back to similarity
g.ax_heatmap.text(
j + 0.5, i + 0.5,
f"{jaccard_sim:.2f}",
ha='center', va='center',
color='black', fontsize=8
)
# Remove grid lines from the heatmap
g.ax_heatmap.grid(False)
# Improve overall aesthetics
g.fig.suptitle("DEGs Jaccard similarity Agonist vs Antagonist", fontsize=16, y=1.05)
plt.setp(g.ax_heatmap.xaxis.get_majorticklabels(), rotation=90, fontsize=8)
plt.setp(g.ax_heatmap.yaxis.get_majorticklabels(), rotation=0, fontsize=8)
g.ax_heatmap.tick_params(axis='both', which='both', length=0)
g.cax.set_position([.92, .2, .03, .45]) # adjust the colorbar position if needed
plt.show()
### CONDITION VS DMSO
### CONDITION VS DMSO
### CONDITION VS DMSO
### CONDITION VS DMSO
JaccardDF = pd.DataFrame(index = list(PerLineConditionVsDMSO_filtered.keys()), columns = list(PerLineConditionVsDMSO_filtered.keys()))
for combination in list(itertools.combinations(list(PerLineConditionVsDMSO_filtered.keys()),2)):
set0 = set(PerLineConditionVsDMSO_filtered[combination[0]].genes.tolist())
set1 = set(PerLineConditionVsDMSO_filtered[combination[1]].genes.tolist())
Int = set0.intersection(set1)
Union = set0.union(set1)
Jaccard = len(Int) / len(Union)
JaccardDF.loc[combination[0],combination[1]] = Jaccard
JaccardDF.loc[combination[1],combination[0]] = Jaccard
JaccardDF = JaccardDF.fillna(1)
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
# Assuming JaccardDF is your DataFrame of Jaccard similarities
# Convert similarity to distance: distance = 1 - similarity
distance_matrix = 1 - JaccardDF.astype(float)
# Optionally, sort the DataFrame for a canonical order
distance_matrix = distance_matrix.sort_index().sort_index(axis=1)
# Create a clustermap using the 'bwr' colormap with improved aesthetics.
g = sns.clustermap(
distance_matrix,
metric="euclidean", # or 'cityblock', 'cosine', etc.
method="average", # clustering method: single, complete, ward, etc.
cmap="bwr_r", # blue-white-red palette
figsize=(20, 20),
cbar_pos=(0.02, 0.8, 0.05, 0.18) # adjust colorbar position if needed
)
# Hide the dendrograms (do not display them)
g.ax_row_dendrogram.set_visible(False)
g.ax_col_dendrogram.set_visible(False)
# The clustermap reorders rows/columns; get the reordered DataFrame.
reordered_data = distance_matrix.iloc[
g.dendrogram_row.reordered_ind, g.dendrogram_col.reordered_ind
]
# Annotate each cell with the corresponding Jaccard similarity value.
# Note: similarity = 1 - distance
for i, row in enumerate(reordered_data.index):
for j, col in enumerate(reordered_data.columns):
d = reordered_data.loc[row, col]
jaccard_sim = 1 - d # Convert back to similarity
g.ax_heatmap.text(
j + 0.5, i + 0.5,
f"{jaccard_sim:.2f}",
ha='center', va='center',
color='black', fontsize=5
)
# Remove grid lines from the heatmap
g.ax_heatmap.grid(False)
# Improve overall aesthetics
g.fig.suptitle("DEGs Jaccard similarity Compound vs DMSO", fontsize=16, y=1.05)
plt.setp(g.ax_heatmap.xaxis.get_majorticklabels(), rotation=90, fontsize=8)
plt.setp(g.ax_heatmap.yaxis.get_majorticklabels(), rotation=0, fontsize=8)
g.ax_heatmap.tick_params(axis='both', which='both', length=0)
g.cax.set_position([.92, .2, .03, .45]) # adjust the colorbar position if needed
plt.show()
CTL04Eresults = pd.concat([PerLineAgonistVsAntagonist_filtered[k].assign(Line="CTL04E").assign(Group=k) for k in list(PerLineAgonistVsAntagonist_filtered.keys()) if "_CTL04E" in k], ignore_index=True)
CTL04Eresults["Direction"] = CTL04Eresults.Group.str.split("_", expand=True)[0]
CTL04Eresults["Contrast"] = CTL04Eresults.Group.str.split("_", expand=True)[1].str.split(".", expand=True)[0]
CTL04Eresults.groupby(["Contrast","Direction"], as_index=False).size()
CTL04Eresults = CTL04Eresults.groupby('Contrast').apply(lambda group: pd.Series({
'Up': (group['Direction'] == 'UP').sum(),
'Down': (group['Direction'] == 'DOWN').sum(),
'Total': group.shape[0]
})).reset_index()
CTL08Aresults = pd.concat([PerLineAgonistVsAntagonist_filtered[k].assign(Line="CTL08A").assign(Group=k) for k in list(PerLineAgonistVsAntagonist_filtered.keys()) if "_CTL08A" in k], ignore_index=True)
CTL08Aresults["Direction"] = CTL08Aresults.Group.str.split("_", expand=True)[0]
CTL08Aresults["Contrast"] = CTL08Aresults.Group.str.split("_", expand=True)[1].str.split(".", expand=True)[0]
CTL08Aresults.groupby(["Contrast","Direction"], as_index=False).size()
CTL08Aresults = CTL08Aresults.groupby('Contrast').apply(lambda group: pd.Series({
'Up': (group['Direction'] == 'UP').sum(),
'Down': (group['Direction'] == 'DOWN').sum(),
'Total': group.shape[0]
})).reset_index()
import importlib
import matplotlib.pyplot as plt
import seaborn as sns
# Reload matplotlib.pyplot to restore its original state
# Prepare your summary DataFrame indexed by 'Contrast'
summary_results = {}
summary_results["CTL04E"] = CTL04Eresults.set_index('Contrast')[['Up', 'Down', 'Total']]
summary_results["CTL08A"] = CTL08Aresults.set_index('Contrast')[['Up', 'Down', 'Total']]
# Compute global minimum and maximum values across both DataFrames for uniform scaling
global_min = min(df.min().min() for df in summary_results.values())
global_max = max(df.max().max() for df in summary_results.values())
# Create a figure with 1 row and 2 columns of subplots
fig, axes = plt.subplots(1, 2, figsize=(20, 10))
# Loop through the dictionary keys and plot each heatmap on its corresponding axis
for n, k in enumerate(["CTL08A", "CTL04E"]):
ax = axes[n] # get the current axis
show_cbar = (n == 1)
sns.heatmap(
summary_results[k],
annot=True,
annot_kws={"size": 20}, # Increase text size here
fmt="d",
cmap='YlOrBr',
linewidths=0,
linecolor='none',
vmin=global_min, # set uniform minimum
vmax=global_max, # set uniform maximum
ax=ax,
cbar=show_cbar,
cbar_kws={'label': 'Count'} if show_cbar else None,
)
# Remove any visible grid lines by setting the QuadMesh edge color to the face color
ax.collections[0].set_edgecolor("face")
ax.grid(False)
# Set the axis labels and title using the axis methods
ax.tick_params(axis='x', labelsize=20)
ax.tick_params(axis='y', labelsize=20)
ax.set_xlabel(None)
ax.set_ylabel(None)
ax.set_title(k, fontdict={'size': 25})
# Adjust the layout and display the figure
plt.tight_layout()
plt.show()
CTL04Eresults = pd.concat([PerLineConditionVsDMSO_filtered[k].assign(Line="CTL04E").assign(Group=k) for k in list(PerLineConditionVsDMSO_filtered.keys()) if "_CTL04E" in k], ignore_index=True)
CTL04Eresults["Direction"] = CTL04Eresults.Group.str.split("_", expand=True)[0]
CTL04Eresults["Contrast"] = CTL04Eresults.Group.str.split("_", expand=True)[1].str.split(".", expand=True)[0]
CTL04Eresults.groupby(["Contrast","Direction"], as_index=False).size()
CTL04Eresults = CTL04Eresults.groupby('Contrast').apply(lambda group: pd.Series({
'Up': (group['Direction'] == 'UP').sum(),
'Down': (group['Direction'] == 'DOWN').sum(),
'Total': group.shape[0]
})).reset_index()
CTL08Aresults = pd.concat([PerLineConditionVsDMSO_filtered[k].assign(Line="CTL08A").assign(Group=k) for k in list(PerLineConditionVsDMSO_filtered.keys()) if "_CTL08A" in k], ignore_index=True)
CTL08Aresults["Direction"] = CTL08Aresults.Group.str.split("_", expand=True)[0]
CTL08Aresults["Contrast"] = CTL08Aresults.Group.str.split("_", expand=True)[1].str.split(".", expand=True)[0]
CTL08Aresults.groupby(["Contrast","Direction"], as_index=False).size()
CTL08Aresults = CTL08Aresults.groupby('Contrast').apply(lambda group: pd.Series({
'Up': (group['Direction'] == 'UP').sum(),
'Down': (group['Direction'] == 'DOWN').sum(),
'Total': group.shape[0]
})).reset_index()
# Reload matplotlib.pyplot to restore its original state
importlib.reload(plt)
# Prepare your summary DataFrame indexed by 'Contrast'
summary_results = {}
summary_results["CTL04E"] = CTL04Eresults.set_index('Contrast')[['Up', 'Down', 'Total']]
summary_results["CTL08A"] = CTL08Aresults.set_index('Contrast')[['Up', 'Down', 'Total']]
# Compute global minimum and maximum values across both DataFrames for uniform scaling
global_min = min(df.min().min() for df in summary_results.values())
global_max = max(df.max().max() for df in summary_results.values())
# Create a figure with 1 row and 2 columns of subplots
fig, axes = plt.subplots(1, 2, figsize=(20, 20))
# Loop through the dictionary keys and plot each heatmap on its corresponding axis
for n, k in enumerate(["CTL08A", "CTL04E"]):
ax = axes[n] # get the current axis
show_cbar = (n == 1)
sns.heatmap(
summary_results[k],
annot=True,
annot_kws={"size": 20}, # Increase text size here
fmt="d",
cmap='YlOrBr',
linewidths=0,
linecolor='none',
vmin=global_min, # set uniform minimum
vmax=global_max, # set uniform maximum
ax=ax,
cbar=show_cbar,
cbar_kws={'label': 'Count'} if show_cbar else None,
)
# Remove any visible grid lines by setting the QuadMesh edge color to the face color
ax.collections[0].set_edgecolor("face")
ax.grid(False)
# Set the axis labels and title using the axis methods
ax.tick_params(axis='x', labelsize=20)
ax.tick_params(axis='y', labelsize=20)
ax.set_xlabel(None)
ax.set_ylabel(None)
ax.set_title(k, fontdict={'size': 25})
# Adjust the layout and display the figure
plt.tight_layout()
plt.show()
ScatterploDict = {}
for k in list(PerLineConditionVsDMSO.keys()):
Compound = k.split(".")[0]
if Compound not in list(ScatterploDict.keys()):
ScatterploDict[Compound] = pd.DataFrame()
Line = k.split("_")[-1]
DF = PerLineConditionVsDMSO[k].assign(line=Line)
DifferentiallyResponding = DifferentialEffectCompoundVsDMSO_filtered[f"{Compound}.VS.DMSO.DifferentialCTL04E-CTL08A"]
DifferentiallyResponding = DifferentiallyResponding[abs(DifferentiallyResponding["logFC"]) >= 1.5]
DifferentiallyResponding = DifferentiallyResponding[DifferentiallyResponding["FDR"] <= 0.05].genes.tolist()
DF = DF[DF.genes.isin(DifferentiallyResponding)]
ScatterploDict[Compound] = pd.concat([ScatterploDict[Compound], DF], ignore_index=True)
ScatterploDict[Compound]
for Compound in list(ScatterploDict.keys()):
# Assuming ScatterploDict[Compound] is your DataFrame
df = ScatterploDict[Compound]
# Pivot the DataFrame so that each gene is a row and you have one column per line
df_pivot = df.pivot(index='genes', columns='line', values='logFC')
# Optionally, drop genes that don't have logFC values for both lines
df_pivot = df_pivot.fillna(0)
if df_pivot.shape[0] == 0:
print(f"No genes differentially regulated found for {Compound} according to selected filters")
continue
df_pivot["Delta"] = abs(df_pivot["CTL04E"] - df_pivot["CTL08A"])
plt.figure(figsize=(10, 10))
# Optionally scale the marker sizes (e.g., multiply Delta by a constant factor)
marker_sizes = df_pivot["Delta"] * 100 # Adjust scaling factor as needed
# Plot the scatter points using the correct parameter "s" for sizes
plt.scatter(df_pivot['CTL08A'], df_pivot['CTL04E'], alpha=0.2, color='blue', s=marker_sizes)
# Annotate each point with its gene name
for gene in df_pivot.index:
x = df_pivot.loc[gene, 'CTL08A']
y = df_pivot.loc[gene, 'CTL04E']
plt.text(x, y, gene, fontsize=8, alpha=0.8)
# Draw reference lines at 0 (for 0-centered values)
plt.axvline(0, color='gray', linestyle='--')
plt.axhline(0, color='gray', linestyle='--')
plt.xlabel('CTL08A logFC ', fontsize=25)
plt.ylabel('CTL04E logFC ', fontsize=25)
plt.title(Compound, fontsize=25)
plt.tight_layout()
plt.show()
No genes differentially regulated found for THYROIDANTAGONIST according to selected filters