import scanpy as sc
import pandas as pd
from matplotlib import pylab
import random
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import os
import itertools
import plotly.express as px
import yaml
import numpy as np
import random
import anndata as ad
from scipy.sparse import csr_matrix, issparse
from scipy import sparse
from matplotlib.colors import TwoSlopeNorm
import scanpy.external as sce
import sys
import scvelo as scv
import anndata
import rapids_singlecell as rsc
import anndata
from pathlib import Path
try:
nb_fname = ipynbname.name()
except:
nb_fname = os.path.basename(globals()["__vsc_ipynb_file__"])
print(nb_fname)
10.2_Hormones_substudy_LineMagnification_Neurons.ipynb
sc.settings.set_figure_params(dpi=100, facecolor='white', dpi_save=500)
pylab.rcParams['figure.figsize'] = (9, 9)
homeDir = os.getenv("HOME")
sys.path.insert(1, homeDir+"/utils/")
from PlotPCA_components import *
from AtlasClasses import *
DS="hormones_substudy"
celltype="Neurons"
ReferenceTissue="cortex"
with open(homeDir+"/utils/ReferenceDict.yaml", 'r') as file:
ReferencePaths = yaml.safe_load(file)
for k in list(ReferencePaths.keys()):
ReferencePaths[k]["adataPath"] = "/group/testa/Users/davide.castaldi/Polaroids_spinoff"+ReferencePaths[k]["adataPath"]
#ReferencePaths[k]["signaturePath"] = homeDir+ReferencePaths[k]["signaturePath"]
ReferencePaths[k]["signaturePath"] = "/group/testa/Users/davide.castaldi/Polaroids_spinoff"+ReferencePaths[k]["signaturePath"]
ReferencePaths[k]["signaturePurityPath"] = "/group/testa/Users/davide.castaldi/Polaroids_spinoff"+ReferencePaths[k]["signaturePurityPath"]
sc.settings.set_figure_params(dpi=80, facecolor='white', dpi_save=500)
pylab.rcParams['figure.figsize'] = (6, 6)
dir_path = homeDir+"/adatas/{}_ByMacroCelltype".format(DS)
pcs=15
n_neighbors=30
seed = 437
adata = sc.read_h5ad(dir_path+"/{}.{}.h5ad".format(DS,celltype))
adata
AnnData object with n_obs × n_vars = 120550 × 36601
obs: 'tags', 'cmo', 'condition', 'replicate', 'condition_clean', 'line', 'run', 'sample_id', 'groupCov', 'harmonizedRegion', 'tissue', 'PCW', 'Singlet', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'total_counts_mt', 'log1p_total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'log1p_total_counts_ribo', 'pct_counts_ribo', 'total_counts_hb', 'log1p_total_counts_hb', 'pct_counts_hb', 'log_pct_counts_hb', 'log_total_counts', 'study', 'hormones_substudy', 'EDCs_substudy', 'controls_substudy', 'region', 'study_Annotation', 'ingestdLabels', 'scANVILabels', 'harmonyLabels', 'phase', 'S_score', 'G2M_score', 'Consensus_nIntersection', 'Consensus', 'Consensus_call', 'PassedQCfilt_ScoreSignature_Hypoxia', 'PassedQCfilt_ScoreSignature_ER_stress', 'PassedQCfilt_ScoreSignature_necroptotic_process', 'PassedQCfilt_ScoreSignature_programmed_necrotic_cell_death', 'Leiden_1', 'MacroCelltype'
var: 'gene_ids', 'feature_types', 'genome', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts', 'mean', 'std', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'highly_variable_nbatches', 'highly_variable_intersection', 'highly_variable_seurat_overlap'
uns: 'AtlasTile_registry', 'Consensus_call_colors', 'Leiden_1_colors', 'PassedQCfilt_ScoreSignature_ER_stress_colors', 'PassedQCfilt_ScoreSignature_Hypoxia_colors', 'PassedQCfilt_ScoreSignature_necroptotic_process_colors', 'PassedQCfilt_ScoreSignature_programmed_necrotic_cell_death_colors', 'condition_colors', 'hvg', 'ingestdLabels_colors', 'leiden', 'line_colors', 'log1p', 'neighbors', 'pca', 'phase_colors', 'rank_genes_groups', 'umap'
obsm: 'X_pca', 'X_pca_harmony', 'X_umap'
varm: 'PCs'
layers: 'counts'
obsp: 'connectivities', 'distances'
# Clean ribo, mito anmd sec genes
# mitochondrial genes, "MT-" for human, "Mt-" for mouse
adata = adata[:,~adata.var_names.str.startswith("MT-")].copy()
# ribosomal genes
adata = adata[:,~adata.var_names.str.startswith(("RPS", "RPL"))].copy()
# Sex genes
print(adata)
adata = adata[:,~adata.var_names.isin(pd.read_csv(homeDir+"/resources/X.genes.tsv", header=None)[0].tolist())].copy()
adata = adata[:,~adata.var_names.isin(pd.read_csv(homeDir+"/resources/Y.genes.tsv", header=None)[0].tolist())].copy()
print(adata)
# Re-normalize
adata.X = adata.layers["counts"].copy()
sc.pp.normalize_total(adata, 20000)
sc.pp.log1p(adata)
adata.layers["logNorm"] = adata.X.copy()
AnnData object with n_obs × n_vars = 120550 × 36485
obs: 'tags', 'cmo', 'condition', 'replicate', 'condition_clean', 'line', 'run', 'sample_id', 'groupCov', 'harmonizedRegion', 'tissue', 'PCW', 'Singlet', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'total_counts_mt', 'log1p_total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'log1p_total_counts_ribo', 'pct_counts_ribo', 'total_counts_hb', 'log1p_total_counts_hb', 'pct_counts_hb', 'log_pct_counts_hb', 'log_total_counts', 'study', 'hormones_substudy', 'EDCs_substudy', 'controls_substudy', 'region', 'study_Annotation', 'ingestdLabels', 'scANVILabels', 'harmonyLabels', 'phase', 'S_score', 'G2M_score', 'Consensus_nIntersection', 'Consensus', 'Consensus_call', 'PassedQCfilt_ScoreSignature_Hypoxia', 'PassedQCfilt_ScoreSignature_ER_stress', 'PassedQCfilt_ScoreSignature_necroptotic_process', 'PassedQCfilt_ScoreSignature_programmed_necrotic_cell_death', 'Leiden_1', 'MacroCelltype'
var: 'gene_ids', 'feature_types', 'genome', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts', 'mean', 'std', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'highly_variable_nbatches', 'highly_variable_intersection', 'highly_variable_seurat_overlap'
uns: 'AtlasTile_registry', 'Consensus_call_colors', 'Leiden_1_colors', 'PassedQCfilt_ScoreSignature_ER_stress_colors', 'PassedQCfilt_ScoreSignature_Hypoxia_colors', 'PassedQCfilt_ScoreSignature_necroptotic_process_colors', 'PassedQCfilt_ScoreSignature_programmed_necrotic_cell_death_colors', 'condition_colors', 'hvg', 'ingestdLabels_colors', 'leiden', 'line_colors', 'log1p', 'neighbors', 'pca', 'phase_colors', 'rank_genes_groups', 'umap'
obsm: 'X_pca', 'X_pca_harmony', 'X_umap'
varm: 'PCs'
layers: 'counts'
obsp: 'connectivities', 'distances'
AnnData object with n_obs × n_vars = 120550 × 35477
obs: 'tags', 'cmo', 'condition', 'replicate', 'condition_clean', 'line', 'run', 'sample_id', 'groupCov', 'harmonizedRegion', 'tissue', 'PCW', 'Singlet', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'total_counts_mt', 'log1p_total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'log1p_total_counts_ribo', 'pct_counts_ribo', 'total_counts_hb', 'log1p_total_counts_hb', 'pct_counts_hb', 'log_pct_counts_hb', 'log_total_counts', 'study', 'hormones_substudy', 'EDCs_substudy', 'controls_substudy', 'region', 'study_Annotation', 'ingestdLabels', 'scANVILabels', 'harmonyLabels', 'phase', 'S_score', 'G2M_score', 'Consensus_nIntersection', 'Consensus', 'Consensus_call', 'PassedQCfilt_ScoreSignature_Hypoxia', 'PassedQCfilt_ScoreSignature_ER_stress', 'PassedQCfilt_ScoreSignature_necroptotic_process', 'PassedQCfilt_ScoreSignature_programmed_necrotic_cell_death', 'Leiden_1', 'MacroCelltype'
var: 'gene_ids', 'feature_types', 'genome', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts', 'mean', 'std', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'highly_variable_nbatches', 'highly_variable_intersection', 'highly_variable_seurat_overlap'
uns: 'AtlasTile_registry', 'Consensus_call_colors', 'Leiden_1_colors', 'PassedQCfilt_ScoreSignature_ER_stress_colors', 'PassedQCfilt_ScoreSignature_Hypoxia_colors', 'PassedQCfilt_ScoreSignature_necroptotic_process_colors', 'PassedQCfilt_ScoreSignature_programmed_necrotic_cell_death_colors', 'condition_colors', 'hvg', 'ingestdLabels_colors', 'leiden', 'line_colors', 'log1p', 'neighbors', 'pca', 'phase_colors', 'rank_genes_groups', 'umap'
obsm: 'X_pca', 'X_pca_harmony', 'X_umap'
varm: 'PCs'
layers: 'counts'
obsp: 'connectivities', 'distances'
WARNING: adata.X seems to be already log-transformed.
sc.pl.umap(adata, color=["line","condition"])
adata.obs["condition"].value_counts()
condition ARYL_HYD_ANTAGONIST 12227 ANDROGEN_ANTAGONIST 10305 LIVER-X_AGONIST 9230 ARYL_HYD_AGONIST 8562 ESTROGEN_AGONIST 8322 RET_AGONIST 8321 THYROID_ANTAGONIST 8216 LIVER-X_ANTAGONIST 8199 GLUCOCORT_ANTAGONIST 7743 DMSO 7001 GLUCOCORT_AGONIST 6965 ANDROGEN_AGONIST 6845 THYROID_AGONIST 6714 ESTROGEN_ANTAGONIST 6420 RET_ANTAGONIST 5480 Name: count, dtype: int64
HVGSCondict = {}
for line in adata.obs["line"].unique().tolist():
adataGenotype = adata[adata.obs["line"] == line]
for condition in adataGenotype.obs["condition"].unique().tolist():
adataCOnd = adataGenotype[adataGenotype.obs["condition"] == condition].copy()
if condition not in list(HVGSCondict.keys()):
HVGSCondict[condition] = []
#Print Nsamples
nsamples = len(adataCOnd.obs["sample_id"].unique().tolist())
print(f"Found {nsamples} repliacates for {line} {condition}")
if nsamples < 3:
minOverlap = nsamples
else:
minOverlap = nsamples-1
print(f"Only HVGs common to {minOverlap} replicates will be kept")
sc.pp.highly_variable_genes(adataCOnd, batch_key="sample_id", flavor="seurat", n_top_genes=1000)
CommonHVGs = set(adataCOnd.var_names[adataCOnd.var["highly_variable_nbatches"] >= minOverlap])
nHVGs = len(CommonHVGs)
print(f"Found {nHVGs} common HVGs across {minOverlap} replicates for line {line} exposure {condition} ")
print("\n")
HVGSCondict[condition].append(CommonHVGs)
Found 3 repliacates for CTL04E ESTROGEN_AGONIST Only HVGs common to 2 replicates will be kept Found 552 common HVGs across 2 replicates for line CTL04E exposure ESTROGEN_AGONIST Found 3 repliacates for CTL04E ESTROGEN_ANTAGONIST Only HVGs common to 2 replicates will be kept Found 553 common HVGs across 2 replicates for line CTL04E exposure ESTROGEN_ANTAGONIST Found 3 repliacates for CTL04E ANDROGEN_AGONIST Only HVGs common to 2 replicates will be kept Found 512 common HVGs across 2 replicates for line CTL04E exposure ANDROGEN_AGONIST Found 3 repliacates for CTL04E ANDROGEN_ANTAGONIST Only HVGs common to 2 replicates will be kept Found 545 common HVGs across 2 replicates for line CTL04E exposure ANDROGEN_ANTAGONIST Found 3 repliacates for CTL04E ARYL_HYD_AGONIST Only HVGs common to 2 replicates will be kept Found 573 common HVGs across 2 replicates for line CTL04E exposure ARYL_HYD_AGONIST Found 3 repliacates for CTL04E ARYL_HYD_ANTAGONIST Only HVGs common to 2 replicates will be kept Found 674 common HVGs across 2 replicates for line CTL04E exposure ARYL_HYD_ANTAGONIST Found 3 repliacates for CTL04E LIVER-X_AGONIST Only HVGs common to 2 replicates will be kept Found 552 common HVGs across 2 replicates for line CTL04E exposure LIVER-X_AGONIST Found 3 repliacates for CTL04E LIVER-X_ANTAGONIST Only HVGs common to 2 replicates will be kept Found 706 common HVGs across 2 replicates for line CTL04E exposure LIVER-X_ANTAGONIST Found 3 repliacates for CTL04E GLUCOCORT_AGONIST Only HVGs common to 2 replicates will be kept Found 548 common HVGs across 2 replicates for line CTL04E exposure GLUCOCORT_AGONIST Found 3 repliacates for CTL04E GLUCOCORT_ANTAGONIST Only HVGs common to 2 replicates will be kept Found 537 common HVGs across 2 replicates for line CTL04E exposure GLUCOCORT_ANTAGONIST Found 3 repliacates for CTL04E THYROID_AGONIST Only HVGs common to 2 replicates will be kept Found 550 common HVGs across 2 replicates for line CTL04E exposure THYROID_AGONIST Found 3 repliacates for CTL04E THYROID_ANTAGONIST Only HVGs common to 2 replicates will be kept Found 489 common HVGs across 2 replicates for line CTL04E exposure THYROID_ANTAGONIST Found 3 repliacates for CTL04E DMSO Only HVGs common to 2 replicates will be kept Found 508 common HVGs across 2 replicates for line CTL04E exposure DMSO Found 1 repliacates for CTL04E RET_AGONIST Only HVGs common to 1 replicates will be kept Found 1000 common HVGs across 1 replicates for line CTL04E exposure RET_AGONIST Found 3 repliacates for CTL04E RET_ANTAGONIST Only HVGs common to 2 replicates will be kept Found 516 common HVGs across 2 replicates for line CTL04E exposure RET_ANTAGONIST Found 3 repliacates for CTL08A ESTROGEN_AGONIST Only HVGs common to 2 replicates will be kept Found 554 common HVGs across 2 replicates for line CTL08A exposure ESTROGEN_AGONIST Found 3 repliacates for CTL08A ESTROGEN_ANTAGONIST Only HVGs common to 2 replicates will be kept Found 568 common HVGs across 2 replicates for line CTL08A exposure ESTROGEN_ANTAGONIST Found 3 repliacates for CTL08A ANDROGEN_AGONIST Only HVGs common to 2 replicates will be kept Found 572 common HVGs across 2 replicates for line CTL08A exposure ANDROGEN_AGONIST Found 3 repliacates for CTL08A ANDROGEN_ANTAGONIST Only HVGs common to 2 replicates will be kept Found 548 common HVGs across 2 replicates for line CTL08A exposure ANDROGEN_ANTAGONIST Found 3 repliacates for CTL08A ARYL_HYD_AGONIST Only HVGs common to 2 replicates will be kept Found 568 common HVGs across 2 replicates for line CTL08A exposure ARYL_HYD_AGONIST Found 3 repliacates for CTL08A ARYL_HYD_ANTAGONIST Only HVGs common to 2 replicates will be kept Found 670 common HVGs across 2 replicates for line CTL08A exposure ARYL_HYD_ANTAGONIST Found 3 repliacates for CTL08A LIVER-X_AGONIST Only HVGs common to 2 replicates will be kept Found 626 common HVGs across 2 replicates for line CTL08A exposure LIVER-X_AGONIST Found 3 repliacates for CTL08A LIVER-X_ANTAGONIST Only HVGs common to 2 replicates will be kept Found 738 common HVGs across 2 replicates for line CTL08A exposure LIVER-X_ANTAGONIST Found 3 repliacates for CTL08A GLUCOCORT_AGONIST Only HVGs common to 2 replicates will be kept Found 536 common HVGs across 2 replicates for line CTL08A exposure GLUCOCORT_AGONIST Found 3 repliacates for CTL08A GLUCOCORT_ANTAGONIST Only HVGs common to 2 replicates will be kept Found 550 common HVGs across 2 replicates for line CTL08A exposure GLUCOCORT_ANTAGONIST Found 3 repliacates for CTL08A THYROID_AGONIST Only HVGs common to 2 replicates will be kept Found 537 common HVGs across 2 replicates for line CTL08A exposure THYROID_AGONIST Found 3 repliacates for CTL08A THYROID_ANTAGONIST Only HVGs common to 2 replicates will be kept Found 484 common HVGs across 2 replicates for line CTL08A exposure THYROID_ANTAGONIST Found 3 repliacates for CTL08A DMSO Only HVGs common to 2 replicates will be kept Found 586 common HVGs across 2 replicates for line CTL08A exposure DMSO Found 1 repliacates for CTL08A RET_AGONIST Only HVGs common to 1 replicates will be kept Found 1000 common HVGs across 1 replicates for line CTL08A exposure RET_AGONIST Found 2 repliacates for CTL08A RET_ANTAGONIST Only HVGs common to 2 replicates will be kept Found 441 common HVGs across 2 replicates for line CTL08A exposure RET_ANTAGONIST
sc.pp.highly_variable_genes(adata, batch_key="line", flavor="seurat", n_top_genes=1000)
HVGSLine = set(adata.var_names[adata.var["highly_variable_nbatches"] >=1].tolist())
nHVGSLine = len(HVGSLine)
print(f"Found {nHVGSLine} common pan-condition HVGs across both lines ")
Found 1601 common pan-condition HVGs across both lines
##### First HVGs set
##### First HVGs set
IntersectionDEGs = []
for k in list(HVGSCondict.keys()):
IntersectionDEGsCond = list(set.union(*HVGSCondict[k]))
nIntersectionDEGsCond = len(IntersectionDEGsCond)
#print(f"Found {nIntersectionDEGsCond} HVGs from the intersection of both genotypes replicates for exposure {condition} ")
IntersectionDEGs.extend(IntersectionDEGsCond)
IntersectionDEGs = set(IntersectionDEGs)
nIntersectionDEGsCond = len(IntersectionDEGs)
print(f"Found {nIntersectionDEGsCond} HVGs from the union of both genotypes replicates for exposure {condition} ")
##### Second HVGs set
##### Second HVGs set
FinalHVGs = list(IntersectionDEGs.union(HVGSLine))
nFinalHVGs = len(FinalHVGs)
print(f"Found {nFinalHVGs} HVGs from the union of both genotypes replicates and across exposures ")
Found 3998 HVGs from the union of both genotypes replicates for exposure RET_ANTAGONIST Found 4608 HVGs from the union of both genotypes replicates and across exposures
adata
AnnData object with n_obs × n_vars = 120550 × 35477
obs: 'tags', 'cmo', 'condition', 'replicate', 'condition_clean', 'line', 'run', 'sample_id', 'groupCov', 'harmonizedRegion', 'tissue', 'PCW', 'Singlet', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'total_counts_mt', 'log1p_total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'log1p_total_counts_ribo', 'pct_counts_ribo', 'total_counts_hb', 'log1p_total_counts_hb', 'pct_counts_hb', 'log_pct_counts_hb', 'log_total_counts', 'study', 'hormones_substudy', 'EDCs_substudy', 'controls_substudy', 'region', 'study_Annotation', 'ingestdLabels', 'scANVILabels', 'harmonyLabels', 'phase', 'S_score', 'G2M_score', 'Consensus_nIntersection', 'Consensus', 'Consensus_call', 'PassedQCfilt_ScoreSignature_Hypoxia', 'PassedQCfilt_ScoreSignature_ER_stress', 'PassedQCfilt_ScoreSignature_necroptotic_process', 'PassedQCfilt_ScoreSignature_programmed_necrotic_cell_death', 'Leiden_1', 'MacroCelltype'
var: 'gene_ids', 'feature_types', 'genome', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts', 'mean', 'std', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'highly_variable_nbatches', 'highly_variable_intersection', 'highly_variable_seurat_overlap'
uns: 'AtlasTile_registry', 'Consensus_call_colors', 'Leiden_1_colors', 'PassedQCfilt_ScoreSignature_ER_stress_colors', 'PassedQCfilt_ScoreSignature_Hypoxia_colors', 'PassedQCfilt_ScoreSignature_necroptotic_process_colors', 'PassedQCfilt_ScoreSignature_programmed_necrotic_cell_death_colors', 'condition_colors', 'hvg', 'ingestdLabels_colors', 'leiden', 'line_colors', 'log1p', 'neighbors', 'pca', 'phase_colors', 'rank_genes_groups', 'umap'
obsm: 'X_pca', 'X_pca_harmony', 'X_umap'
varm: 'PCs'
layers: 'counts', 'logNorm'
obsp: 'connectivities', 'distances'
adata.var["highly_variable_seurat_overlap"] = adata.var_names.isin(FinalHVGs)
print(adata.var["highly_variable_seurat_overlap"].sum())
adata.var["highly_variable"] = adata.var["highly_variable_seurat_overlap"]
sc.pp.scale(adata)
adata.layers["scaled"] = adata.X.copy()
sc.pp.pca(adata, use_highly_variable=True)
sc.pp.neighbors(adata, n_pcs=pcs, n_neighbors=n_neighbors, random_state=seed)
adata.X = adata.layers["logNorm"].copy()
4608
sc.tl.umap(adata, random_state=seed)
sc.tl.draw_graph(adata)
sc.pl.umap(adata, color=["line","condition","Consensus_call"], size=10, vmin='p1', vmax='p99', wspace=.2, ncols=2)
sc.pl.draw_graph(adata, color=["line","condition","Consensus_call"], size=10, vmin='p1', vmax='p99', wspace=.2, ncols=2)
print(adata.X.max())
print(adata.X.min())
del adata.layers["scaled"]
del adata.layers["logNorm"]
# Store results
adata.write_h5ad(dir_path+"/{}.{}.LineMagnification.h5ad".format(DS,celltype))
adata = sc.read_h5ad(dir_path+"/{}.{}.LineMagnification.h5ad".format(DS,celltype))
adataPbulk = adata.copy()
adataPbulk.obs["line_condition"] = adataPbulk.obs["line"].astype(str)+"_"+ adataPbulk.obs["condition"].astype(str)
import hashlib Xtab = pd.crosstab(adataPbulk.obs["condition"],adataPbulk.obs["line"]) print(Xtab) minCells = Xtab.min().min() Compositionhash = hashlib.sha256(Xtab.sort_index(axis=1).to_csv(index=True).encode('utf-8')).hexdigest()
SelectedCells = [] SelectedCellsFile = "./{}.{}.selectedBCs.txt".format(nb_fname,Compositionhash) if os.path.isfile(SelectedCellsFile): print("Loading existing cells subset") SelectedCells = pd.read_csv(SelectedCellsFile)["0"].tolist() else: print("File not found or compositions are changed, performing downsampling to balance conditions/lines") for line in adataPbulk.obs["line_condition"].unique(): pbulkCond = adataPbulk[(adataPbulk.obs["line_condition"] == line)]
pbulkCondCells = random.sample(pbulkCond.obs_names.tolist(), minCells)
SelectedCells.extend(pbulkCondCells)
pd.Series(SelectedCells).to_csv(SelectedCellsFile)
adataPbulk = adataPbulk[SelectedCells] pd.crosstab(adataPbulk.obs["condition"],adataPbulk.obs["line"])
adataPbulk = AtlasTile(adataPbulk, regionObs="harmonizedRegion", tissueObs="tissue", ageObs="PCW", study=DS,scalevar=True, groupCov="groupCov")
adataPbulk.AggregateCounts(method="pseudoBulk", groupcov="sample_id", layer="counts", cellStateObs="condition", prenormalize=False, postnormalize=False)
adataPbulk = adataPbulk.uns["pseudoBulkadata.layer_counts.by_sample_id_condition"].copy()
Pseudobulking counts for sample 3_CTL04E_491 by condition obs Pseudobulking counts for sample 2_CTL08A_491 by condition obs Pseudobulking counts for sample 3_CTL08A_491 by condition obs Pseudobulking counts for sample 2_CTL04E_491 by condition obs Pseudobulking counts for sample 1_CTL08A_491 by condition obs Pseudobulking counts for sample 1_CTL04E_491 by condition obs Pseudobulking counts for sample 3_CTL04E_492 by condition obs Pseudobulking counts for sample 1_CTL08A_492 by condition obs Pseudobulking counts for sample 1_CTL04E_492 by condition obs Pseudobulking counts for sample 2_CTL04E_492 by condition obs Pseudobulking counts for sample 2_CTL08A_492 by condition obs Pseudobulking counts for sample 3_CTL08A_492 by condition obs Pseudobulking counts for sample 2_CTL08A_493 by condition obs Pseudobulking counts for sample 3_CTL04E_493 by condition obs Pseudobulking counts for sample 3_CTL08A_493 by condition obs Pseudobulking counts for sample 2_CTL04E_493 by condition obs Pseudobulking counts for sample 1_CTL04E_493 by condition obs Pseudobulking counts for sample 1_CTL08A_493 by condition obs Pseudobulking counts for sample 2_CTL08A_494 by condition obs Pseudobulking counts for sample 2_CTL04E_494 by condition obs Pseudobulking counts for sample 1_CTL04E_494 by condition obs Pseudobulking counts for sample 3_CTL04E_494 by condition obs Pseudobulking counts for sample 3_CTL08A_494 by condition obs Pseudobulking counts for sample 1_CTL08A_494 by condition obs Pseudobulking counts for sample 1_CTL04E_495 by condition obs Pseudobulking counts for sample 2_CTL04E_495 by condition obs Pseudobulking counts for sample 3_CTL08A_495 by condition obs Pseudobulking counts for sample 3_CTL04E_495 by condition obs Pseudobulking counts for sample 1_CTL08A_495 by condition obs Pseudobulking counts for sample 2_CTL08A_495 by condition obs Pseudobulking counts for sample 2_CTL04E_496 by condition obs Pseudobulking counts for sample 2_CTL08A_496 by condition obs Pseudobulking counts for sample 1_CTL08A_496 by condition obs Pseudobulking counts for sample 3_CTL04E_496 by condition obs Pseudobulking counts for sample 1_CTL04E_496 by condition obs Pseudobulking counts for sample 3_CTL08A_496 by condition obs Pseudobulking counts for sample 3_CTL04E_497 by condition obs Pseudobulking counts for sample 2_CTL04E_497 by condition obs Pseudobulking counts for sample 3_CTL08A_497 by condition obs Pseudobulking counts for sample 2_CTL08A_497 by condition obs Pseudobulking counts for sample 1_CTL04E_497 by condition obs Pseudobulking counts for sample 1_CTL08A_497 by condition obs Pseudobulking counts for sample 1_CTL04E_498 by condition obs Pseudobulking counts for sample 3_CTL08A_498 by condition obs Pseudobulking counts for sample 1_CTL08A_498 by condition obs Pseudobulking counts for sample 3_CTL04E_498 by condition obs Pseudobulking counts for sample 2_CTL04E_498 by condition obs Pseudobulking counts for sample 2_CTL08A_498 by condition obs Pseudobulking counts for sample 1_CTL08A_499 by condition obs Pseudobulking counts for sample 2_CTL04E_499 by condition obs Pseudobulking counts for sample 3_CTL04E_499 by condition obs Pseudobulking counts for sample 3_CTL08A_499 by condition obs Pseudobulking counts for sample 1_CTL04E_499 by condition obs Pseudobulking counts for sample 2_CTL08A_499 by condition obs Pseudobulking counts for sample 2_CTL04E_500 by condition obs Pseudobulking counts for sample 1_CTL04E_500 by condition obs Pseudobulking counts for sample 3_CTL04E_500 by condition obs Pseudobulking counts for sample 2_CTL08A_500 by condition obs Pseudobulking counts for sample 1_CTL08A_500 by condition obs Pseudobulking counts for sample 3_CTL08A_500 by condition obs Pseudobulking counts for sample 3_CTL08A_501 by condition obs Pseudobulking counts for sample 3_CTL04E_501 by condition obs Pseudobulking counts for sample 1_CTL08A_501 by condition obs Pseudobulking counts for sample 2_CTL04E_501 by condition obs Pseudobulking counts for sample 1_CTL04E_501 by condition obs Pseudobulking counts for sample 2_CTL08A_501 by condition obs Pseudobulking counts for sample 3_CTL04E_502 by condition obs Pseudobulking counts for sample 3_CTL08A_502 by condition obs Pseudobulking counts for sample 1_CTL04E_502 by condition obs Pseudobulking counts for sample 1_CTL08A_502 by condition obs Pseudobulking counts for sample 2_CTL08A_502 by condition obs Pseudobulking counts for sample 2_CTL04E_502 by condition obs Pseudobulking counts for sample 2_CTL08A_505 by condition obs Pseudobulking counts for sample 2_CTL04E_505 by condition obs Pseudobulking counts for sample 1_CTL08A_505 by condition obs Pseudobulking counts for sample 3_CTL04E_505 by condition obs Pseudobulking counts for sample 1_CTL04E_505 by condition obs Pseudobulking counts for sample 3_CTL08A_505 by condition obs Pseudobulking counts for sample 1_CTL08A_508 by condition obs Pseudobulking counts for sample 1_CTL04E_508 by condition obs Pseudobulking counts for sample 1_CTL08A_509 by condition obs Pseudobulking counts for sample 3_CTL04E_509 by condition obs Pseudobulking counts for sample 1_CTL04E_509 by condition obs Pseudobulking counts for sample 2_CTL04E_509 by condition obs Pseudobulking counts for sample 3_CTL08A_509 by condition obs
def sanitize_strings(value):
if isinstance(value, str): # Only process string values
return (
value.replace('_', '') # Replace underscores with spaces
.replace('-', '') # Replace hyphens with spaces
.replace('@', '') # Example: Remove "@" character
)
return value # Leave non-strings untouched
Counts = adataPbulk.to_df(layer="pseudoBulk_summedCounts").T
MD = adataPbulk.obs[["condition","line"]]
MD["sample"] = MD.index.tolist()
MD = MD.applymap(sanitize_strings).loc[MD.index]
MD
| condition | line | sample | |
|---|---|---|---|
| 3_CTL04E_491.ESTROGEN_AGONIST | ESTROGENAGONIST | CTL04E | 3CTL04E491.ESTROGENAGONIST |
| 2_CTL08A_491.ESTROGEN_AGONIST | ESTROGENAGONIST | CTL08A | 2CTL08A491.ESTROGENAGONIST |
| 3_CTL08A_491.ESTROGEN_AGONIST | ESTROGENAGONIST | CTL08A | 3CTL08A491.ESTROGENAGONIST |
| 2_CTL04E_491.ESTROGEN_AGONIST | ESTROGENAGONIST | CTL04E | 2CTL04E491.ESTROGENAGONIST |
| 1_CTL08A_491.ESTROGEN_AGONIST | ESTROGENAGONIST | CTL08A | 1CTL08A491.ESTROGENAGONIST |
| ... | ... | ... | ... |
| 1_CTL08A_509.RET_ANTAGONIST | RETANTAGONIST | CTL08A | 1CTL08A509.RETANTAGONIST |
| 3_CTL04E_509.RET_ANTAGONIST | RETANTAGONIST | CTL04E | 3CTL04E509.RETANTAGONIST |
| 1_CTL04E_509.RET_ANTAGONIST | RETANTAGONIST | CTL04E | 1CTL04E509.RETANTAGONIST |
| 2_CTL04E_509.RET_ANTAGONIST | RETANTAGONIST | CTL04E | 2CTL04E509.RETANTAGONIST |
| 3_CTL08A_509.RET_ANTAGONIST | RETANTAGONIST | CTL08A | 3CTL08A509.RETANTAGONIST |
85 rows × 3 columns
%load_ext rpy2.ipython
pd.DataFrame.iteritems = pd.DataFrame.items
%%R -i MD -i Counts
library(edgeR)
library(org.Hs.eg.db)
library(AnnotationDbi)
library(stats)
library(topGO)
R[write to console]: Loading required package: limma
R[write to console]: Loading required package: AnnotationDbi
R[write to console]: Loading required package: stats4
R[write to console]: Loading required package: BiocGenerics
R[write to console]:
Attaching package: ‘BiocGenerics’
R[write to console]: The following object is masked from ‘package:limma’:
plotMA
R[write to console]: The following objects are masked from ‘package:stats’:
IQR, mad, sd, var, xtabs
R[write to console]: The following objects are masked from ‘package:base’:
anyDuplicated, aperm, append, as.data.frame, basename, cbind,
colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
table, tapply, union, unique, unsplit, which.max, which.min
R[write to console]: Loading required package: Biobase
R[write to console]: Welcome to Bioconductor
Vignettes contain introductory material; view with
'browseVignettes()'. To cite Bioconductor, see
'citation("Biobase")', and for packages 'citation("pkgname")'.
R[write to console]: Loading required package: IRanges
R[write to console]: Loading required package: S4Vectors
R[write to console]:
Attaching package: ‘S4Vectors’
R[write to console]: The following object is masked from ‘package:utils’:
findMatches
R[write to console]: The following objects are masked from ‘package:base’:
expand.grid, I, unname
R[write to console]:
R[write to console]: Loading required package: graph
R[write to console]: Loading required package: GO.db
R[write to console]:
R[write to console]: Loading required package: SparseM
R[write to console]:
Attaching package: ‘SparseM’
R[write to console]: The following object is masked from ‘package:base’:
backsolve
R[write to console]:
groupGOTerms: GOBPTerm, GOMFTerm, GOCCTerm environments built.
R[write to console]:
Attaching package: ‘topGO’
R[write to console]: The following object is masked from ‘package:IRanges’:
members
MD
| condition | line | sample | |
|---|---|---|---|
| 3_CTL04E_491.ESTROGEN_AGONIST | ESTROGENAGONIST | CTL04E | 3CTL04E491.ESTROGENAGONIST |
| 2_CTL08A_491.ESTROGEN_AGONIST | ESTROGENAGONIST | CTL08A | 2CTL08A491.ESTROGENAGONIST |
| 3_CTL08A_491.ESTROGEN_AGONIST | ESTROGENAGONIST | CTL08A | 3CTL08A491.ESTROGENAGONIST |
| 2_CTL04E_491.ESTROGEN_AGONIST | ESTROGENAGONIST | CTL04E | 2CTL04E491.ESTROGENAGONIST |
| 1_CTL08A_491.ESTROGEN_AGONIST | ESTROGENAGONIST | CTL08A | 1CTL08A491.ESTROGENAGONIST |
| ... | ... | ... | ... |
| 1_CTL08A_509.RET_ANTAGONIST | RETANTAGONIST | CTL08A | 1CTL08A509.RETANTAGONIST |
| 3_CTL04E_509.RET_ANTAGONIST | RETANTAGONIST | CTL04E | 3CTL04E509.RETANTAGONIST |
| 1_CTL04E_509.RET_ANTAGONIST | RETANTAGONIST | CTL04E | 1CTL04E509.RETANTAGONIST |
| 2_CTL04E_509.RET_ANTAGONIST | RETANTAGONIST | CTL04E | 2CTL04E509.RETANTAGONIST |
| 3_CTL08A_509.RET_ANTAGONIST | RETANTAGONIST | CTL08A | 3CTL08A509.RETANTAGONIST |
85 rows × 3 columns
%%R -i MD -i Counts -o MDS
CondFactor <- factor(MD[["condition"]],
levels = c("DMSO", setdiff(unique(MD[["condition"]]), "DMSO")))
lineFactor <- factor(MD[["line"]],
levels = c("CTL08A","CTL04E"))
mm <- model.matrix(~0+lineFactor+CondFactor )
rownames(mm) <- colnames(Counts)
dds <- DGEList(Counts, group = CondFactor, genes = rownames(Counts))
# Add metadata to dge
dds$samples$line <- lineFactor
dds$samples$nCells <- MD[["nCells"]]
keep.genes<-filterByExpr(dds,design=mm)
print(table(keep.genes))
dds <- dds[keep.genes,, keep.lib.sizes=FALSE]
dds <- calcNormFactors(dds)
# Store MDS
png(tempfile(fileext = ".png"))
MDS <- plotMDS(dds, pch=16, main="MDS")
dev.off()
#Fit
dds <- estimateGLMRobustDisp(dds, mm)
fit <- glmQLFit(dds, mm)
print(dds$samples)
plotBCV(dds)
plotQLDisp(fit)
keep.genes
FALSE TRUE
14985 20492
group lib.size norm.factors
3_CTL04E_491.ESTROGEN_AGONIST ESTROGENAGONIST 4506223 1.0105022
2_CTL08A_491.ESTROGEN_AGONIST ESTROGENAGONIST 7880413 1.0371210
3_CTL08A_491.ESTROGEN_AGONIST ESTROGENAGONIST 10571764 1.0012554
2_CTL04E_491.ESTROGEN_AGONIST ESTROGENAGONIST 6635653 0.9742494
1_CTL08A_491.ESTROGEN_AGONIST ESTROGENAGONIST 10075647 1.0146013
1_CTL04E_491.ESTROGEN_AGONIST ESTROGENAGONIST 6610820 0.9786996
3_CTL04E_492.ESTROGEN_ANTAGONIST ESTROGENANTAGONIST 6472394 1.0837762
1_CTL08A_492.ESTROGEN_ANTAGONIST ESTROGENANTAGONIST 8313620 1.0617799
1_CTL04E_492.ESTROGEN_ANTAGONIST ESTROGENANTAGONIST 5277813 1.0257419
2_CTL04E_492.ESTROGEN_ANTAGONIST ESTROGENANTAGONIST 6584857 1.0396631
2_CTL08A_492.ESTROGEN_ANTAGONIST ESTROGENANTAGONIST 5921627 1.0585013
3_CTL08A_492.ESTROGEN_ANTAGONIST ESTROGENANTAGONIST 9512274 1.0489234
2_CTL08A_493.ANDROGEN_AGONIST ANDROGENAGONIST 5889247 1.0675556
3_CTL04E_493.ANDROGEN_AGONIST ANDROGENAGONIST 4196274 1.0117911
3_CTL08A_493.ANDROGEN_AGONIST ANDROGENAGONIST 10380156 1.0564750
2_CTL04E_493.ANDROGEN_AGONIST ANDROGENAGONIST 3587522 1.0239360
1_CTL04E_493.ANDROGEN_AGONIST ANDROGENAGONIST 5268490 1.0234005
1_CTL08A_493.ANDROGEN_AGONIST ANDROGENAGONIST 8123624 1.0603798
2_CTL08A_494.ANDROGEN_ANTAGONIST ANDROGENANTAGONIST 7308529 1.0333175
2_CTL04E_494.ANDROGEN_ANTAGONIST ANDROGENANTAGONIST 8141237 0.9988268
1_CTL04E_494.ANDROGEN_ANTAGONIST ANDROGENANTAGONIST 6836584 1.0214603
3_CTL04E_494.ANDROGEN_ANTAGONIST ANDROGENANTAGONIST 6162274 1.0340825
3_CTL08A_494.ANDROGEN_ANTAGONIST ANDROGENANTAGONIST 11243091 1.0327739
1_CTL08A_494.ANDROGEN_ANTAGONIST ANDROGENANTAGONIST 11436707 1.0631621
1_CTL04E_495.ARYL_HYD_AGONIST ARYLHYDAGONIST 7959705 0.9579851
2_CTL04E_495.ARYL_HYD_AGONIST ARYLHYDAGONIST 7318266 0.9596384
3_CTL08A_495.ARYL_HYD_AGONIST ARYLHYDAGONIST 9928610 1.0264472
3_CTL04E_495.ARYL_HYD_AGONIST ARYLHYDAGONIST 7402363 0.9501675
1_CTL08A_495.ARYL_HYD_AGONIST ARYLHYDAGONIST 10763131 1.0027056
2_CTL08A_495.ARYL_HYD_AGONIST ARYLHYDAGONIST 9833980 1.0103967
2_CTL04E_496.ARYL_HYD_ANTAGONIST ARYLHYDANTAGONIST 10442690 0.9764821
2_CTL08A_496.ARYL_HYD_ANTAGONIST ARYLHYDANTAGONIST 18306133 1.0306764
1_CTL08A_496.ARYL_HYD_ANTAGONIST ARYLHYDANTAGONIST 17365020 1.0507819
3_CTL04E_496.ARYL_HYD_ANTAGONIST ARYLHYDANTAGONIST 10888637 0.9707359
1_CTL04E_496.ARYL_HYD_ANTAGONIST ARYLHYDANTAGONIST 9154621 0.9793797
3_CTL08A_496.ARYL_HYD_ANTAGONIST ARYLHYDANTAGONIST 17244841 1.0500827
3_CTL04E_497.LIVER-X_AGONIST LIVERXAGONIST 8768082 0.9629603
2_CTL04E_497.LIVER-X_AGONIST LIVERXAGONIST 9403110 0.9394795
3_CTL08A_497.LIVER-X_AGONIST LIVERXAGONIST 10870575 0.9719186
2_CTL08A_497.LIVER-X_AGONIST LIVERXAGONIST 11363440 0.9901663
1_CTL04E_497.LIVER-X_AGONIST LIVERXAGONIST 8494381 0.9681336
1_CTL08A_497.LIVER-X_AGONIST LIVERXAGONIST 11417965 0.9558378
1_CTL04E_498.LIVER-X_ANTAGONIST LIVERXANTAGONIST 6546641 1.0146554
3_CTL08A_498.LIVER-X_ANTAGONIST LIVERXANTAGONIST 12990696 1.0378899
1_CTL08A_498.LIVER-X_ANTAGONIST LIVERXANTAGONIST 9453799 1.0130420
3_CTL04E_498.LIVER-X_ANTAGONIST LIVERXANTAGONIST 6273425 1.0120743
2_CTL04E_498.LIVER-X_ANTAGONIST LIVERXANTAGONIST 5326492 1.0058744
2_CTL08A_498.LIVER-X_ANTAGONIST LIVERXANTAGONIST 12353760 1.0093471
1_CTL08A_499.GLUCOCORT_AGONIST GLUCOCORTAGONIST 8002881 0.9723379
2_CTL04E_499.GLUCOCORT_AGONIST GLUCOCORTAGONIST 4898268 0.9468700
3_CTL04E_499.GLUCOCORT_AGONIST GLUCOCORTAGONIST 4272847 0.9735537
3_CTL08A_499.GLUCOCORT_AGONIST GLUCOCORTAGONIST 9558709 0.9846125
1_CTL04E_499.GLUCOCORT_AGONIST GLUCOCORTAGONIST 4889538 0.9448780
2_CTL08A_499.GLUCOCORT_AGONIST GLUCOCORTAGONIST 6903380 1.0028450
2_CTL04E_500.GLUCOCORT_ANTAGONIST GLUCOCORTANTAGONIST 6180425 0.8940352
1_CTL04E_500.GLUCOCORT_ANTAGONIST GLUCOCORTANTAGONIST 5956997 0.9475558
3_CTL04E_500.GLUCOCORT_ANTAGONIST GLUCOCORTANTAGONIST 4633565 0.9496488
2_CTL08A_500.GLUCOCORT_ANTAGONIST GLUCOCORTANTAGONIST 6023408 0.9533385
1_CTL08A_500.GLUCOCORT_ANTAGONIST GLUCOCORTANTAGONIST 8249680 0.9371614
3_CTL08A_500.GLUCOCORT_ANTAGONIST GLUCOCORTANTAGONIST 7436954 0.9453448
3_CTL08A_501.THYROID_AGONIST THYROIDAGONIST 8637321 1.0191955
3_CTL04E_501.THYROID_AGONIST THYROIDAGONIST 4464469 0.9732673
1_CTL08A_501.THYROID_AGONIST THYROIDAGONIST 8857754 1.0021221
2_CTL04E_501.THYROID_AGONIST THYROIDAGONIST 5411483 0.9785358
1_CTL04E_501.THYROID_AGONIST THYROIDAGONIST 5929016 0.9787484
2_CTL08A_501.THYROID_AGONIST THYROIDAGONIST 8488455 1.0195483
3_CTL04E_502.THYROID_ANTAGONIST THYROIDANTAGONIST 5449247 0.9555885
3_CTL08A_502.THYROID_ANTAGONIST THYROIDANTAGONIST 9847129 0.9985029
1_CTL04E_502.THYROID_ANTAGONIST THYROIDANTAGONIST 2477114 0.9712344
1_CTL08A_502.THYROID_ANTAGONIST THYROIDANTAGONIST 9094620 0.9828777
2_CTL08A_502.THYROID_ANTAGONIST THYROIDANTAGONIST 7934447 1.0255668
2_CTL04E_502.THYROID_ANTAGONIST THYROIDANTAGONIST 5329408 0.9583959
2_CTL08A_505.DMSO DMSO 10464019 0.9803706
2_CTL04E_505.DMSO DMSO 4722699 0.9655423
1_CTL08A_505.DMSO DMSO 8794699 0.9789364
3_CTL04E_505.DMSO DMSO 4854323 0.9742403
1_CTL04E_505.DMSO DMSO 3663543 0.9676696
3_CTL08A_505.DMSO DMSO 7575222 0.9593275
1_CTL08A_508.RET_AGONIST RETAGONIST 17558311 1.1595676
1_CTL04E_508.RET_AGONIST RETAGONIST 16129348 1.1650216
1_CTL08A_509.RET_ANTAGONIST RETANTAGONIST 9977724 1.0316682
3_CTL04E_509.RET_ANTAGONIST RETANTAGONIST 3755716 0.9789962
1_CTL04E_509.RET_ANTAGONIST RETANTAGONIST 4635743 0.9946011
2_CTL04E_509.RET_ANTAGONIST RETANTAGONIST 3840109 0.9874942
3_CTL08A_509.RET_ANTAGONIST RETANTAGONIST 9688062 1.0140777
line
3_CTL04E_491.ESTROGEN_AGONIST CTL04E
2_CTL08A_491.ESTROGEN_AGONIST CTL08A
3_CTL08A_491.ESTROGEN_AGONIST CTL08A
2_CTL04E_491.ESTROGEN_AGONIST CTL04E
1_CTL08A_491.ESTROGEN_AGONIST CTL08A
1_CTL04E_491.ESTROGEN_AGONIST CTL04E
3_CTL04E_492.ESTROGEN_ANTAGONIST CTL04E
1_CTL08A_492.ESTROGEN_ANTAGONIST CTL08A
1_CTL04E_492.ESTROGEN_ANTAGONIST CTL04E
2_CTL04E_492.ESTROGEN_ANTAGONIST CTL04E
2_CTL08A_492.ESTROGEN_ANTAGONIST CTL08A
3_CTL08A_492.ESTROGEN_ANTAGONIST CTL08A
2_CTL08A_493.ANDROGEN_AGONIST CTL08A
3_CTL04E_493.ANDROGEN_AGONIST CTL04E
3_CTL08A_493.ANDROGEN_AGONIST CTL08A
2_CTL04E_493.ANDROGEN_AGONIST CTL04E
1_CTL04E_493.ANDROGEN_AGONIST CTL04E
1_CTL08A_493.ANDROGEN_AGONIST CTL08A
2_CTL08A_494.ANDROGEN_ANTAGONIST CTL08A
2_CTL04E_494.ANDROGEN_ANTAGONIST CTL04E
1_CTL04E_494.ANDROGEN_ANTAGONIST CTL04E
3_CTL04E_494.ANDROGEN_ANTAGONIST CTL04E
3_CTL08A_494.ANDROGEN_ANTAGONIST CTL08A
1_CTL08A_494.ANDROGEN_ANTAGONIST CTL08A
1_CTL04E_495.ARYL_HYD_AGONIST CTL04E
2_CTL04E_495.ARYL_HYD_AGONIST CTL04E
3_CTL08A_495.ARYL_HYD_AGONIST CTL08A
3_CTL04E_495.ARYL_HYD_AGONIST CTL04E
1_CTL08A_495.ARYL_HYD_AGONIST CTL08A
2_CTL08A_495.ARYL_HYD_AGONIST CTL08A
2_CTL04E_496.ARYL_HYD_ANTAGONIST CTL04E
2_CTL08A_496.ARYL_HYD_ANTAGONIST CTL08A
1_CTL08A_496.ARYL_HYD_ANTAGONIST CTL08A
3_CTL04E_496.ARYL_HYD_ANTAGONIST CTL04E
1_CTL04E_496.ARYL_HYD_ANTAGONIST CTL04E
3_CTL08A_496.ARYL_HYD_ANTAGONIST CTL08A
3_CTL04E_497.LIVER-X_AGONIST CTL04E
2_CTL04E_497.LIVER-X_AGONIST CTL04E
3_CTL08A_497.LIVER-X_AGONIST CTL08A
2_CTL08A_497.LIVER-X_AGONIST CTL08A
1_CTL04E_497.LIVER-X_AGONIST CTL04E
1_CTL08A_497.LIVER-X_AGONIST CTL08A
1_CTL04E_498.LIVER-X_ANTAGONIST CTL04E
3_CTL08A_498.LIVER-X_ANTAGONIST CTL08A
1_CTL08A_498.LIVER-X_ANTAGONIST CTL08A
3_CTL04E_498.LIVER-X_ANTAGONIST CTL04E
2_CTL04E_498.LIVER-X_ANTAGONIST CTL04E
2_CTL08A_498.LIVER-X_ANTAGONIST CTL08A
1_CTL08A_499.GLUCOCORT_AGONIST CTL08A
2_CTL04E_499.GLUCOCORT_AGONIST CTL04E
3_CTL04E_499.GLUCOCORT_AGONIST CTL04E
3_CTL08A_499.GLUCOCORT_AGONIST CTL08A
1_CTL04E_499.GLUCOCORT_AGONIST CTL04E
2_CTL08A_499.GLUCOCORT_AGONIST CTL08A
2_CTL04E_500.GLUCOCORT_ANTAGONIST CTL04E
1_CTL04E_500.GLUCOCORT_ANTAGONIST CTL04E
3_CTL04E_500.GLUCOCORT_ANTAGONIST CTL04E
2_CTL08A_500.GLUCOCORT_ANTAGONIST CTL08A
1_CTL08A_500.GLUCOCORT_ANTAGONIST CTL08A
3_CTL08A_500.GLUCOCORT_ANTAGONIST CTL08A
3_CTL08A_501.THYROID_AGONIST CTL08A
3_CTL04E_501.THYROID_AGONIST CTL04E
1_CTL08A_501.THYROID_AGONIST CTL08A
2_CTL04E_501.THYROID_AGONIST CTL04E
1_CTL04E_501.THYROID_AGONIST CTL04E
2_CTL08A_501.THYROID_AGONIST CTL08A
3_CTL04E_502.THYROID_ANTAGONIST CTL04E
3_CTL08A_502.THYROID_ANTAGONIST CTL08A
1_CTL04E_502.THYROID_ANTAGONIST CTL04E
1_CTL08A_502.THYROID_ANTAGONIST CTL08A
2_CTL08A_502.THYROID_ANTAGONIST CTL08A
2_CTL04E_502.THYROID_ANTAGONIST CTL04E
2_CTL08A_505.DMSO CTL08A
2_CTL04E_505.DMSO CTL04E
1_CTL08A_505.DMSO CTL08A
3_CTL04E_505.DMSO CTL04E
1_CTL04E_505.DMSO CTL04E
3_CTL08A_505.DMSO CTL08A
1_CTL08A_508.RET_AGONIST CTL08A
1_CTL04E_508.RET_AGONIST CTL04E
1_CTL08A_509.RET_ANTAGONIST CTL08A
3_CTL04E_509.RET_ANTAGONIST CTL04E
1_CTL04E_509.RET_ANTAGONIST CTL04E
2_CTL04E_509.RET_ANTAGONIST CTL04E
3_CTL08A_509.RET_ANTAGONIST CTL08A
adataPbulk.obsm["MDS"] = np.vstack([MDS["x"],MDS["y"]]).T
sc.pl.embedding(adataPbulk, basis="MDS", color=["line","condition","nCells"], wspace=.6, size=400)
%%R -o SharedConditionVsDmso
AllGenes <- dim(Counts)[1]
SharedConditionVsDmso <- list()
for (condition in setdiff(unique(MD[["condition"]]), "DMSO")) {
# Construct the contrast expression
contrastExpression <- paste("CondFactor", condition, sep = "")
print(contrastExpression)
# Use the correct argument name 'contrasts' in makeContrasts
mycontrast <- makeContrasts(contrasts = contrastExpression, levels = mm)
# Remove the extraneous comma and correctly pass all parameters to glmQLFTest
SharedConditionVsDmso[[paste(condition, ".VS.DMSO",sep = "") ]] <- topTags(glmQLFTest(fit, contrast = mycontrast), adjust.method = "fdr", n = AllGenes, sort.by = "logFC")$table
# Optionally, print or store the results
}
[1] "CondFactorESTROGENAGONIST" [1] "CondFactorESTROGENANTAGONIST" [1] "CondFactorANDROGENAGONIST" [1] "CondFactorANDROGENANTAGONIST" [1] "CondFactorARYLHYDAGONIST" [1] "CondFactorARYLHYDANTAGONIST" [1] "CondFactorLIVERXAGONIST" [1] "CondFactorLIVERXANTAGONIST" [1] "CondFactorGLUCOCORTAGONIST" [1] "CondFactorGLUCOCORTANTAGONIST" [1] "CondFactorTHYROIDAGONIST" [1] "CondFactorTHYROIDANTAGONIST" [1] "CondFactorRETAGONIST" [1] "CondFactorRETANTAGONIST"
%%R -o SharedAgonistVsAntagonist
AllGenes <- dim(Counts)[1]
SharedAgonistVsAntagonist <- list()
for (condition in setdiff(unique(MD[["condition"]]), "DMSO")) {
print(condition)
# Only process conditions containing "AGONIST"
if (!grepl("ANTAGONIST", condition)) {
# Replace "AGONIST" with "ANTAGONIST" in the condition string
condAntagonist <- gsub("AGONIST", "ANTAGONIST", condition)
# Construct the contrast expression; adjust the separator as needed
contrastExpression <- paste("CondFactor", condition, "-", "CondFactor", condAntagonist, sep = "")
# Create the contrast using makeContrasts (ensure mm is defined)
mycontrast <- makeContrasts(contrasts = contrastExpression, levels = mm)
# Perform the glmQLFTest and extract the topTags table;
# ensure AllGenes is defined as the number of genes to report
SharedAgonistVsAntagonist[[paste(condition, ".VS.",condAntagonist,"LineShared",sep = "") ]] <- topTags(glmQLFTest(fit, contrast = mycontrast),
adjust.method = "fdr",
n = AllGenes,
sort.by = "logFC")$table
}
}
[1] "ESTROGENAGONIST" [1] "ESTROGENANTAGONIST" [1] "ANDROGENAGONIST" [1] "ANDROGENANTAGONIST" [1] "ARYLHYDAGONIST" [1] "ARYLHYDANTAGONIST" [1] "LIVERXAGONIST" [1] "LIVERXANTAGONIST" [1] "GLUCOCORTAGONIST" [1] "GLUCOCORTANTAGONIST" [1] "THYROIDAGONIST" [1] "THYROIDANTAGONIST" [1] "RETAGONIST" [1] "RETANTAGONIST"
%%R -i MD -i Counts -o MDS
Group <- factor(paste(MD$line,MD$condition,sep="."))
mm <-model.matrix(~0+Group, data=Group)
colnames(mm)<-levels(Group)
rownames(mm)<-rownames(MD)
dds <- DGEList(Counts, group = Group, genes = rownames(Counts))
keep.genes<-filterByExpr(dds,design=mm)
dds <- dds[keep.genes,, keep.lib.sizes=FALSE]
dds <- calcNormFactors(dds)
dds <- estimateGLMRobustDisp(dds, mm)
fit <- glmQLFit(dds, mm)
# Store MDS
png(tempfile(fileext = ".png"))
MDS <- plotMDS(dds, pch=16, main="MDS")
dev.off()
print(dds$samples)
plotBCV(dds)
group lib.size
3_CTL04E_491.ESTROGEN_AGONIST CTL04E.ESTROGENAGONIST 4507572
2_CTL08A_491.ESTROGEN_AGONIST CTL08A.ESTROGENAGONIST 7882953
3_CTL08A_491.ESTROGEN_AGONIST CTL08A.ESTROGENAGONIST 10574899
2_CTL04E_491.ESTROGEN_AGONIST CTL04E.ESTROGENAGONIST 6637449
1_CTL08A_491.ESTROGEN_AGONIST CTL08A.ESTROGENAGONIST 10078775
1_CTL04E_491.ESTROGEN_AGONIST CTL04E.ESTROGENAGONIST 6612690
3_CTL04E_492.ESTROGEN_ANTAGONIST CTL04E.ESTROGENANTAGONIST 6474589
1_CTL08A_492.ESTROGEN_ANTAGONIST CTL08A.ESTROGENANTAGONIST 8316210
1_CTL04E_492.ESTROGEN_ANTAGONIST CTL04E.ESTROGENANTAGONIST 5279470
2_CTL04E_492.ESTROGEN_ANTAGONIST CTL04E.ESTROGENANTAGONIST 6586853
2_CTL08A_492.ESTROGEN_ANTAGONIST CTL08A.ESTROGENANTAGONIST 5923504
3_CTL08A_492.ESTROGEN_ANTAGONIST CTL08A.ESTROGENANTAGONIST 9515096
2_CTL08A_493.ANDROGEN_AGONIST CTL08A.ANDROGENAGONIST 5891373
3_CTL04E_493.ANDROGEN_AGONIST CTL04E.ANDROGENAGONIST 4197851
3_CTL08A_493.ANDROGEN_AGONIST CTL08A.ANDROGENAGONIST 10384185
2_CTL04E_493.ANDROGEN_AGONIST CTL04E.ANDROGENAGONIST 3588783
1_CTL04E_493.ANDROGEN_AGONIST CTL04E.ANDROGENAGONIST 5270309
1_CTL08A_493.ANDROGEN_AGONIST CTL08A.ANDROGENAGONIST 8126510
2_CTL08A_494.ANDROGEN_ANTAGONIST CTL08A.ANDROGENANTAGONIST 7311044
2_CTL04E_494.ANDROGEN_ANTAGONIST CTL04E.ANDROGENANTAGONIST 8143612
1_CTL04E_494.ANDROGEN_ANTAGONIST CTL04E.ANDROGENANTAGONIST 6838932
3_CTL04E_494.ANDROGEN_ANTAGONIST CTL04E.ANDROGENANTAGONIST 6164523
3_CTL08A_494.ANDROGEN_ANTAGONIST CTL08A.ANDROGENANTAGONIST 11246639
1_CTL08A_494.ANDROGEN_ANTAGONIST CTL08A.ANDROGENANTAGONIST 11440904
1_CTL04E_495.ARYL_HYD_AGONIST CTL04E.ARYLHYDAGONIST 7961702
2_CTL04E_495.ARYL_HYD_AGONIST CTL04E.ARYLHYDAGONIST 7320284
3_CTL08A_495.ARYL_HYD_AGONIST CTL08A.ARYLHYDAGONIST 9931432
3_CTL04E_495.ARYL_HYD_AGONIST CTL04E.ARYLHYDAGONIST 7404226
1_CTL08A_495.ARYL_HYD_AGONIST CTL08A.ARYLHYDAGONIST 10766239
2_CTL08A_495.ARYL_HYD_AGONIST CTL08A.ARYLHYDAGONIST 9836854
2_CTL04E_496.ARYL_HYD_ANTAGONIST CTL04E.ARYLHYDANTAGONIST 10445586
2_CTL08A_496.ARYL_HYD_ANTAGONIST CTL08A.ARYLHYDANTAGONIST 18311649
1_CTL08A_496.ARYL_HYD_ANTAGONIST CTL08A.ARYLHYDANTAGONIST 17371527
3_CTL04E_496.ARYL_HYD_ANTAGONIST CTL04E.ARYLHYDANTAGONIST 10891755
1_CTL04E_496.ARYL_HYD_ANTAGONIST CTL04E.ARYLHYDANTAGONIST 9157150
3_CTL08A_496.ARYL_HYD_ANTAGONIST CTL08A.ARYLHYDANTAGONIST 17250384
3_CTL04E_497.LIVER-X_AGONIST CTL04E.LIVERXAGONIST 8770420
2_CTL04E_497.LIVER-X_AGONIST CTL04E.LIVERXAGONIST 9405482
3_CTL08A_497.LIVER-X_AGONIST CTL08A.LIVERXAGONIST 10873305
2_CTL08A_497.LIVER-X_AGONIST CTL08A.LIVERXAGONIST 11366499
1_CTL04E_497.LIVER-X_AGONIST CTL04E.LIVERXAGONIST 8496579
1_CTL08A_497.LIVER-X_AGONIST CTL08A.LIVERXAGONIST 11420634
1_CTL04E_498.LIVER-X_ANTAGONIST CTL04E.LIVERXANTAGONIST 6548666
3_CTL08A_498.LIVER-X_ANTAGONIST CTL08A.LIVERXANTAGONIST 12994627
1_CTL08A_498.LIVER-X_ANTAGONIST CTL08A.LIVERXANTAGONIST 9456556
3_CTL04E_498.LIVER-X_ANTAGONIST CTL04E.LIVERXANTAGONIST 6275308
2_CTL04E_498.LIVER-X_ANTAGONIST CTL04E.LIVERXANTAGONIST 5328041
2_CTL08A_498.LIVER-X_ANTAGONIST CTL08A.LIVERXANTAGONIST 12357385
1_CTL08A_499.GLUCOCORT_AGONIST CTL08A.GLUCOCORTAGONIST 8005353
2_CTL04E_499.GLUCOCORT_AGONIST CTL04E.GLUCOCORTAGONIST 4899582
3_CTL04E_499.GLUCOCORT_AGONIST CTL04E.GLUCOCORTAGONIST 4274187
3_CTL08A_499.GLUCOCORT_AGONIST CTL08A.GLUCOCORTAGONIST 9562181
1_CTL04E_499.GLUCOCORT_AGONIST CTL04E.GLUCOCORTAGONIST 4890989
2_CTL08A_499.GLUCOCORT_AGONIST CTL08A.GLUCOCORTAGONIST 6905806
2_CTL04E_500.GLUCOCORT_ANTAGONIST CTL04E.GLUCOCORTANTAGONIST 6182072
1_CTL04E_500.GLUCOCORT_ANTAGONIST CTL04E.GLUCOCORTANTAGONIST 5958742
3_CTL04E_500.GLUCOCORT_ANTAGONIST CTL04E.GLUCOCORTANTAGONIST 4634932
2_CTL08A_500.GLUCOCORT_ANTAGONIST CTL08A.GLUCOCORTANTAGONIST 6025228
1_CTL08A_500.GLUCOCORT_ANTAGONIST CTL08A.GLUCOCORTANTAGONIST 8252109
3_CTL08A_500.GLUCOCORT_ANTAGONIST CTL08A.GLUCOCORTANTAGONIST 7439342
3_CTL08A_501.THYROID_AGONIST CTL08A.THYROIDAGONIST 8639810
3_CTL04E_501.THYROID_AGONIST CTL04E.THYROIDAGONIST 4466026
1_CTL08A_501.THYROID_AGONIST CTL08A.THYROIDAGONIST 8860278
2_CTL04E_501.THYROID_AGONIST CTL04E.THYROIDAGONIST 5412927
1_CTL04E_501.THYROID_AGONIST CTL04E.THYROIDAGONIST 5930588
2_CTL08A_501.THYROID_AGONIST CTL08A.THYROIDAGONIST 8491023
3_CTL04E_502.THYROID_ANTAGONIST CTL04E.THYROIDANTAGONIST 5450621
3_CTL08A_502.THYROID_ANTAGONIST CTL08A.THYROIDANTAGONIST 9849883
1_CTL04E_502.THYROID_ANTAGONIST CTL04E.THYROIDANTAGONIST 2477807
1_CTL08A_502.THYROID_ANTAGONIST CTL08A.THYROIDANTAGONIST 9097493
2_CTL08A_502.THYROID_ANTAGONIST CTL08A.THYROIDANTAGONIST 7936874
2_CTL04E_502.THYROID_ANTAGONIST CTL04E.THYROIDANTAGONIST 5330888
2_CTL08A_505.DMSO CTL08A.DMSO 10466916
2_CTL04E_505.DMSO CTL04E.DMSO 4724773
1_CTL08A_505.DMSO CTL08A.DMSO 8797791
3_CTL04E_505.DMSO CTL04E.DMSO 4855880
1_CTL04E_505.DMSO CTL04E.DMSO 3664568
3_CTL08A_505.DMSO CTL08A.DMSO 7577513
1_CTL08A_508.RET_AGONIST CTL08A.RETAGONIST 17568712
1_CTL04E_508.RET_AGONIST CTL04E.RETAGONIST 16140576
1_CTL08A_509.RET_ANTAGONIST CTL08A.RETANTAGONIST 9980971
3_CTL04E_509.RET_ANTAGONIST CTL04E.RETANTAGONIST 3756860
1_CTL04E_509.RET_ANTAGONIST CTL04E.RETANTAGONIST 4637133
2_CTL04E_509.RET_ANTAGONIST CTL04E.RETANTAGONIST 3841238
3_CTL08A_509.RET_ANTAGONIST CTL08A.RETANTAGONIST 9690941
norm.factors
3_CTL04E_491.ESTROGEN_AGONIST 1.0116313
2_CTL08A_491.ESTROGEN_AGONIST 1.0367609
3_CTL08A_491.ESTROGEN_AGONIST 0.9977125
2_CTL04E_491.ESTROGEN_AGONIST 0.9780278
1_CTL08A_491.ESTROGEN_AGONIST 1.0098223
1_CTL04E_491.ESTROGEN_AGONIST 0.9830482
3_CTL04E_492.ESTROGEN_ANTAGONIST 1.0848373
1_CTL08A_492.ESTROGEN_ANTAGONIST 1.0634887
1_CTL04E_492.ESTROGEN_ANTAGONIST 1.0349239
2_CTL04E_492.ESTROGEN_ANTAGONIST 1.0422559
2_CTL08A_492.ESTROGEN_ANTAGONIST 1.0638383
3_CTL08A_492.ESTROGEN_ANTAGONIST 1.0538380
2_CTL08A_493.ANDROGEN_AGONIST 1.0641311
3_CTL04E_493.ANDROGEN_AGONIST 1.0232572
3_CTL08A_493.ANDROGEN_AGONIST 1.0415615
2_CTL04E_493.ANDROGEN_AGONIST 1.0251596
1_CTL04E_493.ANDROGEN_AGONIST 1.0255222
1_CTL08A_493.ANDROGEN_AGONIST 1.0494780
2_CTL08A_494.ANDROGEN_ANTAGONIST 1.0441893
2_CTL04E_494.ANDROGEN_ANTAGONIST 1.0037397
1_CTL04E_494.ANDROGEN_ANTAGONIST 1.0339053
3_CTL04E_494.ANDROGEN_ANTAGONIST 1.0515291
3_CTL08A_494.ANDROGEN_ANTAGONIST 1.0359530
1_CTL08A_494.ANDROGEN_ANTAGONIST 1.0622477
1_CTL04E_495.ARYL_HYD_AGONIST 0.9507289
2_CTL04E_495.ARYL_HYD_AGONIST 0.9655621
3_CTL08A_495.ARYL_HYD_AGONIST 1.0153127
3_CTL04E_495.ARYL_HYD_AGONIST 0.9537987
1_CTL08A_495.ARYL_HYD_AGONIST 0.9948123
2_CTL08A_495.ARYL_HYD_AGONIST 1.0037303
2_CTL04E_496.ARYL_HYD_ANTAGONIST 0.9753806
2_CTL08A_496.ARYL_HYD_ANTAGONIST 1.0341837
1_CTL08A_496.ARYL_HYD_ANTAGONIST 1.0479335
3_CTL04E_496.ARYL_HYD_ANTAGONIST 0.9754279
1_CTL04E_496.ARYL_HYD_ANTAGONIST 0.9762744
3_CTL08A_496.ARYL_HYD_ANTAGONIST 1.0585913
3_CTL04E_497.LIVER-X_AGONIST 0.9592887
2_CTL04E_497.LIVER-X_AGONIST 0.9398888
3_CTL08A_497.LIVER-X_AGONIST 0.9643908
2_CTL08A_497.LIVER-X_AGONIST 0.9798928
1_CTL04E_497.LIVER-X_AGONIST 0.9617859
1_CTL08A_497.LIVER-X_AGONIST 0.9515573
1_CTL04E_498.LIVER-X_ANTAGONIST 1.0236481
3_CTL08A_498.LIVER-X_ANTAGONIST 1.0455333
1_CTL08A_498.LIVER-X_ANTAGONIST 1.0247475
3_CTL04E_498.LIVER-X_ANTAGONIST 1.0211916
2_CTL04E_498.LIVER-X_ANTAGONIST 1.0197096
2_CTL08A_498.LIVER-X_ANTAGONIST 1.0125297
1_CTL08A_499.GLUCOCORT_AGONIST 0.9587351
2_CTL04E_499.GLUCOCORT_AGONIST 0.9522264
3_CTL04E_499.GLUCOCORT_AGONIST 0.9727543
3_CTL08A_499.GLUCOCORT_AGONIST 0.9769958
1_CTL04E_499.GLUCOCORT_AGONIST 0.9487468
2_CTL08A_499.GLUCOCORT_AGONIST 1.0055538
2_CTL04E_500.GLUCOCORT_ANTAGONIST 0.9049248
1_CTL04E_500.GLUCOCORT_ANTAGONIST 0.9472611
3_CTL04E_500.GLUCOCORT_ANTAGONIST 0.9546171
2_CTL08A_500.GLUCOCORT_ANTAGONIST 0.9583504
1_CTL08A_500.GLUCOCORT_ANTAGONIST 0.9270345
3_CTL08A_500.GLUCOCORT_ANTAGONIST 0.9481311
3_CTL08A_501.THYROID_AGONIST 1.0056152
3_CTL04E_501.THYROID_AGONIST 0.9804307
1_CTL08A_501.THYROID_AGONIST 0.9975044
2_CTL04E_501.THYROID_AGONIST 0.9761977
1_CTL04E_501.THYROID_AGONIST 0.9798706
2_CTL08A_501.THYROID_AGONIST 1.0080354
3_CTL04E_502.THYROID_ANTAGONIST 0.9643569
3_CTL08A_502.THYROID_ANTAGONIST 0.9943966
1_CTL04E_502.THYROID_ANTAGONIST 0.9822730
1_CTL08A_502.THYROID_ANTAGONIST 0.9789649
2_CTL08A_502.THYROID_ANTAGONIST 1.0103410
2_CTL04E_502.THYROID_ANTAGONIST 0.9710954
2_CTL08A_505.DMSO 0.9718366
2_CTL04E_505.DMSO 0.9683724
1_CTL08A_505.DMSO 0.9647210
3_CTL04E_505.DMSO 0.9792731
1_CTL04E_505.DMSO 0.9711313
3_CTL08A_505.DMSO 0.9600512
1_CTL08A_508.RET_AGONIST 1.1578227
1_CTL04E_508.RET_AGONIST 1.1664596
1_CTL08A_509.RET_ANTAGONIST 1.0102882
3_CTL04E_509.RET_ANTAGONIST 0.9815613
1_CTL04E_509.RET_ANTAGONIST 0.9914735
2_CTL04E_509.RET_ANTAGONIST 0.9785729
3_CTL08A_509.RET_ANTAGONIST 0.9997802
%%R -o univserse
univserse <- rownames(dds)
pd.Series(list(univserse)).to_excel("./{}_{}_DEGs/UNIVERSE_DEGsPerLineAgonistVsAntagonist.xlsx".format(DS, celltype))
adataPbulk.obsm["MDS"] = np.vstack([MDS["x"],MDS["y"]]).T
sc.pl.embedding(adataPbulk, basis="MDS", color=["line","condition","nCells"], wspace=.6, size=400)
%%R
print(head(mm))
print(colnames(mm))
CTL04E.ANDROGENAGONIST CTL04E.ANDROGENANTAGONIST
3_CTL04E_491.ESTROGEN_AGONIST 0 0
2_CTL08A_491.ESTROGEN_AGONIST 0 0
3_CTL08A_491.ESTROGEN_AGONIST 0 0
2_CTL04E_491.ESTROGEN_AGONIST 0 0
1_CTL08A_491.ESTROGEN_AGONIST 0 0
1_CTL04E_491.ESTROGEN_AGONIST 0 0
CTL04E.ARYLHYDAGONIST CTL04E.ARYLHYDANTAGONIST
3_CTL04E_491.ESTROGEN_AGONIST 0 0
2_CTL08A_491.ESTROGEN_AGONIST 0 0
3_CTL08A_491.ESTROGEN_AGONIST 0 0
2_CTL04E_491.ESTROGEN_AGONIST 0 0
1_CTL08A_491.ESTROGEN_AGONIST 0 0
1_CTL04E_491.ESTROGEN_AGONIST 0 0
CTL04E.DMSO CTL04E.ESTROGENAGONIST
3_CTL04E_491.ESTROGEN_AGONIST 0 1
2_CTL08A_491.ESTROGEN_AGONIST 0 0
3_CTL08A_491.ESTROGEN_AGONIST 0 0
2_CTL04E_491.ESTROGEN_AGONIST 0 1
1_CTL08A_491.ESTROGEN_AGONIST 0 0
1_CTL04E_491.ESTROGEN_AGONIST 0 1
CTL04E.ESTROGENANTAGONIST CTL04E.GLUCOCORTAGONIST
3_CTL04E_491.ESTROGEN_AGONIST 0 0
2_CTL08A_491.ESTROGEN_AGONIST 0 0
3_CTL08A_491.ESTROGEN_AGONIST 0 0
2_CTL04E_491.ESTROGEN_AGONIST 0 0
1_CTL08A_491.ESTROGEN_AGONIST 0 0
1_CTL04E_491.ESTROGEN_AGONIST 0 0
CTL04E.GLUCOCORTANTAGONIST CTL04E.LIVERXAGONIST
3_CTL04E_491.ESTROGEN_AGONIST 0 0
2_CTL08A_491.ESTROGEN_AGONIST 0 0
3_CTL08A_491.ESTROGEN_AGONIST 0 0
2_CTL04E_491.ESTROGEN_AGONIST 0 0
1_CTL08A_491.ESTROGEN_AGONIST 0 0
1_CTL04E_491.ESTROGEN_AGONIST 0 0
CTL04E.LIVERXANTAGONIST CTL04E.RETAGONIST
3_CTL04E_491.ESTROGEN_AGONIST 0 0
2_CTL08A_491.ESTROGEN_AGONIST 0 0
3_CTL08A_491.ESTROGEN_AGONIST 0 0
2_CTL04E_491.ESTROGEN_AGONIST 0 0
1_CTL08A_491.ESTROGEN_AGONIST 0 0
1_CTL04E_491.ESTROGEN_AGONIST 0 0
CTL04E.RETANTAGONIST CTL04E.THYROIDAGONIST
3_CTL04E_491.ESTROGEN_AGONIST 0 0
2_CTL08A_491.ESTROGEN_AGONIST 0 0
3_CTL08A_491.ESTROGEN_AGONIST 0 0
2_CTL04E_491.ESTROGEN_AGONIST 0 0
1_CTL08A_491.ESTROGEN_AGONIST 0 0
1_CTL04E_491.ESTROGEN_AGONIST 0 0
CTL04E.THYROIDANTAGONIST CTL08A.ANDROGENAGONIST
3_CTL04E_491.ESTROGEN_AGONIST 0 0
2_CTL08A_491.ESTROGEN_AGONIST 0 0
3_CTL08A_491.ESTROGEN_AGONIST 0 0
2_CTL04E_491.ESTROGEN_AGONIST 0 0
1_CTL08A_491.ESTROGEN_AGONIST 0 0
1_CTL04E_491.ESTROGEN_AGONIST 0 0
CTL08A.ANDROGENANTAGONIST CTL08A.ARYLHYDAGONIST
3_CTL04E_491.ESTROGEN_AGONIST 0 0
2_CTL08A_491.ESTROGEN_AGONIST 0 0
3_CTL08A_491.ESTROGEN_AGONIST 0 0
2_CTL04E_491.ESTROGEN_AGONIST 0 0
1_CTL08A_491.ESTROGEN_AGONIST 0 0
1_CTL04E_491.ESTROGEN_AGONIST 0 0
CTL08A.ARYLHYDANTAGONIST CTL08A.DMSO
3_CTL04E_491.ESTROGEN_AGONIST 0 0
2_CTL08A_491.ESTROGEN_AGONIST 0 0
3_CTL08A_491.ESTROGEN_AGONIST 0 0
2_CTL04E_491.ESTROGEN_AGONIST 0 0
1_CTL08A_491.ESTROGEN_AGONIST 0 0
1_CTL04E_491.ESTROGEN_AGONIST 0 0
CTL08A.ESTROGENAGONIST CTL08A.ESTROGENANTAGONIST
3_CTL04E_491.ESTROGEN_AGONIST 0 0
2_CTL08A_491.ESTROGEN_AGONIST 1 0
3_CTL08A_491.ESTROGEN_AGONIST 1 0
2_CTL04E_491.ESTROGEN_AGONIST 0 0
1_CTL08A_491.ESTROGEN_AGONIST 1 0
1_CTL04E_491.ESTROGEN_AGONIST 0 0
CTL08A.GLUCOCORTAGONIST
3_CTL04E_491.ESTROGEN_AGONIST 0
2_CTL08A_491.ESTROGEN_AGONIST 0
3_CTL08A_491.ESTROGEN_AGONIST 0
2_CTL04E_491.ESTROGEN_AGONIST 0
1_CTL08A_491.ESTROGEN_AGONIST 0
1_CTL04E_491.ESTROGEN_AGONIST 0
CTL08A.GLUCOCORTANTAGONIST CTL08A.LIVERXAGONIST
3_CTL04E_491.ESTROGEN_AGONIST 0 0
2_CTL08A_491.ESTROGEN_AGONIST 0 0
3_CTL08A_491.ESTROGEN_AGONIST 0 0
2_CTL04E_491.ESTROGEN_AGONIST 0 0
1_CTL08A_491.ESTROGEN_AGONIST 0 0
1_CTL04E_491.ESTROGEN_AGONIST 0 0
CTL08A.LIVERXANTAGONIST CTL08A.RETAGONIST
3_CTL04E_491.ESTROGEN_AGONIST 0 0
2_CTL08A_491.ESTROGEN_AGONIST 0 0
3_CTL08A_491.ESTROGEN_AGONIST 0 0
2_CTL04E_491.ESTROGEN_AGONIST 0 0
1_CTL08A_491.ESTROGEN_AGONIST 0 0
1_CTL04E_491.ESTROGEN_AGONIST 0 0
CTL08A.RETANTAGONIST CTL08A.THYROIDAGONIST
3_CTL04E_491.ESTROGEN_AGONIST 0 0
2_CTL08A_491.ESTROGEN_AGONIST 0 0
3_CTL08A_491.ESTROGEN_AGONIST 0 0
2_CTL04E_491.ESTROGEN_AGONIST 0 0
1_CTL08A_491.ESTROGEN_AGONIST 0 0
1_CTL04E_491.ESTROGEN_AGONIST 0 0
CTL08A.THYROIDANTAGONIST
3_CTL04E_491.ESTROGEN_AGONIST 0
2_CTL08A_491.ESTROGEN_AGONIST 0
3_CTL08A_491.ESTROGEN_AGONIST 0
2_CTL04E_491.ESTROGEN_AGONIST 0
1_CTL08A_491.ESTROGEN_AGONIST 0
1_CTL04E_491.ESTROGEN_AGONIST 0
[1] "CTL04E.ANDROGENAGONIST" "CTL04E.ANDROGENANTAGONIST"
[3] "CTL04E.ARYLHYDAGONIST" "CTL04E.ARYLHYDANTAGONIST"
[5] "CTL04E.DMSO" "CTL04E.ESTROGENAGONIST"
[7] "CTL04E.ESTROGENANTAGONIST" "CTL04E.GLUCOCORTAGONIST"
[9] "CTL04E.GLUCOCORTANTAGONIST" "CTL04E.LIVERXAGONIST"
[11] "CTL04E.LIVERXANTAGONIST" "CTL04E.RETAGONIST"
[13] "CTL04E.RETANTAGONIST" "CTL04E.THYROIDAGONIST"
[15] "CTL04E.THYROIDANTAGONIST" "CTL08A.ANDROGENAGONIST"
[17] "CTL08A.ANDROGENANTAGONIST" "CTL08A.ARYLHYDAGONIST"
[19] "CTL08A.ARYLHYDANTAGONIST" "CTL08A.DMSO"
[21] "CTL08A.ESTROGENAGONIST" "CTL08A.ESTROGENANTAGONIST"
[23] "CTL08A.GLUCOCORTAGONIST" "CTL08A.GLUCOCORTANTAGONIST"
[25] "CTL08A.LIVERXAGONIST" "CTL08A.LIVERXANTAGONIST"
[27] "CTL08A.RETAGONIST" "CTL08A.RETANTAGONIST"
[29] "CTL08A.THYROIDAGONIST" "CTL08A.THYROIDANTAGONIST"
%%R -o DifferentialEffectAgonistVsAntagonist
AllGenes <- dim(Counts)[1]
DifferentialEffectAgonistVsAntagonist <- list()
for (condition in setdiff(unique(MD[["condition"]]), "DMSO")) {
# Only process conditions containing "AGONIST"
if (!grepl("ANTAGONIST", condition)) {
# Replace "AGONIST" with "ANTAGONIST" in the condition string
condAntagonist <- gsub("AGONIST", "ANTAGONIST", condition)
contrastExpression <- paste("(CTL04E.", condition, "-", "CTL04E.", condAntagonist,
")-(CTL08A.", condition, "-", "CTL08A.", condAntagonist, ")",
sep = "")
print(contrastExpression)
mycontrast <- makeContrasts(contrasts = contrastExpression, levels = mm)
DifferentialEffectAgonistVsAntagonist[[paste(condition, ".VS.",condAntagonist,"DifferentialCTL04E-CTL08A",sep = "") ]] <- topTags(glmQLFTest(fit, contrast = mycontrast), adjust.method = "fdr", n = AllGenes, sort.by = "PValue")$table
}
}
[1] "(CTL04E.ESTROGENAGONIST-CTL04E.ESTROGENANTAGONIST)-(CTL08A.ESTROGENAGONIST-CTL08A.ESTROGENANTAGONIST)" [1] "(CTL04E.ANDROGENAGONIST-CTL04E.ANDROGENANTAGONIST)-(CTL08A.ANDROGENAGONIST-CTL08A.ANDROGENANTAGONIST)" [1] "(CTL04E.ARYLHYDAGONIST-CTL04E.ARYLHYDANTAGONIST)-(CTL08A.ARYLHYDAGONIST-CTL08A.ARYLHYDANTAGONIST)" [1] "(CTL04E.LIVERXAGONIST-CTL04E.LIVERXANTAGONIST)-(CTL08A.LIVERXAGONIST-CTL08A.LIVERXANTAGONIST)" [1] "(CTL04E.GLUCOCORTAGONIST-CTL04E.GLUCOCORTANTAGONIST)-(CTL08A.GLUCOCORTAGONIST-CTL08A.GLUCOCORTANTAGONIST)" [1] "(CTL04E.THYROIDAGONIST-CTL04E.THYROIDANTAGONIST)-(CTL08A.THYROIDAGONIST-CTL08A.THYROIDANTAGONIST)" [1] "(CTL04E.RETAGONIST-CTL04E.RETANTAGONIST)-(CTL08A.RETAGONIST-CTL08A.RETANTAGONIST)"
%%R -o PerLineAgonistVsAntagonist
AllGenes <- dim(Counts)[1]
PerLineAgonistVsAntagonist <- list()
for (condition in setdiff(unique(MD[["condition"]]), "DMSO")) {
# Only process conditions containing "AGONIST"
if (!grepl("ANTAGONIST", condition)) {
# Replace "AGONIST" with "ANTAGONIST" in the condition string
condAntagonist <- gsub("AGONIST", "ANTAGONIST", condition)
contrastExpressionCTL04E <- paste("CTL04E.", condition, "-", "CTL04E.", condAntagonist,sep = "")
print(contrastExpressionCTL04E)
contrastExpressionCTL04E <- makeContrasts(contrasts = contrastExpressionCTL04E, levels = mm)
PerLineAgonistVsAntagonist[[paste(condition, ".VS.",condAntagonist,"_CTL04E",sep = "") ]] <- topTags(glmQLFTest(fit, contrast = contrastExpressionCTL04E), adjust.method = "fdr", n = AllGenes, sort.by = "PValue")$table
contrastExpressionCTL08A <- paste("CTL08A.", condition, "-", "CTL08A.", condAntagonist,sep = "")
print(contrastExpressionCTL08A)
contrastExpressionCTL08A <- makeContrasts(contrasts = contrastExpressionCTL08A, levels = mm)
PerLineAgonistVsAntagonist[[paste(condition, ".VS.",condAntagonist,"_CTL08A",sep = "") ]] <- topTags(glmQLFTest(fit, contrast = contrastExpressionCTL08A), adjust.method = "fdr", n = AllGenes, sort.by = "PValue")$table
}
}
[1] "CTL04E.ESTROGENAGONIST-CTL04E.ESTROGENANTAGONIST" [1] "CTL08A.ESTROGENAGONIST-CTL08A.ESTROGENANTAGONIST" [1] "CTL04E.ANDROGENAGONIST-CTL04E.ANDROGENANTAGONIST" [1] "CTL08A.ANDROGENAGONIST-CTL08A.ANDROGENANTAGONIST" [1] "CTL04E.ARYLHYDAGONIST-CTL04E.ARYLHYDANTAGONIST" [1] "CTL08A.ARYLHYDAGONIST-CTL08A.ARYLHYDANTAGONIST" [1] "CTL04E.LIVERXAGONIST-CTL04E.LIVERXANTAGONIST" [1] "CTL08A.LIVERXAGONIST-CTL08A.LIVERXANTAGONIST" [1] "CTL04E.GLUCOCORTAGONIST-CTL04E.GLUCOCORTANTAGONIST" [1] "CTL08A.GLUCOCORTAGONIST-CTL08A.GLUCOCORTANTAGONIST" [1] "CTL04E.THYROIDAGONIST-CTL04E.THYROIDANTAGONIST" [1] "CTL08A.THYROIDAGONIST-CTL08A.THYROIDANTAGONIST" [1] "CTL04E.RETAGONIST-CTL04E.RETANTAGONIST" [1] "CTL08A.RETAGONIST-CTL08A.RETANTAGONIST"
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
# Prepare compounds list
CompoundsList = sorted(set("_".join(i.split("_")[:-1]) for i in PerLineAgonistVsAntagonist.keys()))
# Set seaborn style
sns.set(style="whitegrid", context="talk")
# Calculate subplot grid size
n = len(CompoundsList)
cols = 4
rows = int(np.ceil(n / cols))
# Create figure and axes
fig, axes = plt.subplots(rows, cols, figsize=(cols * 5, rows * 5))
fig.suptitle(f"Agonist vs Antagonist logFC Comparison Across lines ({DS}, {celltype})", fontsize=20, y=1.02)
axes = axes.flatten()
# Loop through compounds and plot
for idx, i in enumerate(CompoundsList):
ax = axes[idx]
try:
CTL04Eres = PerLineAgonistVsAntagonist[f"{i}_CTL04E"].copy()
CTL08Ares = PerLineAgonistVsAntagonist[f"{i}_CTL08A"].copy()
CTL04Eres = CTL04Eres[["logFC"]].rename(columns={"logFC":"CTL04E logFC"})
CTL08Ares = CTL08Ares[["logFC"]].rename(columns={"logFC":"CTL08A logFC"})
merged = pd.concat([CTL04Eres, CTL08Ares], axis=1)
# Scatterplot
sns.scatterplot(data=merged, x="CTL04E logFC", y="CTL08A logFC", ax=ax, s=20, alpha=0.7)
# Diagonal line
lims = [
np.min([merged["CTL04E logFC"].min(), merged["CTL08A logFC"].min()]),
np.max([merged["CTL04E logFC"].max(), merged["CTL08A logFC"].max()]),
]
ax.plot(lims, lims, "--", color="gray", linewidth=1)
ax.set_xlim(lims)
ax.set_ylim(lims)
# Titles and labels
ax.set_title(i, fontsize=14)
ax.set_xlabel("CTL04E logFC")
ax.set_ylabel("CTL08A logFC")
except KeyError as e:
ax.set_visible(False) # Skip if data is missing
# Hide any unused subplots
for j in range(idx + 1, len(axes)):
axes[j].set_visible(False)
plt.tight_layout()
plt.show()
%%R -o PerLineConditionVsDMSO
AllGenes <- dim(Counts)[1]
PerLineConditionVsDMSO <- list()
for (condition in setdiff(unique(MD[["condition"]]), "DMSO")) {
# Only process conditions containing "AGONIST"
contrastExpressionCTL04E <- paste("CTL04E.", condition, "-", "CTL04E.DMSO",sep = "")
print(contrastExpressionCTL04E)
contrastExpressionCTL04E <- makeContrasts(contrasts = contrastExpressionCTL04E, levels = mm)
PerLineConditionVsDMSO[[paste(condition, ".VS.DMSO_CTL04E",sep = "") ]] <- topTags(glmQLFTest(fit, contrast = contrastExpressionCTL04E), adjust.method = "fdr", n = AllGenes, sort.by = "PValue")$table
contrastExpressionCTL08A <- paste("CTL08A.", condition, "-", "CTL08A.DMSO",sep = "")
print(contrastExpressionCTL08A)
contrastExpressionCTL08A <- makeContrasts(contrasts = contrastExpressionCTL08A, levels = mm)
PerLineConditionVsDMSO[[paste(condition, ".VS.DMSO_CTL08A",sep = "") ]] <- topTags(glmQLFTest(fit, contrast = contrastExpressionCTL08A), adjust.method = "fdr", n = AllGenes, sort.by = "PValue")$table
}
[1] "CTL04E.ESTROGENAGONIST-CTL04E.DMSO" [1] "CTL08A.ESTROGENAGONIST-CTL08A.DMSO" [1] "CTL04E.ESTROGENANTAGONIST-CTL04E.DMSO" [1] "CTL08A.ESTROGENANTAGONIST-CTL08A.DMSO" [1] "CTL04E.ANDROGENAGONIST-CTL04E.DMSO" [1] "CTL08A.ANDROGENAGONIST-CTL08A.DMSO" [1] "CTL04E.ANDROGENANTAGONIST-CTL04E.DMSO" [1] "CTL08A.ANDROGENANTAGONIST-CTL08A.DMSO" [1] "CTL04E.ARYLHYDAGONIST-CTL04E.DMSO" [1] "CTL08A.ARYLHYDAGONIST-CTL08A.DMSO" [1] "CTL04E.ARYLHYDANTAGONIST-CTL04E.DMSO" [1] "CTL08A.ARYLHYDANTAGONIST-CTL08A.DMSO" [1] "CTL04E.LIVERXAGONIST-CTL04E.DMSO" [1] "CTL08A.LIVERXAGONIST-CTL08A.DMSO" [1] "CTL04E.LIVERXANTAGONIST-CTL04E.DMSO" [1] "CTL08A.LIVERXANTAGONIST-CTL08A.DMSO" [1] "CTL04E.GLUCOCORTAGONIST-CTL04E.DMSO" [1] "CTL08A.GLUCOCORTAGONIST-CTL08A.DMSO" [1] "CTL04E.GLUCOCORTANTAGONIST-CTL04E.DMSO" [1] "CTL08A.GLUCOCORTANTAGONIST-CTL08A.DMSO" [1] "CTL04E.THYROIDAGONIST-CTL04E.DMSO" [1] "CTL08A.THYROIDAGONIST-CTL08A.DMSO" [1] "CTL04E.THYROIDANTAGONIST-CTL04E.DMSO" [1] "CTL08A.THYROIDANTAGONIST-CTL08A.DMSO" [1] "CTL04E.RETAGONIST-CTL04E.DMSO" [1] "CTL08A.RETAGONIST-CTL08A.DMSO" [1] "CTL04E.RETANTAGONIST-CTL04E.DMSO" [1] "CTL08A.RETANTAGONIST-CTL08A.DMSO"
CompoundsList = sorted(set("_".join(i.split("_")[:-1]) for i in PerLineConditionVsDMSO.keys()))
# Set seaborn style
sns.set(style="whitegrid", context="talk")
# Calculate subplot grid size
n = len(CompoundsList)
cols = 6
rows = int(np.ceil(n / cols))
# Create figure and axes
fig, axes = plt.subplots(rows, cols, figsize=(cols * 5, rows * 5))
fig.suptitle(f"Compound vs DMSO logFC Comparison Across lines ({DS}, {celltype})", fontsize=20, y=1.02)
axes = axes.flatten()
# Loop through compounds and plot
for idx, i in enumerate(CompoundsList):
ax = axes[idx]
try:
CTL04Eres = PerLineConditionVsDMSO[f"{i}_CTL04E"].copy()
CTL08Ares = PerLineConditionVsDMSO[f"{i}_CTL08A"].copy()
CTL04Eres = CTL04Eres[["logFC"]].rename(columns={"logFC":"CTL04E logFC"})
CTL08Ares = CTL08Ares[["logFC"]].rename(columns={"logFC":"CTL08A logFC"})
merged = pd.concat([CTL04Eres, CTL08Ares], axis=1)
# Scatterplot
sns.scatterplot(data=merged, x="CTL04E logFC", y="CTL08A logFC", ax=ax, s=20, alpha=0.7)
# Diagonal line
lims = [
np.min([merged["CTL04E logFC"].min(), merged["CTL08A logFC"].min()]),
np.max([merged["CTL04E logFC"].max(), merged["CTL08A logFC"].max()]),
]
ax.plot(lims, lims, "--", color="gray", linewidth=1)
ax.set_xlim(lims)
ax.set_ylim(lims)
# Titles and labels
ax.set_title(i, fontsize=14)
ax.set_xlabel("CTL04E logFC")
ax.set_ylabel("CTL08A logFC")
except KeyError as e:
ax.set_visible(False) # Skip if data is missing
# Hide any unused subplots
for j in range(idx + 1, len(axes)):
axes[j].set_visible(False)
plt.tight_layout()
plt.show()
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
# Prepare compounds list
CompoundsList = sorted(set("_".join(i.split("_")[:-1]) for i in PerLineAgonistVsAntagonist.keys()))
# Set seaborn style
sns.set(style="whitegrid", context="talk")
# Calculate subplot grid size
n = len(CompoundsList)
cols = 4
rows = int(np.ceil(n / cols))
# Create figure and axes
fig, axes = plt.subplots(rows, cols, figsize=(cols * 5, rows * 5))
fig.suptitle(f"Agonist vs Antagonist logFC Comparison Across lines ({DS}, {celltype})", fontsize=20, y=1.02)
axes = axes.flatten()
# Loop through compounds and plot
for idx, i in enumerate(CompoundsList):
ax = axes[idx]
try:
CTL04Eres = PerLineAgonistVsAntagonist[f"{i}_CTL04E"].copy()
CTL08Ares = PerLineAgonistVsAntagonist[f"{i}_CTL08A"].copy()
CTL04Eres = CTL04Eres[["logFC"]].rename(columns={"logFC":"CTL04E logFC"})
CTL08Ares = CTL08Ares[["logFC"]].rename(columns={"logFC":"CTL08A logFC"})
merged = pd.concat([CTL04Eres, CTL08Ares], axis=1)
# Scatterplot
sns.scatterplot(data=merged, x="CTL04E logFC", y="CTL08A logFC", ax=ax, s=20, alpha=0.7)
# Diagonal line
lims = [
np.min([merged["CTL04E logFC"].min(), merged["CTL08A logFC"].min()]),
np.max([merged["CTL04E logFC"].max(), merged["CTL08A logFC"].max()]),
]
ax.plot(lims, lims, "--", color="gray", linewidth=1)
ax.set_xlim(lims)
ax.set_ylim(lims)
# Titles and labels
ax.set_title(i, fontsize=14)
ax.set_xlabel("CTL04E logFC")
ax.set_ylabel("CTL08A logFC")
except KeyError as e:
ax.set_visible(False) # Skip if data is missing
# Hide any unused subplots
for j in range(idx + 1, len(axes)):
axes[j].set_visible(False)
plt.tight_layout()
plt.show()
%%R -o DifferentialEffectCompoundVsDMSO
AllGenes <- dim(Counts)[1]
DifferentialEffectCompoundVsDMSO <- list()
for (condition in setdiff(unique(MD[["condition"]]), "DMSO")) {
# Only process conditions containing "AGONIST"
# Replace "AGONIST" with "ANTAGONIST" in the condition string
condAntagonist <- gsub("AGONIST", "ANTAGONIST", condition)
contrastExpression <- paste("(CTL04E.", condition, "-", "CTL04E.DMSO)-(CTL08A.", condition, "-", "CTL08A.DMSO)",
sep = "")
print(contrastExpression)
mycontrast <- makeContrasts(contrasts = contrastExpression, levels = mm)
DifferentialEffectCompoundVsDMSO[[paste(condition, ".VS.DMSO.DifferentialCTL04E-CTL08A",sep = "") ]] <- topTags(glmQLFTest(fit, contrast = mycontrast), adjust.method = "fdr", n = AllGenes, sort.by = "PValue")$table
}
[1] "(CTL04E.ESTROGENAGONIST-CTL04E.DMSO)-(CTL08A.ESTROGENAGONIST-CTL08A.DMSO)" [1] "(CTL04E.ESTROGENANTAGONIST-CTL04E.DMSO)-(CTL08A.ESTROGENANTAGONIST-CTL08A.DMSO)" [1] "(CTL04E.ANDROGENAGONIST-CTL04E.DMSO)-(CTL08A.ANDROGENAGONIST-CTL08A.DMSO)" [1] "(CTL04E.ANDROGENANTAGONIST-CTL04E.DMSO)-(CTL08A.ANDROGENANTAGONIST-CTL08A.DMSO)" [1] "(CTL04E.ARYLHYDAGONIST-CTL04E.DMSO)-(CTL08A.ARYLHYDAGONIST-CTL08A.DMSO)" [1] "(CTL04E.ARYLHYDANTAGONIST-CTL04E.DMSO)-(CTL08A.ARYLHYDANTAGONIST-CTL08A.DMSO)" [1] "(CTL04E.LIVERXAGONIST-CTL04E.DMSO)-(CTL08A.LIVERXAGONIST-CTL08A.DMSO)" [1] "(CTL04E.LIVERXANTAGONIST-CTL04E.DMSO)-(CTL08A.LIVERXANTAGONIST-CTL08A.DMSO)" [1] "(CTL04E.GLUCOCORTAGONIST-CTL04E.DMSO)-(CTL08A.GLUCOCORTAGONIST-CTL08A.DMSO)" [1] "(CTL04E.GLUCOCORTANTAGONIST-CTL04E.DMSO)-(CTL08A.GLUCOCORTANTAGONIST-CTL08A.DMSO)" [1] "(CTL04E.THYROIDAGONIST-CTL04E.DMSO)-(CTL08A.THYROIDAGONIST-CTL08A.DMSO)" [1] "(CTL04E.THYROIDANTAGONIST-CTL04E.DMSO)-(CTL08A.THYROIDANTAGONIST-CTL08A.DMSO)" [1] "(CTL04E.RETAGONIST-CTL04E.DMSO)-(CTL08A.RETAGONIST-CTL08A.DMSO)" [1] "(CTL04E.RETANTAGONIST-CTL04E.DMSO)-(CTL08A.RETANTAGONIST-CTL08A.DMSO)"
FDR = 0.05
dir_path = "./{}_{}_DEGs".format(DS, celltype)
if not os.path.exists(dir_path):
os.makedirs(dir_path)
SharedConditionVsDmso_filtered = {}
for k in list(SharedConditionVsDmso.keys()):
DEGs = SharedConditionVsDmso[k]
DEGs[DEGs["FDR"] < FDR].to_excel("./{}_{}_DEGs/SharedCompoundVsDmso.{}_{}_{}.FDR{}.xlsx".format(DS, celltype,k, celltype, DS,FDR))
SharedConditionVsDmso_filtered[k] = DEGs[DEGs["FDR"] < FDR]
SharedAgonistVsAntagonist_filtered = {}
for k in list(SharedAgonistVsAntagonist.keys()):
DEGs = SharedAgonistVsAntagonist[k]
DEGs[DEGs["FDR"] < FDR].to_excel("./{}_{}_DEGs/SharedAgonistVsAntagonist.{}_{}_{}.FDR{}.xlsx".format(DS, celltype,k, celltype, DS,FDR))
SharedAgonistVsAntagonist_filtered[k] = DEGs[DEGs["FDR"] < FDR]
DifferentialEffectCompoundVsDMSO_filtered = {}
for k in list(DifferentialEffectCompoundVsDMSO.keys()):
DEGs = DifferentialEffectCompoundVsDMSO[k]
DEGs[DEGs["FDR"] < FDR].to_excel("./{}_{}_DEGs/DifferentialEffectCompoundVsDMSO.{}_{}_{}.FDR{}.xlsx".format(DS, celltype, k, celltype, DS,FDR))
DifferentialEffectCompoundVsDMSO_filtered[k] = DEGs[DEGs["FDR"] < FDR]
DifferentialEffectAgonistVsAntagonist_filtered = {}
for k in list(DifferentialEffectAgonistVsAntagonist.keys()):
DEGs = DifferentialEffectAgonistVsAntagonist[k]
DEGs[DEGs["FDR"] < FDR].to_excel("./{}_{}_DEGs/DifferentialEffectAgonistVsAntagonist.{}_{}_{}.FDR{}.xlsx".format(DS, celltype, k, celltype, DS,FDR))
DifferentialEffectAgonistVsAntagonist_filtered[k] = DEGs[DEGs["FDR"] < FDR]
PerLineAgonistVsAntagonist_filtered = {}
for k in list(PerLineAgonistVsAntagonist.keys()):
DEGs = PerLineAgonistVsAntagonist[k]
DEGs[DEGs["FDR"] < FDR].to_excel("./{}_{}_DEGs/DEGsPerLineAgonistVsAntagonist.{}_{}_{}.FDR{}.xlsx".format(DS, celltype,k, celltype, DS,FDR))
PerLineAgonistVsAntagonist_filtered["UP_{}".format(k)] = DEGs[(DEGs["FDR"] < FDR) & (DEGs["logFC"] > 2)]
PerLineAgonistVsAntagonist_filtered["DOWN_{}".format(k)] = DEGs[(DEGs["FDR"] < FDR) & (DEGs["logFC"] < -2)]
PerLineConditionVsDMSO_filtered = {}
for k in list(PerLineConditionVsDMSO.keys()):
DEGs = PerLineConditionVsDMSO[k]
DEGs[DEGs["FDR"] < FDR].to_excel("./{}_{}_DEGs/DEGsPerLineCompoundVsDMSO.{}_{}_{}.FDR{}.xlsx".format(DS, celltype,k, celltype, DS,FDR))
PerLineConditionVsDMSO_filtered["UP_{}".format(k)] = DEGs[(DEGs["FDR"] < FDR) & (DEGs["logFC"] > 2)]
PerLineConditionVsDMSO_filtered["DOWN_{}".format(k)] = DEGs[(DEGs["FDR"] < FDR) & (DEGs["logFC"] < -2)]
### AGONIST VS ANTAGONIST
### AGONIST VS ANTAGONIST
### AGONIST VS ANTAGONIST
### AGONIST VS ANTAGONIST
JaccardDF = pd.DataFrame(index = list(PerLineAgonistVsAntagonist_filtered.keys()), columns = list(PerLineAgonistVsAntagonist_filtered.keys()))
for combination in list(itertools.combinations(list(PerLineAgonistVsAntagonist_filtered.keys()),2)):
set0 = set(PerLineAgonistVsAntagonist_filtered[combination[0]].genes.tolist())
set1 = set(PerLineAgonistVsAntagonist_filtered[combination[1]].genes.tolist())
Int = set0.intersection(set1)
Union = set0.union(set1)
Jaccard = len(Int) / len(Union)
JaccardDF.loc[combination[0],combination[1]] = Jaccard
JaccardDF.loc[combination[1],combination[0]] = Jaccard
JaccardDF = JaccardDF.fillna(1)
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
# Assuming JaccardDF is your DataFrame of Jaccard similarities
# Convert similarity to distance: distance = 1 - similarity
distance_matrix = 1 - JaccardDF.astype(float)
# Optionally, sort the DataFrame for a canonical order
distance_matrix = distance_matrix.sort_index().sort_index(axis=1)
# Create a clustermap using the 'bwr' colormap with improved aesthetics.
g = sns.clustermap(
distance_matrix,
metric="euclidean", # or 'cityblock', 'cosine', etc.
method="average", # clustering method: single, complete, ward, etc.
cmap="bwr_r", # blue-white-red palette
figsize=(15, 15),
cbar_pos=(0.02, 0.8, 0.05, 0.18) # adjust colorbar position if needed
)
g.ax_row_dendrogram.set_visible(False)
g.ax_col_dendrogram.set_visible(False)
# The clustermap reorders rows/columns; get the reordered DataFrame.
reordered_data = distance_matrix.iloc[
g.dendrogram_row.reordered_ind, g.dendrogram_col.reordered_ind
]
# Annotate each cell with the corresponding Jaccard similarity value.
# Note: similarity = 1 - distance
for i, row in enumerate(reordered_data.index):
for j, col in enumerate(reordered_data.columns):
d = reordered_data.loc[row, col]
jaccard_sim = 1 - d # Convert back to similarity
g.ax_heatmap.text(
j + 0.5, i + 0.5,
f"{jaccard_sim:.2f}",
ha='center', va='center',
color='black', fontsize=8
)
# Remove grid lines from the heatmap
g.ax_heatmap.grid(False)
# Improve overall aesthetics
g.fig.suptitle("DEGs Jaccard similarity Agonist vs Antagonist", fontsize=16, y=1.05)
plt.setp(g.ax_heatmap.xaxis.get_majorticklabels(), rotation=90, fontsize=8)
plt.setp(g.ax_heatmap.yaxis.get_majorticklabels(), rotation=0, fontsize=8)
g.ax_heatmap.tick_params(axis='both', which='both', length=0)
g.cax.set_position([.92, .2, .03, .45]) # adjust the colorbar position if needed
plt.show()
### CONDITION VS DMSO
### CONDITION VS DMSO
### CONDITION VS DMSO
### CONDITION VS DMSO
JaccardDF = pd.DataFrame(index = list(PerLineConditionVsDMSO_filtered.keys()), columns = list(PerLineConditionVsDMSO_filtered.keys()))
for combination in list(itertools.combinations(list(PerLineConditionVsDMSO_filtered.keys()),2)):
set0 = set(PerLineConditionVsDMSO_filtered[combination[0]].genes.tolist())
set1 = set(PerLineConditionVsDMSO_filtered[combination[1]].genes.tolist())
Int = set0.intersection(set1)
Union = set0.union(set1)
Jaccard = len(Int) / len(Union)
JaccardDF.loc[combination[0],combination[1]] = Jaccard
JaccardDF.loc[combination[1],combination[0]] = Jaccard
JaccardDF = JaccardDF.fillna(1)
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
# Assuming JaccardDF is your DataFrame of Jaccard similarities
# Convert similarity to distance: distance = 1 - similarity
distance_matrix = 1 - JaccardDF.astype(float)
# Optionally, sort the DataFrame for a canonical order
distance_matrix = distance_matrix.sort_index().sort_index(axis=1)
# Create a clustermap using the 'bwr' colormap with improved aesthetics.
g = sns.clustermap(
distance_matrix,
metric="euclidean", # or 'cityblock', 'cosine', etc.
method="average", # clustering method: single, complete, ward, etc.
cmap="bwr_r", # blue-white-red palette
figsize=(20, 20),
cbar_pos=(0.02, 0.8, 0.05, 0.18) # adjust colorbar position if needed
)
# Hide the dendrograms (do not display them)
g.ax_row_dendrogram.set_visible(False)
g.ax_col_dendrogram.set_visible(False)
# The clustermap reorders rows/columns; get the reordered DataFrame.
reordered_data = distance_matrix.iloc[
g.dendrogram_row.reordered_ind, g.dendrogram_col.reordered_ind
]
# Annotate each cell with the corresponding Jaccard similarity value.
# Note: similarity = 1 - distance
for i, row in enumerate(reordered_data.index):
for j, col in enumerate(reordered_data.columns):
d = reordered_data.loc[row, col]
jaccard_sim = 1 - d # Convert back to similarity
g.ax_heatmap.text(
j + 0.5, i + 0.5,
f"{jaccard_sim:.2f}",
ha='center', va='center',
color='black', fontsize=5
)
# Remove grid lines from the heatmap
g.ax_heatmap.grid(False)
# Improve overall aesthetics
g.fig.suptitle("DEGs Jaccard similarity Compound vs DMSO", fontsize=16, y=1.05)
plt.setp(g.ax_heatmap.xaxis.get_majorticklabels(), rotation=90, fontsize=8)
plt.setp(g.ax_heatmap.yaxis.get_majorticklabels(), rotation=0, fontsize=8)
g.ax_heatmap.tick_params(axis='both', which='both', length=0)
g.cax.set_position([.92, .2, .03, .45]) # adjust the colorbar position if needed
plt.show()
CTL04Eresults = pd.concat([PerLineAgonistVsAntagonist_filtered[k].assign(Line="CTL04E").assign(Group=k) for k in list(PerLineAgonistVsAntagonist_filtered.keys()) if "_CTL04E" in k], ignore_index=True)
CTL04Eresults["Direction"] = CTL04Eresults.Group.str.split("_", expand=True)[0]
CTL04Eresults["Contrast"] = CTL04Eresults.Group.str.split("_", expand=True)[1].str.split(".", expand=True)[0]
CTL04Eresults.groupby(["Contrast","Direction"], as_index=False).size()
CTL04Eresults = CTL04Eresults.groupby('Contrast').apply(lambda group: pd.Series({
'Up': (group['Direction'] == 'UP').sum(),
'Down': (group['Direction'] == 'DOWN').sum(),
'Total': group.shape[0]
})).reset_index()
CTL08Aresults = pd.concat([PerLineAgonistVsAntagonist_filtered[k].assign(Line="CTL08A").assign(Group=k) for k in list(PerLineAgonistVsAntagonist_filtered.keys()) if "_CTL08A" in k], ignore_index=True)
CTL08Aresults["Direction"] = CTL08Aresults.Group.str.split("_", expand=True)[0]
CTL08Aresults["Contrast"] = CTL08Aresults.Group.str.split("_", expand=True)[1].str.split(".", expand=True)[0]
CTL08Aresults.groupby(["Contrast","Direction"], as_index=False).size()
CTL08Aresults = CTL08Aresults.groupby('Contrast').apply(lambda group: pd.Series({
'Up': (group['Direction'] == 'UP').sum(),
'Down': (group['Direction'] == 'DOWN').sum(),
'Total': group.shape[0]
})).reset_index()
import importlib
import matplotlib.pyplot as plt
import seaborn as sns
# Reload matplotlib.pyplot to restore its original state
# Prepare your summary DataFrame indexed by 'Contrast'
summary_results = {}
summary_results["CTL04E"] = CTL04Eresults.set_index('Contrast')[['Up', 'Down', 'Total']]
summary_results["CTL08A"] = CTL08Aresults.set_index('Contrast')[['Up', 'Down', 'Total']]
# Compute global minimum and maximum values across both DataFrames for uniform scaling
global_min = min(df.min().min() for df in summary_results.values())
global_max = max(df.max().max() for df in summary_results.values())
# Create a figure with 1 row and 2 columns of subplots
fig, axes = plt.subplots(1, 2, figsize=(20, 10))
# Loop through the dictionary keys and plot each heatmap on its corresponding axis
for n, k in enumerate(["CTL08A", "CTL04E"]):
ax = axes[n] # get the current axis
show_cbar = (n == 1)
sns.heatmap(
summary_results[k],
annot=True,
annot_kws={"size": 20}, # Increase text size here
fmt="d",
cmap='YlOrBr',
linewidths=0,
linecolor='none',
vmin=global_min, # set uniform minimum
vmax=global_max, # set uniform maximum
ax=ax,
cbar=show_cbar,
cbar_kws={'label': 'Count'} if show_cbar else None,
)
# Remove any visible grid lines by setting the QuadMesh edge color to the face color
ax.collections[0].set_edgecolor("face")
ax.grid(False)
# Set the axis labels and title using the axis methods
ax.tick_params(axis='x', labelsize=20)
ax.tick_params(axis='y', labelsize=20)
ax.set_xlabel(None)
ax.set_ylabel(None)
ax.set_title(k, fontdict={'size': 25})
# Adjust the layout and display the figure
plt.tight_layout()
plt.show()
CTL04Eresults = pd.concat([PerLineConditionVsDMSO_filtered[k].assign(Line="CTL04E").assign(Group=k) for k in list(PerLineConditionVsDMSO_filtered.keys()) if "_CTL04E" in k], ignore_index=True)
CTL04Eresults["Direction"] = CTL04Eresults.Group.str.split("_", expand=True)[0]
CTL04Eresults["Contrast"] = CTL04Eresults.Group.str.split("_", expand=True)[1].str.split(".", expand=True)[0]
CTL04Eresults.groupby(["Contrast","Direction"], as_index=False).size()
CTL04Eresults = CTL04Eresults.groupby('Contrast').apply(lambda group: pd.Series({
'Up': (group['Direction'] == 'UP').sum(),
'Down': (group['Direction'] == 'DOWN').sum(),
'Total': group.shape[0]
})).reset_index()
CTL08Aresults = pd.concat([PerLineConditionVsDMSO_filtered[k].assign(Line="CTL08A").assign(Group=k) for k in list(PerLineConditionVsDMSO_filtered.keys()) if "_CTL08A" in k], ignore_index=True)
CTL08Aresults["Direction"] = CTL08Aresults.Group.str.split("_", expand=True)[0]
CTL08Aresults["Contrast"] = CTL08Aresults.Group.str.split("_", expand=True)[1].str.split(".", expand=True)[0]
CTL08Aresults.groupby(["Contrast","Direction"], as_index=False).size()
CTL08Aresults = CTL08Aresults.groupby('Contrast').apply(lambda group: pd.Series({
'Up': (group['Direction'] == 'UP').sum(),
'Down': (group['Direction'] == 'DOWN').sum(),
'Total': group.shape[0]
})).reset_index()
# Reload matplotlib.pyplot to restore its original state
importlib.reload(plt)
# Prepare your summary DataFrame indexed by 'Contrast'
summary_results = {}
summary_results["CTL04E"] = CTL04Eresults.set_index('Contrast')[['Up', 'Down', 'Total']]
summary_results["CTL08A"] = CTL08Aresults.set_index('Contrast')[['Up', 'Down', 'Total']]
# Compute global minimum and maximum values across both DataFrames for uniform scaling
global_min = min(df.min().min() for df in summary_results.values())
global_max = max(df.max().max() for df in summary_results.values())
# Create a figure with 1 row and 2 columns of subplots
fig, axes = plt.subplots(1, 2, figsize=(20, 20))
# Loop through the dictionary keys and plot each heatmap on its corresponding axis
for n, k in enumerate(["CTL08A", "CTL04E"]):
ax = axes[n] # get the current axis
show_cbar = (n == 1)
sns.heatmap(
summary_results[k],
annot=True,
annot_kws={"size": 20}, # Increase text size here
fmt="d",
cmap='YlOrBr',
linewidths=0,
linecolor='none',
vmin=global_min, # set uniform minimum
vmax=global_max, # set uniform maximum
ax=ax,
cbar=show_cbar,
cbar_kws={'label': 'Count'} if show_cbar else None,
)
# Remove any visible grid lines by setting the QuadMesh edge color to the face color
ax.collections[0].set_edgecolor("face")
ax.grid(False)
# Set the axis labels and title using the axis methods
ax.tick_params(axis='x', labelsize=20)
ax.tick_params(axis='y', labelsize=20)
ax.set_xlabel(None)
ax.set_ylabel(None)
ax.set_title(k, fontdict={'size': 25})
# Adjust the layout and display the figure
plt.tight_layout()
plt.show()
ScatterploDict = {}
for k in list(PerLineConditionVsDMSO.keys()):
Compound = k.split(".")[0]
if Compound not in list(ScatterploDict.keys()):
ScatterploDict[Compound] = pd.DataFrame()
Line = k.split("_")[-1]
DF = PerLineConditionVsDMSO[k].assign(line=Line)
DifferentiallyResponding = DifferentialEffectCompoundVsDMSO_filtered[f"{Compound}.VS.DMSO.DifferentialCTL04E-CTL08A"]
DifferentiallyResponding = DifferentiallyResponding[abs(DifferentiallyResponding["logFC"]) >= 1.5]
DifferentiallyResponding = DifferentiallyResponding[DifferentiallyResponding["FDR"] <= 0.05].genes.tolist()
DF = DF[DF.genes.isin(DifferentiallyResponding)]
ScatterploDict[Compound] = pd.concat([ScatterploDict[Compound], DF], ignore_index=True)
ScatterploDict[Compound]
for Compound in list(ScatterploDict.keys()):
# Assuming ScatterploDict[Compound] is your DataFrame
df = ScatterploDict[Compound]
# Pivot the DataFrame so that each gene is a row and you have one column per line
df_pivot = df.pivot(index='genes', columns='line', values='logFC')
# Optionally, drop genes that don't have logFC values for both lines
df_pivot = df_pivot.fillna(0)
if df_pivot.shape[0] == 0:
print(f"No genes differentially regulated found for {Compound} according to selected filters")
continue
df_pivot["Delta"] = abs(df_pivot["CTL04E"] - df_pivot["CTL08A"])
plt.figure(figsize=(10, 10))
# Optionally scale the marker sizes (e.g., multiply Delta by a constant factor)
marker_sizes = df_pivot["Delta"] * 100 # Adjust scaling factor as needed
# Plot the scatter points using the correct parameter "s" for sizes
plt.scatter(df_pivot['CTL08A'], df_pivot['CTL04E'], alpha=0.2, color='blue', s=marker_sizes)
# Annotate each point with its gene name
for gene in df_pivot.index:
x = df_pivot.loc[gene, 'CTL08A']
y = df_pivot.loc[gene, 'CTL04E']
plt.text(x, y, gene, fontsize=8, alpha=0.8)
# Draw reference lines at 0 (for 0-centered values)
plt.axvline(0, color='gray', linestyle='--')
plt.axhline(0, color='gray', linestyle='--')
plt.xlabel('CTL08A logFC ', fontsize=25)
plt.ylabel('CTL04E logFC ', fontsize=25)
plt.title(Compound, fontsize=25)
plt.tight_layout()
plt.show()