import scanpy as sc
import pandas as pd
from matplotlib import pylab
import random
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import os
import itertools
import plotly.express as px
import yaml
import numpy as np
import random
import anndata as ad
from scipy.sparse import csr_matrix, issparse
from scipy import sparse
from matplotlib.colors import TwoSlopeNorm
import scanpy.external as sce
import sys
import scvelo as scv
import anndata
import rapids_singlecell as rsc
import anndata
from pathlib import Path
try:
nb_fname = ipynbname.name()
except:
nb_fname = os.path.basename(globals()["__vsc_ipynb_file__"])
print(nb_fname)
10.2_Hormones_substudy_LineMagnification_Progenitors.ipynb
sc.settings.set_figure_params(dpi=100, facecolor='white', dpi_save=500)
pylab.rcParams['figure.figsize'] = (9, 9)
homeDir = os.getenv("HOME")
sys.path.insert(1, homeDir+"/utils/")
from PlotPCA_components import *
from AtlasClasses import *
DS="hormones_substudy"
celltype="Progenitors"
ReferenceTissue="cortex"
with open(homeDir+"/utils/ReferenceDict.yaml", 'r') as file:
ReferencePaths = yaml.safe_load(file)
for k in list(ReferencePaths.keys()):
ReferencePaths[k]["adataPath"] = "/group/testa/Users/davide.castaldi/Polaroids_spinoff"+ReferencePaths[k]["adataPath"]
#ReferencePaths[k]["signaturePath"] = homeDir+ReferencePaths[k]["signaturePath"]
ReferencePaths[k]["signaturePath"] = "/group/testa/Users/davide.castaldi/Polaroids_spinoff"+ReferencePaths[k]["signaturePath"]
ReferencePaths[k]["signaturePurityPath"] = "/group/testa/Users/davide.castaldi/Polaroids_spinoff"+ReferencePaths[k]["signaturePurityPath"]
sc.settings.set_figure_params(dpi=80, facecolor='white', dpi_save=500)
pylab.rcParams['figure.figsize'] = (6, 6)
dir_path = homeDir+"/adatas/{}_ByMacroCelltype".format(DS)
pcs=15
n_neighbors=30
seed = 437
adata = sc.read_h5ad(dir_path+"/{}.{}.h5ad".format(DS,celltype))
adata
AnnData object with n_obs × n_vars = 43164 × 36601 obs: 'tags', 'cmo', 'condition', 'replicate', 'condition_clean', 'line', 'run', 'sample_id', 'groupCov', 'harmonizedRegion', 'tissue', 'PCW', 'Singlet', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'total_counts_mt', 'log1p_total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'log1p_total_counts_ribo', 'pct_counts_ribo', 'total_counts_hb', 'log1p_total_counts_hb', 'pct_counts_hb', 'log_pct_counts_hb', 'log_total_counts', 'study', 'hormones_substudy', 'EDCs_substudy', 'controls_substudy', 'region', 'study_Annotation', 'ingestdLabels', 'scANVILabels', 'harmonyLabels', 'phase', 'S_score', 'G2M_score', 'Consensus_nIntersection', 'Consensus', 'Consensus_call', 'PassedQCfilt_ScoreSignature_Hypoxia', 'PassedQCfilt_ScoreSignature_ER_stress', 'PassedQCfilt_ScoreSignature_necroptotic_process', 'PassedQCfilt_ScoreSignature_programmed_necrotic_cell_death', 'Leiden_1', 'MacroCelltype' var: 'gene_ids', 'feature_types', 'genome', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts', 'mean', 'std', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'highly_variable_nbatches', 'highly_variable_intersection', 'highly_variable_seurat_overlap' uns: 'AtlasTile_registry', 'Consensus_call_colors', 'Leiden_1_colors', 'PassedQCfilt_ScoreSignature_ER_stress_colors', 'PassedQCfilt_ScoreSignature_Hypoxia_colors', 'PassedQCfilt_ScoreSignature_necroptotic_process_colors', 'PassedQCfilt_ScoreSignature_programmed_necrotic_cell_death_colors', 'condition_colors', 'hvg', 'ingestdLabels_colors', 'leiden', 'line_colors', 'log1p', 'neighbors', 'pca', 'phase_colors', 'rank_genes_groups', 'umap' obsm: 'X_pca', 'X_pca_harmony', 'X_umap' varm: 'PCs' layers: 'counts' obsp: 'connectivities', 'distances'
# Clean ribo, mito anmd sec genes
# mitochondrial genes, "MT-" for human, "Mt-" for mouse
adata = adata[:,~adata.var_names.str.startswith("MT-")].copy()
# ribosomal genes
adata = adata[:,~adata.var_names.str.startswith(("RPS", "RPL"))].copy()
# Sex genes
print(adata)
adata = adata[:,~adata.var_names.isin(pd.read_csv(homeDir+"/resources/X.genes.tsv", header=None)[0].tolist())].copy()
adata = adata[:,~adata.var_names.isin(pd.read_csv(homeDir+"/resources/Y.genes.tsv", header=None)[0].tolist())].copy()
print(adata)
# Re-normalize
adata.X = adata.layers["counts"].copy()
sc.pp.normalize_total(adata, 20000)
sc.pp.log1p(adata)
adata.layers["logNorm"] = adata.X.copy()
AnnData object with n_obs × n_vars = 43164 × 36485 obs: 'tags', 'cmo', 'condition', 'replicate', 'condition_clean', 'line', 'run', 'sample_id', 'groupCov', 'harmonizedRegion', 'tissue', 'PCW', 'Singlet', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'total_counts_mt', 'log1p_total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'log1p_total_counts_ribo', 'pct_counts_ribo', 'total_counts_hb', 'log1p_total_counts_hb', 'pct_counts_hb', 'log_pct_counts_hb', 'log_total_counts', 'study', 'hormones_substudy', 'EDCs_substudy', 'controls_substudy', 'region', 'study_Annotation', 'ingestdLabels', 'scANVILabels', 'harmonyLabels', 'phase', 'S_score', 'G2M_score', 'Consensus_nIntersection', 'Consensus', 'Consensus_call', 'PassedQCfilt_ScoreSignature_Hypoxia', 'PassedQCfilt_ScoreSignature_ER_stress', 'PassedQCfilt_ScoreSignature_necroptotic_process', 'PassedQCfilt_ScoreSignature_programmed_necrotic_cell_death', 'Leiden_1', 'MacroCelltype' var: 'gene_ids', 'feature_types', 'genome', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts', 'mean', 'std', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'highly_variable_nbatches', 'highly_variable_intersection', 'highly_variable_seurat_overlap' uns: 'AtlasTile_registry', 'Consensus_call_colors', 'Leiden_1_colors', 'PassedQCfilt_ScoreSignature_ER_stress_colors', 'PassedQCfilt_ScoreSignature_Hypoxia_colors', 'PassedQCfilt_ScoreSignature_necroptotic_process_colors', 'PassedQCfilt_ScoreSignature_programmed_necrotic_cell_death_colors', 'condition_colors', 'hvg', 'ingestdLabels_colors', 'leiden', 'line_colors', 'log1p', 'neighbors', 'pca', 'phase_colors', 'rank_genes_groups', 'umap' obsm: 'X_pca', 'X_pca_harmony', 'X_umap' varm: 'PCs' layers: 'counts' obsp: 'connectivities', 'distances' AnnData object with n_obs × n_vars = 43164 × 35477 obs: 'tags', 'cmo', 'condition', 'replicate', 'condition_clean', 'line', 'run', 'sample_id', 'groupCov', 'harmonizedRegion', 'tissue', 'PCW', 'Singlet', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'total_counts_mt', 'log1p_total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'log1p_total_counts_ribo', 'pct_counts_ribo', 'total_counts_hb', 'log1p_total_counts_hb', 'pct_counts_hb', 'log_pct_counts_hb', 'log_total_counts', 'study', 'hormones_substudy', 'EDCs_substudy', 'controls_substudy', 'region', 'study_Annotation', 'ingestdLabels', 'scANVILabels', 'harmonyLabels', 'phase', 'S_score', 'G2M_score', 'Consensus_nIntersection', 'Consensus', 'Consensus_call', 'PassedQCfilt_ScoreSignature_Hypoxia', 'PassedQCfilt_ScoreSignature_ER_stress', 'PassedQCfilt_ScoreSignature_necroptotic_process', 'PassedQCfilt_ScoreSignature_programmed_necrotic_cell_death', 'Leiden_1', 'MacroCelltype' var: 'gene_ids', 'feature_types', 'genome', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts', 'mean', 'std', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'highly_variable_nbatches', 'highly_variable_intersection', 'highly_variable_seurat_overlap' uns: 'AtlasTile_registry', 'Consensus_call_colors', 'Leiden_1_colors', 'PassedQCfilt_ScoreSignature_ER_stress_colors', 'PassedQCfilt_ScoreSignature_Hypoxia_colors', 'PassedQCfilt_ScoreSignature_necroptotic_process_colors', 'PassedQCfilt_ScoreSignature_programmed_necrotic_cell_death_colors', 'condition_colors', 'hvg', 'ingestdLabels_colors', 'leiden', 'line_colors', 'log1p', 'neighbors', 'pca', 'phase_colors', 'rank_genes_groups', 'umap' obsm: 'X_pca', 'X_pca_harmony', 'X_umap' varm: 'PCs' layers: 'counts' obsp: 'connectivities', 'distances' WARNING: adata.X seems to be already log-transformed.
sc.pl.umap(adata, color=["line","condition"])
adata.obs["condition"].value_counts()
condition ARYL_HYD_ANTAGONIST 4979 ANDROGEN_ANTAGONIST 4155 LIVER-X_ANTAGONIST 4091 ESTROGEN_AGONIST 3920 RET_AGONIST 3884 ESTROGEN_ANTAGONIST 3514 THYROID_ANTAGONIST 2952 ANDROGEN_AGONIST 2464 DMSO 2333 ARYL_HYD_AGONIST 2201 RET_ANTAGONIST 1841 GLUCOCORT_AGONIST 1738 GLUCOCORT_ANTAGONIST 1729 LIVER-X_AGONIST 1704 THYROID_AGONIST 1659 Name: count, dtype: int64
HVGSCondict = {}
for line in adata.obs["line"].unique().tolist():
adataGenotype = adata[adata.obs["line"] == line]
for condition in adataGenotype.obs["condition"].unique().tolist():
adataCOnd = adataGenotype[adataGenotype.obs["condition"] == condition].copy()
if condition not in list(HVGSCondict.keys()):
HVGSCondict[condition] = []
#Print Nsamples
nsamples = len(adataCOnd.obs["sample_id"].unique().tolist())
print(f"Found {nsamples} repliacates for {line} {condition}")
if nsamples < 3:
minOverlap = nsamples
else:
minOverlap = nsamples-1
print(f"Only HVGs common to {minOverlap} replicates will be kept")
sc.pp.highly_variable_genes(adataCOnd, batch_key="sample_id", flavor="seurat", n_top_genes=1000)
CommonHVGs = set(adataCOnd.var_names[adataCOnd.var["highly_variable_nbatches"] >= minOverlap])
nHVGs = len(CommonHVGs)
print(f"Found {nHVGs} common HVGs across {minOverlap} replicates for line {line} exposure {condition} ")
print("\n")
HVGSCondict[condition].append(CommonHVGs)
Found 3 repliacates for CTL04E ESTROGEN_AGONIST Only HVGs common to 2 replicates will be kept Found 558 common HVGs across 2 replicates for line CTL04E exposure ESTROGEN_AGONIST Found 3 repliacates for CTL04E ESTROGEN_ANTAGONIST Only HVGs common to 2 replicates will be kept Found 559 common HVGs across 2 replicates for line CTL04E exposure ESTROGEN_ANTAGONIST Found 3 repliacates for CTL04E ANDROGEN_AGONIST Only HVGs common to 2 replicates will be kept Found 506 common HVGs across 2 replicates for line CTL04E exposure ANDROGEN_AGONIST Found 3 repliacates for CTL04E ANDROGEN_ANTAGONIST Only HVGs common to 2 replicates will be kept Found 576 common HVGs across 2 replicates for line CTL04E exposure ANDROGEN_ANTAGONIST Found 3 repliacates for CTL04E ARYL_HYD_AGONIST Only HVGs common to 2 replicates will be kept Found 555 common HVGs across 2 replicates for line CTL04E exposure ARYL_HYD_AGONIST Found 3 repliacates for CTL04E ARYL_HYD_ANTAGONIST Only HVGs common to 2 replicates will be kept Found 648 common HVGs across 2 replicates for line CTL04E exposure ARYL_HYD_ANTAGONIST Found 3 repliacates for CTL04E LIVER-X_AGONIST Only HVGs common to 2 replicates will be kept Found 517 common HVGs across 2 replicates for line CTL04E exposure LIVER-X_AGONIST Found 3 repliacates for CTL04E LIVER-X_ANTAGONIST Only HVGs common to 2 replicates will be kept Found 691 common HVGs across 2 replicates for line CTL04E exposure LIVER-X_ANTAGONIST Found 3 repliacates for CTL04E GLUCOCORT_AGONIST Only HVGs common to 2 replicates will be kept Found 524 common HVGs across 2 replicates for line CTL04E exposure GLUCOCORT_AGONIST Found 3 repliacates for CTL04E GLUCOCORT_ANTAGONIST Only HVGs common to 2 replicates will be kept Found 515 common HVGs across 2 replicates for line CTL04E exposure GLUCOCORT_ANTAGONIST Found 3 repliacates for CTL04E THYROID_AGONIST Only HVGs common to 2 replicates will be kept Found 516 common HVGs across 2 replicates for line CTL04E exposure THYROID_AGONIST Found 3 repliacates for CTL04E THYROID_ANTAGONIST Only HVGs common to 2 replicates will be kept Found 511 common HVGs across 2 replicates for line CTL04E exposure THYROID_ANTAGONIST Found 3 repliacates for CTL04E DMSO Only HVGs common to 2 replicates will be kept Found 524 common HVGs across 2 replicates for line CTL04E exposure DMSO Found 1 repliacates for CTL04E RET_AGONIST Only HVGs common to 1 replicates will be kept Found 1001 common HVGs across 1 replicates for line CTL04E exposure RET_AGONIST Found 3 repliacates for CTL04E RET_ANTAGONIST Only HVGs common to 2 replicates will be kept Found 529 common HVGs across 2 replicates for line CTL04E exposure RET_ANTAGONIST Found 3 repliacates for CTL08A ESTROGEN_AGONIST Only HVGs common to 2 replicates will be kept Found 493 common HVGs across 2 replicates for line CTL08A exposure ESTROGEN_AGONIST Found 3 repliacates for CTL08A ESTROGEN_ANTAGONIST Only HVGs common to 2 replicates will be kept Found 573 common HVGs across 2 replicates for line CTL08A exposure ESTROGEN_ANTAGONIST Found 3 repliacates for CTL08A ANDROGEN_AGONIST Only HVGs common to 2 replicates will be kept Found 460 common HVGs across 2 replicates for line CTL08A exposure ANDROGEN_AGONIST Found 3 repliacates for CTL08A ANDROGEN_ANTAGONIST Only HVGs common to 2 replicates will be kept Found 537 common HVGs across 2 replicates for line CTL08A exposure ANDROGEN_ANTAGONIST Found 3 repliacates for CTL08A ARYL_HYD_AGONIST Only HVGs common to 2 replicates will be kept Found 520 common HVGs across 2 replicates for line CTL08A exposure ARYL_HYD_AGONIST Found 3 repliacates for CTL08A ARYL_HYD_ANTAGONIST Only HVGs common to 2 replicates will be kept Found 642 common HVGs across 2 replicates for line CTL08A exposure ARYL_HYD_ANTAGONIST Found 3 repliacates for CTL08A LIVER-X_AGONIST Only HVGs common to 2 replicates will be kept Found 580 common HVGs across 2 replicates for line CTL08A exposure LIVER-X_AGONIST Found 3 repliacates for CTL08A LIVER-X_ANTAGONIST Only HVGs common to 2 replicates will be kept Found 692 common HVGs across 2 replicates for line CTL08A exposure LIVER-X_ANTAGONIST Found 3 repliacates for CTL08A GLUCOCORT_AGONIST Only HVGs common to 2 replicates will be kept Found 498 common HVGs across 2 replicates for line CTL08A exposure GLUCOCORT_AGONIST Found 3 repliacates for CTL08A GLUCOCORT_ANTAGONIST Only HVGs common to 2 replicates will be kept Found 507 common HVGs across 2 replicates for line CTL08A exposure GLUCOCORT_ANTAGONIST Found 3 repliacates for CTL08A THYROID_AGONIST Only HVGs common to 2 replicates will be kept Found 523 common HVGs across 2 replicates for line CTL08A exposure THYROID_AGONIST Found 3 repliacates for CTL08A THYROID_ANTAGONIST Only HVGs common to 2 replicates will be kept Found 497 common HVGs across 2 replicates for line CTL08A exposure THYROID_ANTAGONIST Found 3 repliacates for CTL08A DMSO Only HVGs common to 2 replicates will be kept Found 556 common HVGs across 2 replicates for line CTL08A exposure DMSO Found 1 repliacates for CTL08A RET_AGONIST Only HVGs common to 1 replicates will be kept Found 1000 common HVGs across 1 replicates for line CTL08A exposure RET_AGONIST Found 2 repliacates for CTL08A RET_ANTAGONIST Only HVGs common to 2 replicates will be kept Found 388 common HVGs across 2 replicates for line CTL08A exposure RET_ANTAGONIST
sc.pp.highly_variable_genes(adata, batch_key="line", flavor="seurat", n_top_genes=1000)
HVGSLine = set(adata.var_names[adata.var["highly_variable_nbatches"] >=1].tolist())
nHVGSLine = len(HVGSLine)
print(f"Found {nHVGSLine} common pan-condition HVGs across both lines ")
Found 1521 common pan-condition HVGs across both lines
##### First HVGs set
##### First HVGs set
IntersectionDEGs = []
for k in list(HVGSCondict.keys()):
IntersectionDEGsCond = list(set.union(*HVGSCondict[k]))
nIntersectionDEGsCond = len(IntersectionDEGsCond)
#print(f"Found {nIntersectionDEGsCond} HVGs from the intersection of both genotypes replicates for exposure {condition} ")
IntersectionDEGs.extend(IntersectionDEGsCond)
IntersectionDEGs = set(IntersectionDEGs)
nIntersectionDEGsCond = len(IntersectionDEGs)
print(f"Found {nIntersectionDEGsCond} HVGs from the union of both genotypes replicates for exposure {condition} ")
##### Second HVGs set
##### Second HVGs set
FinalHVGs = list(IntersectionDEGs.union(HVGSLine))
nFinalHVGs = len(FinalHVGs)
print(f"Found {nFinalHVGs} HVGs from the union of both genotypes replicates and across exposures ")
Found 3957 HVGs from the union of both genotypes replicates for exposure RET_ANTAGONIST Found 4430 HVGs from the union of both genotypes replicates and across exposures
adata
AnnData object with n_obs × n_vars = 43164 × 35477 obs: 'tags', 'cmo', 'condition', 'replicate', 'condition_clean', 'line', 'run', 'sample_id', 'groupCov', 'harmonizedRegion', 'tissue', 'PCW', 'Singlet', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'total_counts_mt', 'log1p_total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'log1p_total_counts_ribo', 'pct_counts_ribo', 'total_counts_hb', 'log1p_total_counts_hb', 'pct_counts_hb', 'log_pct_counts_hb', 'log_total_counts', 'study', 'hormones_substudy', 'EDCs_substudy', 'controls_substudy', 'region', 'study_Annotation', 'ingestdLabels', 'scANVILabels', 'harmonyLabels', 'phase', 'S_score', 'G2M_score', 'Consensus_nIntersection', 'Consensus', 'Consensus_call', 'PassedQCfilt_ScoreSignature_Hypoxia', 'PassedQCfilt_ScoreSignature_ER_stress', 'PassedQCfilt_ScoreSignature_necroptotic_process', 'PassedQCfilt_ScoreSignature_programmed_necrotic_cell_death', 'Leiden_1', 'MacroCelltype' var: 'gene_ids', 'feature_types', 'genome', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts', 'mean', 'std', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'highly_variable_nbatches', 'highly_variable_intersection', 'highly_variable_seurat_overlap' uns: 'AtlasTile_registry', 'Consensus_call_colors', 'Leiden_1_colors', 'PassedQCfilt_ScoreSignature_ER_stress_colors', 'PassedQCfilt_ScoreSignature_Hypoxia_colors', 'PassedQCfilt_ScoreSignature_necroptotic_process_colors', 'PassedQCfilt_ScoreSignature_programmed_necrotic_cell_death_colors', 'condition_colors', 'hvg', 'ingestdLabels_colors', 'leiden', 'line_colors', 'log1p', 'neighbors', 'pca', 'phase_colors', 'rank_genes_groups', 'umap' obsm: 'X_pca', 'X_pca_harmony', 'X_umap' varm: 'PCs' layers: 'counts', 'logNorm' obsp: 'connectivities', 'distances'
adata.var["highly_variable_seurat_overlap"] = adata.var_names.isin(FinalHVGs)
print(adata.var["highly_variable_seurat_overlap"].sum())
adata.var["highly_variable"] = adata.var["highly_variable_seurat_overlap"]
sc.pp.scale(adata)
adata.layers["scaled"] = adata.X.copy()
sc.pp.pca(adata, use_highly_variable=True)
sc.pp.neighbors(adata, n_pcs=pcs, n_neighbors=n_neighbors, random_state=seed)
adata.X = adata.layers["logNorm"].copy()
4430
sc.tl.umap(adata, random_state=seed)
sc.tl.draw_graph(adata)
sc.pl.umap(adata, color=["line","condition","Consensus_call"], size=10, vmin='p1', vmax='p99', wspace=.2, ncols=2)
sc.pl.draw_graph(adata, color=["line","condition","Consensus_call"], size=10, vmin='p1', vmax='p99', wspace=.2, ncols=2)
print(adata.X.max())
print(adata.X.min())
9.056654 0.0
del adata.layers["scaled"]
del adata.layers["logNorm"]
# Store results
adata.write_h5ad(dir_path+"/{}.{}.LineMagnification.h5ad".format(DS,celltype))
adata = sc.read_h5ad(dir_path+"/{}.{}.LineMagnification.h5ad".format(DS,celltype))
adataPbulk = adata.copy()
adataPbulk.obs["line_condition"] = adataPbulk.obs["line"].astype(str)+"_"+ adataPbulk.obs["condition"].astype(str)
import hashlib Xtab = pd.crosstab(adataPbulk.obs["condition"],adataPbulk.obs["line"]) print(Xtab) minCells = Xtab.min().min() Compositionhash = hashlib.sha256(Xtab.sort_index(axis=1).to_csv(index=True).encode('utf-8')).hexdigest()
SelectedCells = [] SelectedCellsFile = "./{}.{}.selectedBCs.txt".format(nb_fname,Compositionhash) if os.path.isfile(SelectedCellsFile): print("Loading existing cells subset") SelectedCells = pd.read_csv(SelectedCellsFile)["0"].tolist() else: print("File not found or compositions are changed, performing downsampling to balance conditions/lines") for line in adataPbulk.obs["line_condition"].unique(): pbulkCond = adataPbulk[(adataPbulk.obs["line_condition"] == line)]
pbulkCondCells = random.sample(pbulkCond.obs_names.tolist(), minCells)
SelectedCells.extend(pbulkCondCells)
pd.Series(SelectedCells).to_csv(SelectedCellsFile)
adataPbulk = adataPbulk[SelectedCells] pd.crosstab(adataPbulk.obs["condition"],adataPbulk.obs["line"])
adataPbulk = AtlasTile(adataPbulk, regionObs="harmonizedRegion", tissueObs="tissue", ageObs="PCW", study=DS,scalevar=True, groupCov="groupCov")
adataPbulk.AggregateCounts(method="pseudoBulk", groupcov="sample_id", layer="counts", cellStateObs="condition", prenormalize=False, postnormalize=False)
adataPbulk = adataPbulk.uns["pseudoBulkadata.layer_counts.by_sample_id_condition"].copy()
Pseudobulking counts for sample 3_CTL04E_491 by condition obs Pseudobulking counts for sample 2_CTL08A_491 by condition obs Pseudobulking counts for sample 3_CTL08A_491 by condition obs Pseudobulking counts for sample 2_CTL04E_491 by condition obs Pseudobulking counts for sample 1_CTL08A_491 by condition obs Pseudobulking counts for sample 1_CTL04E_491 by condition obs Pseudobulking counts for sample 3_CTL04E_492 by condition obs Pseudobulking counts for sample 1_CTL08A_492 by condition obs Pseudobulking counts for sample 1_CTL04E_492 by condition obs Pseudobulking counts for sample 2_CTL04E_492 by condition obs Pseudobulking counts for sample 2_CTL08A_492 by condition obs Pseudobulking counts for sample 3_CTL08A_492 by condition obs Pseudobulking counts for sample 2_CTL08A_493 by condition obs Pseudobulking counts for sample 3_CTL04E_493 by condition obs Pseudobulking counts for sample 3_CTL08A_493 by condition obs Pseudobulking counts for sample 2_CTL04E_493 by condition obs Pseudobulking counts for sample 1_CTL04E_493 by condition obs Pseudobulking counts for sample 1_CTL08A_493 by condition obs Pseudobulking counts for sample 2_CTL08A_494 by condition obs Pseudobulking counts for sample 2_CTL04E_494 by condition obs Pseudobulking counts for sample 1_CTL04E_494 by condition obs Pseudobulking counts for sample 3_CTL04E_494 by condition obs Pseudobulking counts for sample 3_CTL08A_494 by condition obs Pseudobulking counts for sample 1_CTL08A_494 by condition obs Pseudobulking counts for sample 1_CTL04E_495 by condition obs Pseudobulking counts for sample 2_CTL04E_495 by condition obs Pseudobulking counts for sample 3_CTL08A_495 by condition obs Pseudobulking counts for sample 3_CTL04E_495 by condition obs Pseudobulking counts for sample 1_CTL08A_495 by condition obs Pseudobulking counts for sample 2_CTL08A_495 by condition obs Pseudobulking counts for sample 2_CTL04E_496 by condition obs Pseudobulking counts for sample 2_CTL08A_496 by condition obs Pseudobulking counts for sample 1_CTL08A_496 by condition obs Pseudobulking counts for sample 3_CTL04E_496 by condition obs Pseudobulking counts for sample 1_CTL04E_496 by condition obs Pseudobulking counts for sample 3_CTL08A_496 by condition obs Pseudobulking counts for sample 3_CTL04E_497 by condition obs Pseudobulking counts for sample 2_CTL04E_497 by condition obs Pseudobulking counts for sample 3_CTL08A_497 by condition obs Pseudobulking counts for sample 2_CTL08A_497 by condition obs Pseudobulking counts for sample 1_CTL04E_497 by condition obs Pseudobulking counts for sample 1_CTL08A_497 by condition obs Pseudobulking counts for sample 1_CTL04E_498 by condition obs Pseudobulking counts for sample 3_CTL08A_498 by condition obs Pseudobulking counts for sample 1_CTL08A_498 by condition obs Pseudobulking counts for sample 3_CTL04E_498 by condition obs Pseudobulking counts for sample 2_CTL04E_498 by condition obs Pseudobulking counts for sample 2_CTL08A_498 by condition obs Pseudobulking counts for sample 1_CTL08A_499 by condition obs Pseudobulking counts for sample 2_CTL04E_499 by condition obs Pseudobulking counts for sample 3_CTL04E_499 by condition obs Pseudobulking counts for sample 3_CTL08A_499 by condition obs Pseudobulking counts for sample 1_CTL04E_499 by condition obs Pseudobulking counts for sample 2_CTL08A_499 by condition obs Pseudobulking counts for sample 2_CTL04E_500 by condition obs Pseudobulking counts for sample 1_CTL04E_500 by condition obs Pseudobulking counts for sample 3_CTL04E_500 by condition obs Pseudobulking counts for sample 2_CTL08A_500 by condition obs Pseudobulking counts for sample 1_CTL08A_500 by condition obs Pseudobulking counts for sample 3_CTL08A_500 by condition obs Pseudobulking counts for sample 3_CTL08A_501 by condition obs Pseudobulking counts for sample 3_CTL04E_501 by condition obs Pseudobulking counts for sample 1_CTL08A_501 by condition obs Pseudobulking counts for sample 2_CTL04E_501 by condition obs Pseudobulking counts for sample 1_CTL04E_501 by condition obs Pseudobulking counts for sample 2_CTL08A_501 by condition obs Pseudobulking counts for sample 3_CTL04E_502 by condition obs Pseudobulking counts for sample 3_CTL08A_502 by condition obs Pseudobulking counts for sample 1_CTL04E_502 by condition obs Pseudobulking counts for sample 1_CTL08A_502 by condition obs Pseudobulking counts for sample 2_CTL08A_502 by condition obs Pseudobulking counts for sample 2_CTL04E_502 by condition obs Pseudobulking counts for sample 2_CTL08A_505 by condition obs Pseudobulking counts for sample 2_CTL04E_505 by condition obs Pseudobulking counts for sample 1_CTL08A_505 by condition obs Pseudobulking counts for sample 3_CTL04E_505 by condition obs Pseudobulking counts for sample 1_CTL04E_505 by condition obs Pseudobulking counts for sample 3_CTL08A_505 by condition obs Pseudobulking counts for sample 1_CTL08A_508 by condition obs Pseudobulking counts for sample 1_CTL04E_508 by condition obs Pseudobulking counts for sample 1_CTL08A_509 by condition obs Pseudobulking counts for sample 3_CTL04E_509 by condition obs Pseudobulking counts for sample 1_CTL04E_509 by condition obs Pseudobulking counts for sample 2_CTL04E_509 by condition obs Pseudobulking counts for sample 3_CTL08A_509 by condition obs
def sanitize_strings(value):
if isinstance(value, str): # Only process string values
return (
value.replace('_', '') # Replace underscores with spaces
.replace('-', '') # Replace hyphens with spaces
.replace('@', '') # Example: Remove "@" character
)
return value # Leave non-strings untouched
Counts = adataPbulk.to_df(layer="pseudoBulk_summedCounts").T
MD = adataPbulk.obs[["condition","line"]]
MD["sample"] = MD.index.tolist()
MD = MD.applymap(sanitize_strings).loc[MD.index]
MD
condition | line | sample | |
---|---|---|---|
3_CTL04E_491.ESTROGEN_AGONIST | ESTROGENAGONIST | CTL04E | 3CTL04E491.ESTROGENAGONIST |
2_CTL08A_491.ESTROGEN_AGONIST | ESTROGENAGONIST | CTL08A | 2CTL08A491.ESTROGENAGONIST |
3_CTL08A_491.ESTROGEN_AGONIST | ESTROGENAGONIST | CTL08A | 3CTL08A491.ESTROGENAGONIST |
2_CTL04E_491.ESTROGEN_AGONIST | ESTROGENAGONIST | CTL04E | 2CTL04E491.ESTROGENAGONIST |
1_CTL08A_491.ESTROGEN_AGONIST | ESTROGENAGONIST | CTL08A | 1CTL08A491.ESTROGENAGONIST |
... | ... | ... | ... |
1_CTL08A_509.RET_ANTAGONIST | RETANTAGONIST | CTL08A | 1CTL08A509.RETANTAGONIST |
3_CTL04E_509.RET_ANTAGONIST | RETANTAGONIST | CTL04E | 3CTL04E509.RETANTAGONIST |
1_CTL04E_509.RET_ANTAGONIST | RETANTAGONIST | CTL04E | 1CTL04E509.RETANTAGONIST |
2_CTL04E_509.RET_ANTAGONIST | RETANTAGONIST | CTL04E | 2CTL04E509.RETANTAGONIST |
3_CTL08A_509.RET_ANTAGONIST | RETANTAGONIST | CTL08A | 3CTL08A509.RETANTAGONIST |
85 rows × 3 columns
%load_ext rpy2.ipython
pd.DataFrame.iteritems = pd.DataFrame.items
The rpy2.ipython extension is already loaded. To reload it, use: %reload_ext rpy2.ipython
%%R -i MD -i Counts
library(edgeR)
library(org.Hs.eg.db)
library(AnnotationDbi)
library(stats)
library(topGO)
R[write to console]: Loading required package: limma R[write to console]: Loading required package: AnnotationDbi R[write to console]: Loading required package: stats4 R[write to console]: Loading required package: BiocGenerics R[write to console]: Attaching package: ‘BiocGenerics’ R[write to console]: The following object is masked from ‘package:limma’: plotMA R[write to console]: The following objects are masked from ‘package:stats’: IQR, mad, sd, var, xtabs R[write to console]: The following objects are masked from ‘package:base’: anyDuplicated, aperm, append, as.data.frame, basename, cbind, colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget, order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply, union, unique, unsplit, which.max, which.min R[write to console]: Loading required package: Biobase R[write to console]: Welcome to Bioconductor Vignettes contain introductory material; view with 'browseVignettes()'. To cite Bioconductor, see 'citation("Biobase")', and for packages 'citation("pkgname")'. R[write to console]: Loading required package: IRanges R[write to console]: Loading required package: S4Vectors R[write to console]: Attaching package: ‘S4Vectors’ R[write to console]: The following object is masked from ‘package:utils’: findMatches R[write to console]: The following objects are masked from ‘package:base’: expand.grid, I, unname R[write to console]: R[write to console]: Loading required package: graph R[write to console]: Loading required package: GO.db R[write to console]: R[write to console]: Loading required package: SparseM R[write to console]: Attaching package: ‘SparseM’ R[write to console]: The following object is masked from ‘package:base’: backsolve R[write to console]: groupGOTerms: GOBPTerm, GOMFTerm, GOCCTerm environments built. R[write to console]: Attaching package: ‘topGO’ R[write to console]: The following object is masked from ‘package:IRanges’: members
MD
condition | line | sample | |
---|---|---|---|
3_CTL04E_491.ESTROGEN_AGONIST | ESTROGENAGONIST | CTL04E | 3CTL04E491.ESTROGENAGONIST |
2_CTL08A_491.ESTROGEN_AGONIST | ESTROGENAGONIST | CTL08A | 2CTL08A491.ESTROGENAGONIST |
3_CTL08A_491.ESTROGEN_AGONIST | ESTROGENAGONIST | CTL08A | 3CTL08A491.ESTROGENAGONIST |
2_CTL04E_491.ESTROGEN_AGONIST | ESTROGENAGONIST | CTL04E | 2CTL04E491.ESTROGENAGONIST |
1_CTL08A_491.ESTROGEN_AGONIST | ESTROGENAGONIST | CTL08A | 1CTL08A491.ESTROGENAGONIST |
... | ... | ... | ... |
1_CTL08A_509.RET_ANTAGONIST | RETANTAGONIST | CTL08A | 1CTL08A509.RETANTAGONIST |
3_CTL04E_509.RET_ANTAGONIST | RETANTAGONIST | CTL04E | 3CTL04E509.RETANTAGONIST |
1_CTL04E_509.RET_ANTAGONIST | RETANTAGONIST | CTL04E | 1CTL04E509.RETANTAGONIST |
2_CTL04E_509.RET_ANTAGONIST | RETANTAGONIST | CTL04E | 2CTL04E509.RETANTAGONIST |
3_CTL08A_509.RET_ANTAGONIST | RETANTAGONIST | CTL08A | 3CTL08A509.RETANTAGONIST |
85 rows × 3 columns
%%R -i MD -i Counts -o MDS
CondFactor <- factor(MD[["condition"]],
levels = c("DMSO", setdiff(unique(MD[["condition"]]), "DMSO")))
lineFactor <- factor(MD[["line"]],
levels = c("CTL08A","CTL04E"))
mm <- model.matrix(~0+lineFactor+CondFactor )
rownames(mm) <- colnames(Counts)
dds <- DGEList(Counts, group = CondFactor, genes = rownames(Counts))
# Add metadata to dge
dds$samples$line <- lineFactor
dds$samples$nCells <- MD[["nCells"]]
keep.genes<-filterByExpr(dds,design=mm)
print(table(keep.genes))
dds <- dds[keep.genes,, keep.lib.sizes=FALSE]
dds <- calcNormFactors(dds)
# Store MDS
png(tempfile(fileext = ".png"))
MDS <- plotMDS(dds, pch=16, main="MDS")
dev.off()
#Fit
dds <- estimateGLMRobustDisp(dds, mm)
fit <- glmQLFit(dds, mm)
print(dds$samples)
plotBCV(dds)
plotQLDisp(fit)
keep.genes FALSE TRUE 18378 17099 group lib.size norm.factors 3_CTL04E_491.ESTROGEN_AGONIST ESTROGENAGONIST 4423473 0.9938465 2_CTL08A_491.ESTROGEN_AGONIST ESTROGENAGONIST 2512090 1.0077688 3_CTL08A_491.ESTROGEN_AGONIST ESTROGENAGONIST 3439236 0.9917460 2_CTL04E_491.ESTROGEN_AGONIST ESTROGENAGONIST 4830660 0.9745080 1_CTL08A_491.ESTROGEN_AGONIST ESTROGENAGONIST 3474053 1.0004240 1_CTL04E_491.ESTROGEN_AGONIST ESTROGENAGONIST 5166547 0.9756648 3_CTL04E_492.ESTROGEN_ANTAGONIST ESTROGENANTAGONIST 2962526 1.0960258 1_CTL08A_492.ESTROGEN_ANTAGONIST ESTROGENANTAGONIST 4417441 1.0425709 1_CTL04E_492.ESTROGEN_ANTAGONIST ESTROGENANTAGONIST 2874090 1.0402190 2_CTL04E_492.ESTROGEN_ANTAGONIST ESTROGENANTAGONIST 3369016 1.0545351 2_CTL08A_492.ESTROGEN_ANTAGONIST ESTROGENANTAGONIST 3763823 1.0345933 3_CTL08A_492.ESTROGEN_ANTAGONIST ESTROGENANTAGONIST 4184552 1.0368455 2_CTL08A_493.ANDROGEN_AGONIST ANDROGENAGONIST 1988692 1.0437305 3_CTL04E_493.ANDROGEN_AGONIST ANDROGENAGONIST 2565500 1.0215710 3_CTL08A_493.ANDROGEN_AGONIST ANDROGENAGONIST 2812934 1.0295907 2_CTL04E_493.ANDROGEN_AGONIST ANDROGENAGONIST 2004050 1.0085290 1_CTL04E_493.ANDROGEN_AGONIST ANDROGENAGONIST 2851940 1.0200451 1_CTL08A_493.ANDROGEN_AGONIST ANDROGENAGONIST 2195680 1.0378173 2_CTL08A_494.ANDROGEN_ANTAGONIST ANDROGENANTAGONIST 2462589 1.0309499 2_CTL04E_494.ANDROGEN_ANTAGONIST ANDROGENANTAGONIST 5144448 1.0124633 1_CTL04E_494.ANDROGEN_ANTAGONIST ANDROGENANTAGONIST 5778573 1.0306182 3_CTL04E_494.ANDROGEN_ANTAGONIST ANDROGENANTAGONIST 4445737 1.0346039 3_CTL08A_494.ANDROGEN_ANTAGONIST ANDROGENANTAGONIST 1801990 1.0154079 1_CTL08A_494.ANDROGEN_ANTAGONIST ANDROGENANTAGONIST 3716041 1.0332832 1_CTL04E_495.ARYL_HYD_AGONIST ARYLHYDAGONIST 3630508 0.9754919 2_CTL04E_495.ARYL_HYD_AGONIST ARYLHYDAGONIST 3058652 0.9684283 3_CTL08A_495.ARYL_HYD_AGONIST ARYLHYDAGONIST 1604786 1.0063004 3_CTL04E_495.ARYL_HYD_AGONIST ARYLHYDAGONIST 3498771 0.9728194 1_CTL08A_495.ARYL_HYD_AGONIST ARYLHYDAGONIST 2242548 0.9994667 2_CTL08A_495.ARYL_HYD_AGONIST ARYLHYDAGONIST 1478612 1.0041808 2_CTL04E_496.ARYL_HYD_ANTAGONIST ARYLHYDANTAGONIST 8510035 0.9813685 2_CTL08A_496.ARYL_HYD_ANTAGONIST ARYLHYDANTAGONIST 3858791 0.9973786 1_CTL08A_496.ARYL_HYD_ANTAGONIST ARYLHYDANTAGONIST 3995416 1.0015983 3_CTL04E_496.ARYL_HYD_ANTAGONIST ARYLHYDANTAGONIST 10650102 0.9774394 1_CTL04E_496.ARYL_HYD_ANTAGONIST ARYLHYDANTAGONIST 9317474 0.9729097 3_CTL08A_496.ARYL_HYD_ANTAGONIST ARYLHYDANTAGONIST 2736227 1.0074878 3_CTL04E_497.LIVER-X_AGONIST LIVERXAGONIST 2345504 0.9732396 2_CTL04E_497.LIVER-X_AGONIST LIVERXAGONIST 1922886 0.9890393 3_CTL08A_497.LIVER-X_AGONIST LIVERXAGONIST 1919090 0.9861535 2_CTL08A_497.LIVER-X_AGONIST LIVERXAGONIST 2016639 0.9836470 1_CTL04E_497.LIVER-X_AGONIST LIVERXAGONIST 2105208 0.9730657 1_CTL08A_497.LIVER-X_AGONIST LIVERXAGONIST 1377734 0.9790765 1_CTL04E_498.LIVER-X_ANTAGONIST LIVERXANTAGONIST 7299989 1.0204161 3_CTL08A_498.LIVER-X_ANTAGONIST LIVERXANTAGONIST 4438532 1.0450472 1_CTL08A_498.LIVER-X_ANTAGONIST LIVERXANTAGONIST 3652151 1.0318353 3_CTL04E_498.LIVER-X_ANTAGONIST LIVERXANTAGONIST 7999084 1.0261994 2_CTL04E_498.LIVER-X_ANTAGONIST LIVERXANTAGONIST 6835637 1.0224048 2_CTL08A_498.LIVER-X_ANTAGONIST LIVERXANTAGONIST 3512888 1.0252983 1_CTL08A_499.GLUCOCORT_AGONIST GLUCOCORTAGONIST 1803835 0.9487867 2_CTL04E_499.GLUCOCORT_AGONIST GLUCOCORTAGONIST 2167339 0.9768888 3_CTL04E_499.GLUCOCORT_AGONIST GLUCOCORTAGONIST 1887427 0.9775628 3_CTL08A_499.GLUCOCORT_AGONIST GLUCOCORTAGONIST 2303271 0.9500966 1_CTL04E_499.GLUCOCORT_AGONIST GLUCOCORTAGONIST 1757962 0.9676751 2_CTL08A_499.GLUCOCORT_AGONIST GLUCOCORTAGONIST 1209493 0.9844379 2_CTL04E_500.GLUCOCORT_ANTAGONIST GLUCOCORTANTAGONIST 1561222 0.9389632 1_CTL04E_500.GLUCOCORT_ANTAGONIST GLUCOCORTANTAGONIST 2347530 0.9500315 3_CTL04E_500.GLUCOCORT_ANTAGONIST GLUCOCORTANTAGONIST 1632353 0.9758253 2_CTL08A_500.GLUCOCORT_ANTAGONIST GLUCOCORTANTAGONIST 1618093 0.9438703 1_CTL08A_500.GLUCOCORT_ANTAGONIST GLUCOCORTANTAGONIST 2201478 0.9207906 3_CTL08A_500.GLUCOCORT_ANTAGONIST GLUCOCORTANTAGONIST 2073223 0.9430869 3_CTL08A_501.THYROID_AGONIST THYROIDAGONIST 1761744 1.0176930 3_CTL04E_501.THYROID_AGONIST THYROIDAGONIST 2369575 1.0028793 1_CTL08A_501.THYROID_AGONIST THYROIDAGONIST 2332264 1.0130421 2_CTL04E_501.THYROID_AGONIST THYROIDAGONIST 2421023 0.9880429 1_CTL04E_501.THYROID_AGONIST THYROIDAGONIST 3088771 0.9961016 2_CTL08A_501.THYROID_AGONIST THYROIDAGONIST 1066907 1.0149836 3_CTL04E_502.THYROID_ANTAGONIST THYROIDANTAGONIST 3964137 0.9951326 3_CTL08A_502.THYROID_ANTAGONIST THYROIDANTAGONIST 2392829 0.9989727 1_CTL04E_502.THYROID_ANTAGONIST THYROIDANTAGONIST 2163874 0.9923870 1_CTL08A_502.THYROID_ANTAGONIST THYROIDANTAGONIST 2515493 0.9975653 2_CTL08A_502.THYROID_ANTAGONIST THYROIDANTAGONIST 1892818 1.0070596 2_CTL04E_502.THYROID_ANTAGONIST THYROIDANTAGONIST 4000189 0.9892277 2_CTL08A_505.DMSO DMSO 2403268 0.9840651 2_CTL04E_505.DMSO DMSO 3462783 0.9993232 1_CTL08A_505.DMSO DMSO 2584191 0.9935466 3_CTL04E_505.DMSO DMSO 4186498 1.0098125 1_CTL04E_505.DMSO DMSO 2587872 1.0083204 3_CTL08A_505.DMSO DMSO 2106415 0.9859619 1_CTL08A_508.RET_AGONIST RETAGONIST 9216400 1.0074486 1_CTL04E_508.RET_AGONIST RETAGONIST 8571922 1.0037813 1_CTL08A_509.RET_ANTAGONIST RETANTAGONIST 2021938 1.0098215 3_CTL04E_509.RET_ANTAGONIST RETANTAGONIST 2978142 1.0178903 1_CTL04E_509.RET_ANTAGONIST RETANTAGONIST 3194628 1.0261241 2_CTL04E_509.RET_ANTAGONIST RETANTAGONIST 2778720 1.0113240 3_CTL08A_509.RET_ANTAGONIST RETANTAGONIST 2383531 0.9994200 line 3_CTL04E_491.ESTROGEN_AGONIST CTL04E 2_CTL08A_491.ESTROGEN_AGONIST CTL08A 3_CTL08A_491.ESTROGEN_AGONIST CTL08A 2_CTL04E_491.ESTROGEN_AGONIST CTL04E 1_CTL08A_491.ESTROGEN_AGONIST CTL08A 1_CTL04E_491.ESTROGEN_AGONIST CTL04E 3_CTL04E_492.ESTROGEN_ANTAGONIST CTL04E 1_CTL08A_492.ESTROGEN_ANTAGONIST CTL08A 1_CTL04E_492.ESTROGEN_ANTAGONIST CTL04E 2_CTL04E_492.ESTROGEN_ANTAGONIST CTL04E 2_CTL08A_492.ESTROGEN_ANTAGONIST CTL08A 3_CTL08A_492.ESTROGEN_ANTAGONIST CTL08A 2_CTL08A_493.ANDROGEN_AGONIST CTL08A 3_CTL04E_493.ANDROGEN_AGONIST CTL04E 3_CTL08A_493.ANDROGEN_AGONIST CTL08A 2_CTL04E_493.ANDROGEN_AGONIST CTL04E 1_CTL04E_493.ANDROGEN_AGONIST CTL04E 1_CTL08A_493.ANDROGEN_AGONIST CTL08A 2_CTL08A_494.ANDROGEN_ANTAGONIST CTL08A 2_CTL04E_494.ANDROGEN_ANTAGONIST CTL04E 1_CTL04E_494.ANDROGEN_ANTAGONIST CTL04E 3_CTL04E_494.ANDROGEN_ANTAGONIST CTL04E 3_CTL08A_494.ANDROGEN_ANTAGONIST CTL08A 1_CTL08A_494.ANDROGEN_ANTAGONIST CTL08A 1_CTL04E_495.ARYL_HYD_AGONIST CTL04E 2_CTL04E_495.ARYL_HYD_AGONIST CTL04E 3_CTL08A_495.ARYL_HYD_AGONIST CTL08A 3_CTL04E_495.ARYL_HYD_AGONIST CTL04E 1_CTL08A_495.ARYL_HYD_AGONIST CTL08A 2_CTL08A_495.ARYL_HYD_AGONIST CTL08A 2_CTL04E_496.ARYL_HYD_ANTAGONIST CTL04E 2_CTL08A_496.ARYL_HYD_ANTAGONIST CTL08A 1_CTL08A_496.ARYL_HYD_ANTAGONIST CTL08A 3_CTL04E_496.ARYL_HYD_ANTAGONIST CTL04E 1_CTL04E_496.ARYL_HYD_ANTAGONIST CTL04E 3_CTL08A_496.ARYL_HYD_ANTAGONIST CTL08A 3_CTL04E_497.LIVER-X_AGONIST CTL04E 2_CTL04E_497.LIVER-X_AGONIST CTL04E 3_CTL08A_497.LIVER-X_AGONIST CTL08A 2_CTL08A_497.LIVER-X_AGONIST CTL08A 1_CTL04E_497.LIVER-X_AGONIST CTL04E 1_CTL08A_497.LIVER-X_AGONIST CTL08A 1_CTL04E_498.LIVER-X_ANTAGONIST CTL04E 3_CTL08A_498.LIVER-X_ANTAGONIST CTL08A 1_CTL08A_498.LIVER-X_ANTAGONIST CTL08A 3_CTL04E_498.LIVER-X_ANTAGONIST CTL04E 2_CTL04E_498.LIVER-X_ANTAGONIST CTL04E 2_CTL08A_498.LIVER-X_ANTAGONIST CTL08A 1_CTL08A_499.GLUCOCORT_AGONIST CTL08A 2_CTL04E_499.GLUCOCORT_AGONIST CTL04E 3_CTL04E_499.GLUCOCORT_AGONIST CTL04E 3_CTL08A_499.GLUCOCORT_AGONIST CTL08A 1_CTL04E_499.GLUCOCORT_AGONIST CTL04E 2_CTL08A_499.GLUCOCORT_AGONIST CTL08A 2_CTL04E_500.GLUCOCORT_ANTAGONIST CTL04E 1_CTL04E_500.GLUCOCORT_ANTAGONIST CTL04E 3_CTL04E_500.GLUCOCORT_ANTAGONIST CTL04E 2_CTL08A_500.GLUCOCORT_ANTAGONIST CTL08A 1_CTL08A_500.GLUCOCORT_ANTAGONIST CTL08A 3_CTL08A_500.GLUCOCORT_ANTAGONIST CTL08A 3_CTL08A_501.THYROID_AGONIST CTL08A 3_CTL04E_501.THYROID_AGONIST CTL04E 1_CTL08A_501.THYROID_AGONIST CTL08A 2_CTL04E_501.THYROID_AGONIST CTL04E 1_CTL04E_501.THYROID_AGONIST CTL04E 2_CTL08A_501.THYROID_AGONIST CTL08A 3_CTL04E_502.THYROID_ANTAGONIST CTL04E 3_CTL08A_502.THYROID_ANTAGONIST CTL08A 1_CTL04E_502.THYROID_ANTAGONIST CTL04E 1_CTL08A_502.THYROID_ANTAGONIST CTL08A 2_CTL08A_502.THYROID_ANTAGONIST CTL08A 2_CTL04E_502.THYROID_ANTAGONIST CTL04E 2_CTL08A_505.DMSO CTL08A 2_CTL04E_505.DMSO CTL04E 1_CTL08A_505.DMSO CTL08A 3_CTL04E_505.DMSO CTL04E 1_CTL04E_505.DMSO CTL04E 3_CTL08A_505.DMSO CTL08A 1_CTL08A_508.RET_AGONIST CTL08A 1_CTL04E_508.RET_AGONIST CTL04E 1_CTL08A_509.RET_ANTAGONIST CTL08A 3_CTL04E_509.RET_ANTAGONIST CTL04E 1_CTL04E_509.RET_ANTAGONIST CTL04E 2_CTL04E_509.RET_ANTAGONIST CTL04E 3_CTL08A_509.RET_ANTAGONIST CTL08A
adataPbulk.obsm["MDS"] = np.vstack([MDS["x"],MDS["y"]]).T
sc.pl.embedding(adataPbulk, basis="MDS", color=["line","condition","nCells"], wspace=.6, size=400)
%%R -o SharedConditionVsDmso
AllGenes <- dim(Counts)[1]
SharedConditionVsDmso <- list()
for (condition in setdiff(unique(MD[["condition"]]), "DMSO")) {
# Construct the contrast expression
contrastExpression <- paste("CondFactor", condition, sep = "")
print(contrastExpression)
# Use the correct argument name 'contrasts' in makeContrasts
mycontrast <- makeContrasts(contrasts = contrastExpression, levels = mm)
# Remove the extraneous comma and correctly pass all parameters to glmQLFTest
SharedConditionVsDmso[[paste(condition, ".VS.DMSO",sep = "") ]] <- topTags(glmQLFTest(fit, contrast = mycontrast), adjust.method = "fdr", n = AllGenes, sort.by = "logFC")$table
# Optionally, print or store the results
}
[1] "CondFactorESTROGENAGONIST" [1] "CondFactorESTROGENANTAGONIST" [1] "CondFactorANDROGENAGONIST" [1] "CondFactorANDROGENANTAGONIST" [1] "CondFactorARYLHYDAGONIST" [1] "CondFactorARYLHYDANTAGONIST" [1] "CondFactorLIVERXAGONIST" [1] "CondFactorLIVERXANTAGONIST" [1] "CondFactorGLUCOCORTAGONIST" [1] "CondFactorGLUCOCORTANTAGONIST" [1] "CondFactorTHYROIDAGONIST" [1] "CondFactorTHYROIDANTAGONIST" [1] "CondFactorRETAGONIST" [1] "CondFactorRETANTAGONIST"
%%R -o SharedAgonistVsAntagonist
AllGenes <- dim(Counts)[1]
SharedAgonistVsAntagonist <- list()
for (condition in setdiff(unique(MD[["condition"]]), "DMSO")) {
print(condition)
# Only process conditions containing "AGONIST"
if (!grepl("ANTAGONIST", condition)) {
# Replace "AGONIST" with "ANTAGONIST" in the condition string
condAntagonist <- gsub("AGONIST", "ANTAGONIST", condition)
# Construct the contrast expression; adjust the separator as needed
contrastExpression <- paste("CondFactor", condition, "-", "CondFactor", condAntagonist, sep = "")
# Create the contrast using makeContrasts (ensure mm is defined)
mycontrast <- makeContrasts(contrasts = contrastExpression, levels = mm)
# Perform the glmQLFTest and extract the topTags table;
# ensure AllGenes is defined as the number of genes to report
SharedAgonistVsAntagonist[[paste(condition, ".VS.",condAntagonist,"LineShared",sep = "") ]] <- topTags(glmQLFTest(fit, contrast = mycontrast),
adjust.method = "fdr",
n = AllGenes,
sort.by = "logFC")$table
}
}
[1] "ESTROGENAGONIST" [1] "ESTROGENANTAGONIST" [1] "ANDROGENAGONIST" [1] "ANDROGENANTAGONIST" [1] "ARYLHYDAGONIST" [1] "ARYLHYDANTAGONIST" [1] "LIVERXAGONIST" [1] "LIVERXANTAGONIST" [1] "GLUCOCORTAGONIST" [1] "GLUCOCORTANTAGONIST" [1] "THYROIDAGONIST" [1] "THYROIDANTAGONIST" [1] "RETAGONIST" [1] "RETANTAGONIST"
%%R -i MD -i Counts -o MDS
Group <- factor(paste(MD$line,MD$condition,sep="."))
mm <-model.matrix(~0+Group, data=Group)
colnames(mm)<-levels(Group)
rownames(mm)<-rownames(MD)
dds <- DGEList(Counts, group = Group, genes = rownames(Counts))
keep.genes<-filterByExpr(dds,design=mm)
dds <- dds[keep.genes,, keep.lib.sizes=FALSE]
dds <- calcNormFactors(dds)
dds <- estimateGLMRobustDisp(dds, mm)
fit <- glmQLFit(dds, mm)
# Store MDS
png(tempfile(fileext = ".png"))
MDS <- plotMDS(dds, pch=16, main="MDS")
dev.off()
print(dds$samples)
plotBCV(dds)
group lib.size 3_CTL04E_491.ESTROGEN_AGONIST CTL04E.ESTROGENAGONIST 4427367 2_CTL08A_491.ESTROGEN_AGONIST CTL08A.ESTROGENAGONIST 2514436 3_CTL08A_491.ESTROGEN_AGONIST CTL08A.ESTROGENAGONIST 3442351 2_CTL04E_491.ESTROGEN_AGONIST CTL04E.ESTROGENAGONIST 4834386 1_CTL08A_491.ESTROGEN_AGONIST CTL08A.ESTROGENAGONIST 3476933 1_CTL04E_491.ESTROGEN_AGONIST CTL04E.ESTROGENAGONIST 5170759 3_CTL04E_492.ESTROGEN_ANTAGONIST CTL04E.ESTROGENANTAGONIST 2965808 1_CTL08A_492.ESTROGEN_ANTAGONIST CTL08A.ESTROGENANTAGONIST 4421506 1_CTL04E_492.ESTROGEN_ANTAGONIST CTL04E.ESTROGENANTAGONIST 2876663 2_CTL04E_492.ESTROGEN_ANTAGONIST CTL04E.ESTROGENANTAGONIST 3372338 2_CTL08A_492.ESTROGEN_ANTAGONIST CTL08A.ESTROGENANTAGONIST 3767400 3_CTL08A_492.ESTROGEN_ANTAGONIST CTL08A.ESTROGENANTAGONIST 4188254 2_CTL08A_493.ANDROGEN_AGONIST CTL08A.ANDROGENAGONIST 1990877 3_CTL04E_493.ANDROGEN_AGONIST CTL04E.ANDROGENAGONIST 2567998 3_CTL08A_493.ANDROGEN_AGONIST CTL08A.ANDROGENAGONIST 2816289 2_CTL04E_493.ANDROGEN_AGONIST CTL04E.ANDROGENAGONIST 2005829 1_CTL04E_493.ANDROGEN_AGONIST CTL04E.ANDROGENAGONIST 2854669 1_CTL08A_493.ANDROGEN_AGONIST CTL08A.ANDROGENAGONIST 2197971 2_CTL08A_494.ANDROGEN_ANTAGONIST CTL08A.ANDROGENANTAGONIST 2464967 2_CTL04E_494.ANDROGEN_ANTAGONIST CTL04E.ANDROGENANTAGONIST 5149391 1_CTL04E_494.ANDROGEN_ANTAGONIST CTL04E.ANDROGENANTAGONIST 5784351 3_CTL04E_494.ANDROGEN_ANTAGONIST CTL04E.ANDROGENANTAGONIST 4450075 3_CTL08A_494.ANDROGEN_ANTAGONIST CTL08A.ANDROGENANTAGONIST 1803613 1_CTL08A_494.ANDROGEN_ANTAGONIST CTL08A.ANDROGENANTAGONIST 3720011 1_CTL04E_495.ARYL_HYD_AGONIST CTL04E.ARYLHYDAGONIST 3633031 2_CTL04E_495.ARYL_HYD_AGONIST CTL04E.ARYLHYDAGONIST 3060783 3_CTL08A_495.ARYL_HYD_AGONIST CTL08A.ARYLHYDAGONIST 1606040 3_CTL04E_495.ARYL_HYD_AGONIST CTL04E.ARYLHYDAGONIST 3501067 1_CTL08A_495.ARYL_HYD_AGONIST CTL08A.ARYLHYDAGONIST 2244137 2_CTL08A_495.ARYL_HYD_AGONIST CTL08A.ARYLHYDAGONIST 1479713 2_CTL04E_496.ARYL_HYD_ANTAGONIST CTL04E.ARYLHYDANTAGONIST 8516203 2_CTL08A_496.ARYL_HYD_ANTAGONIST CTL08A.ARYLHYDANTAGONIST 3861848 1_CTL08A_496.ARYL_HYD_ANTAGONIST CTL08A.ARYLHYDANTAGONIST 3999145 3_CTL04E_496.ARYL_HYD_ANTAGONIST CTL04E.ARYLHYDANTAGONIST 10657828 1_CTL04E_496.ARYL_HYD_ANTAGONIST CTL04E.ARYLHYDANTAGONIST 9323790 3_CTL08A_496.ARYL_HYD_ANTAGONIST CTL08A.ARYLHYDANTAGONIST 2738449 3_CTL04E_497.LIVER-X_AGONIST CTL04E.LIVERXAGONIST 2347256 2_CTL04E_497.LIVER-X_AGONIST CTL04E.LIVERXAGONIST 1925638 3_CTL08A_497.LIVER-X_AGONIST CTL08A.LIVERXAGONIST 1920433 2_CTL08A_497.LIVER-X_AGONIST CTL08A.LIVERXAGONIST 2018195 1_CTL04E_497.LIVER-X_AGONIST CTL04E.LIVERXAGONIST 2106748 1_CTL08A_497.LIVER-X_AGONIST CTL08A.LIVERXAGONIST 1378708 1_CTL04E_498.LIVER-X_ANTAGONIST CTL04E.LIVERXANTAGONIST 7305800 3_CTL08A_498.LIVER-X_ANTAGONIST CTL08A.LIVERXANTAGONIST 4442417 1_CTL08A_498.LIVER-X_ANTAGONIST CTL08A.LIVERXANTAGONIST 3655092 3_CTL04E_498.LIVER-X_ANTAGONIST CTL04E.LIVERXANTAGONIST 8005354 2_CTL04E_498.LIVER-X_ANTAGONIST CTL04E.LIVERXANTAGONIST 6840843 2_CTL08A_498.LIVER-X_ANTAGONIST CTL08A.LIVERXANTAGONIST 3515767 1_CTL08A_499.GLUCOCORT_AGONIST CTL08A.GLUCOCORTAGONIST 1805590 2_CTL04E_499.GLUCOCORT_AGONIST CTL04E.GLUCOCORTAGONIST 2169085 3_CTL04E_499.GLUCOCORT_AGONIST CTL04E.GLUCOCORTAGONIST 1889182 3_CTL08A_499.GLUCOCORT_AGONIST CTL08A.GLUCOCORTAGONIST 2305049 1_CTL04E_499.GLUCOCORT_AGONIST CTL04E.GLUCOCORTAGONIST 1759638 2_CTL08A_499.GLUCOCORT_AGONIST CTL08A.GLUCOCORTAGONIST 1210958 2_CTL04E_500.GLUCOCORT_ANTAGONIST CTL04E.GLUCOCORTANTAGONIST 1562322 1_CTL04E_500.GLUCOCORT_ANTAGONIST CTL04E.GLUCOCORTANTAGONIST 2349427 3_CTL04E_500.GLUCOCORT_ANTAGONIST CTL04E.GLUCOCORTANTAGONIST 1633786 2_CTL08A_500.GLUCOCORT_ANTAGONIST CTL08A.GLUCOCORTANTAGONIST 1619307 1_CTL08A_500.GLUCOCORT_ANTAGONIST CTL08A.GLUCOCORTANTAGONIST 2203131 3_CTL08A_500.GLUCOCORT_ANTAGONIST CTL08A.GLUCOCORTANTAGONIST 2074793 3_CTL08A_501.THYROID_AGONIST CTL08A.THYROIDAGONIST 1763337 3_CTL04E_501.THYROID_AGONIST CTL04E.THYROIDAGONIST 2371491 1_CTL08A_501.THYROID_AGONIST CTL08A.THYROIDAGONIST 2334211 2_CTL04E_501.THYROID_AGONIST CTL04E.THYROIDAGONIST 2422919 1_CTL04E_501.THYROID_AGONIST CTL04E.THYROIDAGONIST 3091258 2_CTL08A_501.THYROID_AGONIST CTL08A.THYROIDAGONIST 1067833 3_CTL04E_502.THYROID_ANTAGONIST CTL04E.THYROIDANTAGONIST 3967397 3_CTL08A_502.THYROID_ANTAGONIST CTL08A.THYROIDANTAGONIST 2394795 1_CTL04E_502.THYROID_ANTAGONIST CTL04E.THYROIDANTAGONIST 2165687 1_CTL08A_502.THYROID_ANTAGONIST CTL08A.THYROIDANTAGONIST 2517516 2_CTL08A_502.THYROID_ANTAGONIST CTL08A.THYROIDANTAGONIST 1894532 2_CTL04E_502.THYROID_ANTAGONIST CTL04E.THYROIDANTAGONIST 4003403 2_CTL08A_505.DMSO CTL08A.DMSO 2405123 2_CTL04E_505.DMSO CTL04E.DMSO 3465587 1_CTL08A_505.DMSO CTL08A.DMSO 2586260 3_CTL04E_505.DMSO CTL04E.DMSO 4190069 1_CTL04E_505.DMSO CTL04E.DMSO 2590098 3_CTL08A_505.DMSO CTL08A.DMSO 2107954 1_CTL08A_508.RET_AGONIST CTL08A.RETAGONIST 9229327 1_CTL04E_508.RET_AGONIST CTL04E.RETAGONIST 8584248 1_CTL08A_509.RET_ANTAGONIST CTL08A.RETANTAGONIST 2023842 3_CTL04E_509.RET_ANTAGONIST CTL04E.RETANTAGONIST 2980842 1_CTL04E_509.RET_ANTAGONIST CTL04E.RETANTAGONIST 3197554 2_CTL04E_509.RET_ANTAGONIST CTL04E.RETANTAGONIST 2781065 3_CTL08A_509.RET_ANTAGONIST CTL08A.RETANTAGONIST 2385323 norm.factors 3_CTL04E_491.ESTROGEN_AGONIST 0.9999694 2_CTL08A_491.ESTROGEN_AGONIST 1.0172450 3_CTL08A_491.ESTROGEN_AGONIST 1.0035874 2_CTL04E_491.ESTROGEN_AGONIST 0.9909131 1_CTL08A_491.ESTROGEN_AGONIST 1.0071580 1_CTL04E_491.ESTROGEN_AGONIST 0.9940378 3_CTL04E_492.ESTROGEN_ANTAGONIST 1.0868014 1_CTL08A_492.ESTROGEN_ANTAGONIST 1.0326826 1_CTL04E_492.ESTROGEN_ANTAGONIST 1.0431401 2_CTL04E_492.ESTROGEN_ANTAGONIST 1.0509193 2_CTL08A_492.ESTROGEN_ANTAGONIST 1.0272320 3_CTL08A_492.ESTROGEN_ANTAGONIST 1.0278696 2_CTL08A_493.ANDROGEN_AGONIST 1.0558711 3_CTL04E_493.ANDROGEN_AGONIST 1.0297472 3_CTL08A_493.ANDROGEN_AGONIST 1.0335419 2_CTL04E_493.ANDROGEN_AGONIST 1.0178997 1_CTL04E_493.ANDROGEN_AGONIST 1.0241940 1_CTL08A_493.ANDROGEN_AGONIST 1.0424441 2_CTL08A_494.ANDROGEN_ANTAGONIST 1.0364886 2_CTL04E_494.ANDROGEN_ANTAGONIST 1.0169741 1_CTL04E_494.ANDROGEN_ANTAGONIST 1.0316251 3_CTL04E_494.ANDROGEN_ANTAGONIST 1.0368018 3_CTL08A_494.ANDROGEN_ANTAGONIST 1.0159233 1_CTL08A_494.ANDROGEN_ANTAGONIST 1.0366874 1_CTL04E_495.ARYL_HYD_AGONIST 0.9666301 2_CTL04E_495.ARYL_HYD_AGONIST 0.9636368 3_CTL08A_495.ARYL_HYD_AGONIST 1.0061025 3_CTL04E_495.ARYL_HYD_AGONIST 0.9576880 1_CTL08A_495.ARYL_HYD_AGONIST 0.9856165 2_CTL08A_495.ARYL_HYD_AGONIST 1.0026297 2_CTL04E_496.ARYL_HYD_ANTAGONIST 0.9793711 2_CTL08A_496.ARYL_HYD_ANTAGONIST 0.9981406 1_CTL08A_496.ARYL_HYD_ANTAGONIST 1.0073763 3_CTL04E_496.ARYL_HYD_ANTAGONIST 0.9769057 1_CTL04E_496.ARYL_HYD_ANTAGONIST 0.9704311 3_CTL08A_496.ARYL_HYD_ANTAGONIST 1.0105092 3_CTL04E_497.LIVER-X_AGONIST 0.9628673 2_CTL04E_497.LIVER-X_AGONIST 0.9899891 3_CTL08A_497.LIVER-X_AGONIST 0.9829504 2_CTL08A_497.LIVER-X_AGONIST 0.9754438 1_CTL04E_497.LIVER-X_AGONIST 0.9671078 1_CTL08A_497.LIVER-X_AGONIST 0.9756540 1_CTL04E_498.LIVER-X_ANTAGONIST 1.0124261 3_CTL08A_498.LIVER-X_ANTAGONIST 1.0229033 1_CTL08A_498.LIVER-X_ANTAGONIST 1.0173400 3_CTL04E_498.LIVER-X_ANTAGONIST 1.0157928 2_CTL04E_498.LIVER-X_ANTAGONIST 1.0072546 2_CTL08A_498.LIVER-X_ANTAGONIST 1.0178636 1_CTL08A_499.GLUCOCORT_AGONIST 0.9704569 2_CTL04E_499.GLUCOCORT_AGONIST 0.9715258 3_CTL04E_499.GLUCOCORT_AGONIST 0.9927524 3_CTL08A_499.GLUCOCORT_AGONIST 0.9644021 1_CTL04E_499.GLUCOCORT_AGONIST 0.9813597 2_CTL08A_499.GLUCOCORT_AGONIST 1.0034942 2_CTL04E_500.GLUCOCORT_ANTAGONIST 0.9444915 1_CTL04E_500.GLUCOCORT_ANTAGONIST 0.9668535 3_CTL04E_500.GLUCOCORT_ANTAGONIST 0.9931692 2_CTL08A_500.GLUCOCORT_ANTAGONIST 0.9596988 1_CTL08A_500.GLUCOCORT_ANTAGONIST 0.9247260 3_CTL08A_500.GLUCOCORT_ANTAGONIST 0.9425308 3_CTL08A_501.THYROID_AGONIST 1.0209377 3_CTL04E_501.THYROID_AGONIST 0.9982616 1_CTL08A_501.THYROID_AGONIST 1.0092869 2_CTL04E_501.THYROID_AGONIST 0.9870428 1_CTL04E_501.THYROID_AGONIST 0.9924600 2_CTL08A_501.THYROID_AGONIST 1.0184360 3_CTL04E_502.THYROID_ANTAGONIST 0.9959688 3_CTL08A_502.THYROID_ANTAGONIST 0.9982748 1_CTL04E_502.THYROID_ANTAGONIST 0.9967005 1_CTL08A_502.THYROID_ANTAGONIST 0.9962978 2_CTL08A_502.THYROID_ANTAGONIST 1.0145049 2_CTL04E_502.THYROID_ANTAGONIST 0.9859290 2_CTL08A_505.DMSO 0.9783423 2_CTL04E_505.DMSO 0.9943559 1_CTL08A_505.DMSO 0.9819910 3_CTL04E_505.DMSO 1.0067293 1_CTL04E_505.DMSO 1.0026432 3_CTL08A_505.DMSO 0.9768475 1_CTL08A_508.RET_AGONIST 0.9785361 1_CTL04E_508.RET_AGONIST 0.9791249 1_CTL08A_509.RET_ANTAGONIST 1.0115447 3_CTL04E_509.RET_ANTAGONIST 1.0184315 1_CTL04E_509.RET_ANTAGONIST 1.0328630 2_CTL04E_509.RET_ANTAGONIST 1.0129691 3_CTL08A_509.RET_ANTAGONIST 0.9958377
%%R -o univserse
univserse <- rownames(dds)
pd.Series(list(univserse)).to_excel("./{}_{}_DEGs/UNIVERSE_DEGsPerLineAgonistVsAntagonist.xlsx".format(DS, celltype))
adataPbulk.obsm["MDS"] = np.vstack([MDS["x"],MDS["y"]]).T
sc.pl.embedding(adataPbulk, basis="MDS", color=["line","condition","nCells"], wspace=.6, size=400)
%%R
print(head(mm))
print(colnames(mm))
CTL04E.ANDROGENAGONIST CTL04E.ANDROGENANTAGONIST 3_CTL04E_491.ESTROGEN_AGONIST 0 0 2_CTL08A_491.ESTROGEN_AGONIST 0 0 3_CTL08A_491.ESTROGEN_AGONIST 0 0 2_CTL04E_491.ESTROGEN_AGONIST 0 0 1_CTL08A_491.ESTROGEN_AGONIST 0 0 1_CTL04E_491.ESTROGEN_AGONIST 0 0 CTL04E.ARYLHYDAGONIST CTL04E.ARYLHYDANTAGONIST 3_CTL04E_491.ESTROGEN_AGONIST 0 0 2_CTL08A_491.ESTROGEN_AGONIST 0 0 3_CTL08A_491.ESTROGEN_AGONIST 0 0 2_CTL04E_491.ESTROGEN_AGONIST 0 0 1_CTL08A_491.ESTROGEN_AGONIST 0 0 1_CTL04E_491.ESTROGEN_AGONIST 0 0 CTL04E.DMSO CTL04E.ESTROGENAGONIST 3_CTL04E_491.ESTROGEN_AGONIST 0 1 2_CTL08A_491.ESTROGEN_AGONIST 0 0 3_CTL08A_491.ESTROGEN_AGONIST 0 0 2_CTL04E_491.ESTROGEN_AGONIST 0 1 1_CTL08A_491.ESTROGEN_AGONIST 0 0 1_CTL04E_491.ESTROGEN_AGONIST 0 1 CTL04E.ESTROGENANTAGONIST CTL04E.GLUCOCORTAGONIST 3_CTL04E_491.ESTROGEN_AGONIST 0 0 2_CTL08A_491.ESTROGEN_AGONIST 0 0 3_CTL08A_491.ESTROGEN_AGONIST 0 0 2_CTL04E_491.ESTROGEN_AGONIST 0 0 1_CTL08A_491.ESTROGEN_AGONIST 0 0 1_CTL04E_491.ESTROGEN_AGONIST 0 0 CTL04E.GLUCOCORTANTAGONIST CTL04E.LIVERXAGONIST 3_CTL04E_491.ESTROGEN_AGONIST 0 0 2_CTL08A_491.ESTROGEN_AGONIST 0 0 3_CTL08A_491.ESTROGEN_AGONIST 0 0 2_CTL04E_491.ESTROGEN_AGONIST 0 0 1_CTL08A_491.ESTROGEN_AGONIST 0 0 1_CTL04E_491.ESTROGEN_AGONIST 0 0 CTL04E.LIVERXANTAGONIST CTL04E.RETAGONIST 3_CTL04E_491.ESTROGEN_AGONIST 0 0 2_CTL08A_491.ESTROGEN_AGONIST 0 0 3_CTL08A_491.ESTROGEN_AGONIST 0 0 2_CTL04E_491.ESTROGEN_AGONIST 0 0 1_CTL08A_491.ESTROGEN_AGONIST 0 0 1_CTL04E_491.ESTROGEN_AGONIST 0 0 CTL04E.RETANTAGONIST CTL04E.THYROIDAGONIST 3_CTL04E_491.ESTROGEN_AGONIST 0 0 2_CTL08A_491.ESTROGEN_AGONIST 0 0 3_CTL08A_491.ESTROGEN_AGONIST 0 0 2_CTL04E_491.ESTROGEN_AGONIST 0 0 1_CTL08A_491.ESTROGEN_AGONIST 0 0 1_CTL04E_491.ESTROGEN_AGONIST 0 0 CTL04E.THYROIDANTAGONIST CTL08A.ANDROGENAGONIST 3_CTL04E_491.ESTROGEN_AGONIST 0 0 2_CTL08A_491.ESTROGEN_AGONIST 0 0 3_CTL08A_491.ESTROGEN_AGONIST 0 0 2_CTL04E_491.ESTROGEN_AGONIST 0 0 1_CTL08A_491.ESTROGEN_AGONIST 0 0 1_CTL04E_491.ESTROGEN_AGONIST 0 0 CTL08A.ANDROGENANTAGONIST CTL08A.ARYLHYDAGONIST 3_CTL04E_491.ESTROGEN_AGONIST 0 0 2_CTL08A_491.ESTROGEN_AGONIST 0 0 3_CTL08A_491.ESTROGEN_AGONIST 0 0 2_CTL04E_491.ESTROGEN_AGONIST 0 0 1_CTL08A_491.ESTROGEN_AGONIST 0 0 1_CTL04E_491.ESTROGEN_AGONIST 0 0 CTL08A.ARYLHYDANTAGONIST CTL08A.DMSO 3_CTL04E_491.ESTROGEN_AGONIST 0 0 2_CTL08A_491.ESTROGEN_AGONIST 0 0 3_CTL08A_491.ESTROGEN_AGONIST 0 0 2_CTL04E_491.ESTROGEN_AGONIST 0 0 1_CTL08A_491.ESTROGEN_AGONIST 0 0 1_CTL04E_491.ESTROGEN_AGONIST 0 0 CTL08A.ESTROGENAGONIST CTL08A.ESTROGENANTAGONIST 3_CTL04E_491.ESTROGEN_AGONIST 0 0 2_CTL08A_491.ESTROGEN_AGONIST 1 0 3_CTL08A_491.ESTROGEN_AGONIST 1 0 2_CTL04E_491.ESTROGEN_AGONIST 0 0 1_CTL08A_491.ESTROGEN_AGONIST 1 0 1_CTL04E_491.ESTROGEN_AGONIST 0 0 CTL08A.GLUCOCORTAGONIST 3_CTL04E_491.ESTROGEN_AGONIST 0 2_CTL08A_491.ESTROGEN_AGONIST 0 3_CTL08A_491.ESTROGEN_AGONIST 0 2_CTL04E_491.ESTROGEN_AGONIST 0 1_CTL08A_491.ESTROGEN_AGONIST 0 1_CTL04E_491.ESTROGEN_AGONIST 0 CTL08A.GLUCOCORTANTAGONIST CTL08A.LIVERXAGONIST 3_CTL04E_491.ESTROGEN_AGONIST 0 0 2_CTL08A_491.ESTROGEN_AGONIST 0 0 3_CTL08A_491.ESTROGEN_AGONIST 0 0 2_CTL04E_491.ESTROGEN_AGONIST 0 0 1_CTL08A_491.ESTROGEN_AGONIST 0 0 1_CTL04E_491.ESTROGEN_AGONIST 0 0 CTL08A.LIVERXANTAGONIST CTL08A.RETAGONIST 3_CTL04E_491.ESTROGEN_AGONIST 0 0 2_CTL08A_491.ESTROGEN_AGONIST 0 0 3_CTL08A_491.ESTROGEN_AGONIST 0 0 2_CTL04E_491.ESTROGEN_AGONIST 0 0 1_CTL08A_491.ESTROGEN_AGONIST 0 0 1_CTL04E_491.ESTROGEN_AGONIST 0 0 CTL08A.RETANTAGONIST CTL08A.THYROIDAGONIST 3_CTL04E_491.ESTROGEN_AGONIST 0 0 2_CTL08A_491.ESTROGEN_AGONIST 0 0 3_CTL08A_491.ESTROGEN_AGONIST 0 0 2_CTL04E_491.ESTROGEN_AGONIST 0 0 1_CTL08A_491.ESTROGEN_AGONIST 0 0 1_CTL04E_491.ESTROGEN_AGONIST 0 0 CTL08A.THYROIDANTAGONIST 3_CTL04E_491.ESTROGEN_AGONIST 0 2_CTL08A_491.ESTROGEN_AGONIST 0 3_CTL08A_491.ESTROGEN_AGONIST 0 2_CTL04E_491.ESTROGEN_AGONIST 0 1_CTL08A_491.ESTROGEN_AGONIST 0 1_CTL04E_491.ESTROGEN_AGONIST 0 [1] "CTL04E.ANDROGENAGONIST" "CTL04E.ANDROGENANTAGONIST" [3] "CTL04E.ARYLHYDAGONIST" "CTL04E.ARYLHYDANTAGONIST" [5] "CTL04E.DMSO" "CTL04E.ESTROGENAGONIST" [7] "CTL04E.ESTROGENANTAGONIST" "CTL04E.GLUCOCORTAGONIST" [9] "CTL04E.GLUCOCORTANTAGONIST" "CTL04E.LIVERXAGONIST" [11] "CTL04E.LIVERXANTAGONIST" "CTL04E.RETAGONIST" [13] "CTL04E.RETANTAGONIST" "CTL04E.THYROIDAGONIST" [15] "CTL04E.THYROIDANTAGONIST" "CTL08A.ANDROGENAGONIST" [17] "CTL08A.ANDROGENANTAGONIST" "CTL08A.ARYLHYDAGONIST" [19] "CTL08A.ARYLHYDANTAGONIST" "CTL08A.DMSO" [21] "CTL08A.ESTROGENAGONIST" "CTL08A.ESTROGENANTAGONIST" [23] "CTL08A.GLUCOCORTAGONIST" "CTL08A.GLUCOCORTANTAGONIST" [25] "CTL08A.LIVERXAGONIST" "CTL08A.LIVERXANTAGONIST" [27] "CTL08A.RETAGONIST" "CTL08A.RETANTAGONIST" [29] "CTL08A.THYROIDAGONIST" "CTL08A.THYROIDANTAGONIST"
%%R -o DifferentialEffectAgonistVsAntagonist
AllGenes <- dim(Counts)[1]
DifferentialEffectAgonistVsAntagonist <- list()
for (condition in setdiff(unique(MD[["condition"]]), "DMSO")) {
# Only process conditions containing "AGONIST"
if (!grepl("ANTAGONIST", condition)) {
# Replace "AGONIST" with "ANTAGONIST" in the condition string
condAntagonist <- gsub("AGONIST", "ANTAGONIST", condition)
contrastExpression <- paste("(CTL04E.", condition, "-", "CTL04E.", condAntagonist,
")-(CTL08A.", condition, "-", "CTL08A.", condAntagonist, ")",
sep = "")
print(contrastExpression)
mycontrast <- makeContrasts(contrasts = contrastExpression, levels = mm)
DifferentialEffectAgonistVsAntagonist[[paste(condition, ".VS.",condAntagonist,"DifferentialCTL04E-CTL08A",sep = "") ]] <- topTags(glmQLFTest(fit, contrast = mycontrast), adjust.method = "fdr", n = AllGenes, sort.by = "PValue")$table
}
}
[1] "(CTL04E.ESTROGENAGONIST-CTL04E.ESTROGENANTAGONIST)-(CTL08A.ESTROGENAGONIST-CTL08A.ESTROGENANTAGONIST)" [1] "(CTL04E.ANDROGENAGONIST-CTL04E.ANDROGENANTAGONIST)-(CTL08A.ANDROGENAGONIST-CTL08A.ANDROGENANTAGONIST)" [1] "(CTL04E.ARYLHYDAGONIST-CTL04E.ARYLHYDANTAGONIST)-(CTL08A.ARYLHYDAGONIST-CTL08A.ARYLHYDANTAGONIST)" [1] "(CTL04E.LIVERXAGONIST-CTL04E.LIVERXANTAGONIST)-(CTL08A.LIVERXAGONIST-CTL08A.LIVERXANTAGONIST)" [1] "(CTL04E.GLUCOCORTAGONIST-CTL04E.GLUCOCORTANTAGONIST)-(CTL08A.GLUCOCORTAGONIST-CTL08A.GLUCOCORTANTAGONIST)" [1] "(CTL04E.THYROIDAGONIST-CTL04E.THYROIDANTAGONIST)-(CTL08A.THYROIDAGONIST-CTL08A.THYROIDANTAGONIST)" [1] "(CTL04E.RETAGONIST-CTL04E.RETANTAGONIST)-(CTL08A.RETAGONIST-CTL08A.RETANTAGONIST)"
%%R -o PerLineAgonistVsAntagonist
AllGenes <- dim(Counts)[1]
PerLineAgonistVsAntagonist <- list()
for (condition in setdiff(unique(MD[["condition"]]), "DMSO")) {
# Only process conditions containing "AGONIST"
if (!grepl("ANTAGONIST", condition)) {
# Replace "AGONIST" with "ANTAGONIST" in the condition string
condAntagonist <- gsub("AGONIST", "ANTAGONIST", condition)
contrastExpressionCTL04E <- paste("CTL04E.", condition, "-", "CTL04E.", condAntagonist,sep = "")
print(contrastExpressionCTL04E)
contrastExpressionCTL04E <- makeContrasts(contrasts = contrastExpressionCTL04E, levels = mm)
PerLineAgonistVsAntagonist[[paste(condition, ".VS.",condAntagonist,"_CTL04E",sep = "") ]] <- topTags(glmQLFTest(fit, contrast = contrastExpressionCTL04E), adjust.method = "fdr", n = AllGenes, sort.by = "PValue")$table
contrastExpressionCTL08A <- paste("CTL08A.", condition, "-", "CTL08A.", condAntagonist,sep = "")
print(contrastExpressionCTL08A)
contrastExpressionCTL08A <- makeContrasts(contrasts = contrastExpressionCTL08A, levels = mm)
PerLineAgonistVsAntagonist[[paste(condition, ".VS.",condAntagonist,"_CTL08A",sep = "") ]] <- topTags(glmQLFTest(fit, contrast = contrastExpressionCTL08A), adjust.method = "fdr", n = AllGenes, sort.by = "PValue")$table
}
}
[1] "CTL04E.ESTROGENAGONIST-CTL04E.ESTROGENANTAGONIST" [1] "CTL08A.ESTROGENAGONIST-CTL08A.ESTROGENANTAGONIST" [1] "CTL04E.ANDROGENAGONIST-CTL04E.ANDROGENANTAGONIST" [1] "CTL08A.ANDROGENAGONIST-CTL08A.ANDROGENANTAGONIST" [1] "CTL04E.ARYLHYDAGONIST-CTL04E.ARYLHYDANTAGONIST" [1] "CTL08A.ARYLHYDAGONIST-CTL08A.ARYLHYDANTAGONIST" [1] "CTL04E.LIVERXAGONIST-CTL04E.LIVERXANTAGONIST" [1] "CTL08A.LIVERXAGONIST-CTL08A.LIVERXANTAGONIST" [1] "CTL04E.GLUCOCORTAGONIST-CTL04E.GLUCOCORTANTAGONIST" [1] "CTL08A.GLUCOCORTAGONIST-CTL08A.GLUCOCORTANTAGONIST" [1] "CTL04E.THYROIDAGONIST-CTL04E.THYROIDANTAGONIST" [1] "CTL08A.THYROIDAGONIST-CTL08A.THYROIDANTAGONIST" [1] "CTL04E.RETAGONIST-CTL04E.RETANTAGONIST" [1] "CTL08A.RETAGONIST-CTL08A.RETANTAGONIST"
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
# Prepare compounds list
CompoundsList = sorted(set("_".join(i.split("_")[:-1]) for i in PerLineAgonistVsAntagonist.keys()))
# Set seaborn style
sns.set(style="whitegrid", context="talk")
# Calculate subplot grid size
n = len(CompoundsList)
cols = 4
rows = int(np.ceil(n / cols))
# Create figure and axes
fig, axes = plt.subplots(rows, cols, figsize=(cols * 5, rows * 5))
fig.suptitle(f"Agonist vs Antagonist logFC Comparison Across lines ({DS}, {celltype})", fontsize=20, y=1.02)
axes = axes.flatten()
# Loop through compounds and plot
for idx, i in enumerate(CompoundsList):
ax = axes[idx]
try:
CTL04Eres = PerLineAgonistVsAntagonist[f"{i}_CTL04E"].copy()
CTL08Ares = PerLineAgonistVsAntagonist[f"{i}_CTL08A"].copy()
CTL04Eres = CTL04Eres[["logFC"]].rename(columns={"logFC":"CTL04E logFC"})
CTL08Ares = CTL08Ares[["logFC"]].rename(columns={"logFC":"CTL08A logFC"})
merged = pd.concat([CTL04Eres, CTL08Ares], axis=1)
# Scatterplot
sns.scatterplot(data=merged, x="CTL04E logFC", y="CTL08A logFC", ax=ax, s=20, alpha=0.7)
# Diagonal line
lims = [
np.min([merged["CTL04E logFC"].min(), merged["CTL08A logFC"].min()]),
np.max([merged["CTL04E logFC"].max(), merged["CTL08A logFC"].max()]),
]
ax.plot(lims, lims, "--", color="gray", linewidth=1)
ax.set_xlim(lims)
ax.set_ylim(lims)
# Titles and labels
ax.set_title(i, fontsize=14)
ax.set_xlabel("CTL04E logFC")
ax.set_ylabel("CTL08A logFC")
except KeyError as e:
ax.set_visible(False) # Skip if data is missing
# Hide any unused subplots
for j in range(idx + 1, len(axes)):
axes[j].set_visible(False)
plt.tight_layout()
plt.show()
%%R -o PerLineConditionVsDMSO
AllGenes <- dim(Counts)[1]
PerLineConditionVsDMSO <- list()
for (condition in setdiff(unique(MD[["condition"]]), "DMSO")) {
# Only process conditions containing "AGONIST"
contrastExpressionCTL04E <- paste("CTL04E.", condition, "-", "CTL04E.DMSO",sep = "")
print(contrastExpressionCTL04E)
contrastExpressionCTL04E <- makeContrasts(contrasts = contrastExpressionCTL04E, levels = mm)
PerLineConditionVsDMSO[[paste(condition, ".VS.DMSO_CTL04E",sep = "") ]] <- topTags(glmQLFTest(fit, contrast = contrastExpressionCTL04E), adjust.method = "fdr", n = AllGenes, sort.by = "PValue")$table
contrastExpressionCTL08A <- paste("CTL08A.", condition, "-", "CTL08A.DMSO",sep = "")
print(contrastExpressionCTL08A)
contrastExpressionCTL08A <- makeContrasts(contrasts = contrastExpressionCTL08A, levels = mm)
PerLineConditionVsDMSO[[paste(condition, ".VS.DMSO_CTL08A",sep = "") ]] <- topTags(glmQLFTest(fit, contrast = contrastExpressionCTL08A), adjust.method = "fdr", n = AllGenes, sort.by = "PValue")$table
}
[1] "CTL04E.ESTROGENAGONIST-CTL04E.DMSO" [1] "CTL08A.ESTROGENAGONIST-CTL08A.DMSO" [1] "CTL04E.ESTROGENANTAGONIST-CTL04E.DMSO" [1] "CTL08A.ESTROGENANTAGONIST-CTL08A.DMSO" [1] "CTL04E.ANDROGENAGONIST-CTL04E.DMSO" [1] "CTL08A.ANDROGENAGONIST-CTL08A.DMSO" [1] "CTL04E.ANDROGENANTAGONIST-CTL04E.DMSO" [1] "CTL08A.ANDROGENANTAGONIST-CTL08A.DMSO" [1] "CTL04E.ARYLHYDAGONIST-CTL04E.DMSO" [1] "CTL08A.ARYLHYDAGONIST-CTL08A.DMSO" [1] "CTL04E.ARYLHYDANTAGONIST-CTL04E.DMSO" [1] "CTL08A.ARYLHYDANTAGONIST-CTL08A.DMSO" [1] "CTL04E.LIVERXAGONIST-CTL04E.DMSO" [1] "CTL08A.LIVERXAGONIST-CTL08A.DMSO" [1] "CTL04E.LIVERXANTAGONIST-CTL04E.DMSO" [1] "CTL08A.LIVERXANTAGONIST-CTL08A.DMSO" [1] "CTL04E.GLUCOCORTAGONIST-CTL04E.DMSO" [1] "CTL08A.GLUCOCORTAGONIST-CTL08A.DMSO" [1] "CTL04E.GLUCOCORTANTAGONIST-CTL04E.DMSO" [1] "CTL08A.GLUCOCORTANTAGONIST-CTL08A.DMSO" [1] "CTL04E.THYROIDAGONIST-CTL04E.DMSO" [1] "CTL08A.THYROIDAGONIST-CTL08A.DMSO" [1] "CTL04E.THYROIDANTAGONIST-CTL04E.DMSO" [1] "CTL08A.THYROIDANTAGONIST-CTL08A.DMSO" [1] "CTL04E.RETAGONIST-CTL04E.DMSO" [1] "CTL08A.RETAGONIST-CTL08A.DMSO" [1] "CTL04E.RETANTAGONIST-CTL04E.DMSO" [1] "CTL08A.RETANTAGONIST-CTL08A.DMSO"
CompoundsList = sorted(set("_".join(i.split("_")[:-1]) for i in PerLineConditionVsDMSO.keys()))
# Set seaborn style
sns.set(style="whitegrid", context="talk")
# Calculate subplot grid size
n = len(CompoundsList)
cols = 6
rows = int(np.ceil(n / cols))
# Create figure and axes
fig, axes = plt.subplots(rows, cols, figsize=(cols * 5, rows * 5))
fig.suptitle(f"Compound vs DMSO logFC Comparison Across lines ({DS}, {celltype})", fontsize=20, y=1.02)
axes = axes.flatten()
# Loop through compounds and plot
for idx, i in enumerate(CompoundsList):
ax = axes[idx]
try:
CTL04Eres = PerLineConditionVsDMSO[f"{i}_CTL04E"].copy()
CTL08Ares = PerLineConditionVsDMSO[f"{i}_CTL08A"].copy()
CTL04Eres = CTL04Eres[["logFC"]].rename(columns={"logFC":"CTL04E logFC"})
CTL08Ares = CTL08Ares[["logFC"]].rename(columns={"logFC":"CTL08A logFC"})
merged = pd.concat([CTL04Eres, CTL08Ares], axis=1)
# Scatterplot
sns.scatterplot(data=merged, x="CTL04E logFC", y="CTL08A logFC", ax=ax, s=20, alpha=0.7)
# Diagonal line
lims = [
np.min([merged["CTL04E logFC"].min(), merged["CTL08A logFC"].min()]),
np.max([merged["CTL04E logFC"].max(), merged["CTL08A logFC"].max()]),
]
ax.plot(lims, lims, "--", color="gray", linewidth=1)
ax.set_xlim(lims)
ax.set_ylim(lims)
# Titles and labels
ax.set_title(i, fontsize=14)
ax.set_xlabel("CTL04E logFC")
ax.set_ylabel("CTL08A logFC")
except KeyError as e:
ax.set_visible(False) # Skip if data is missing
# Hide any unused subplots
for j in range(idx + 1, len(axes)):
axes[j].set_visible(False)
plt.tight_layout()
plt.show()
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
# Prepare compounds list
CompoundsList = sorted(set("_".join(i.split("_")[:-1]) for i in PerLineAgonistVsAntagonist.keys()))
# Set seaborn style
sns.set(style="whitegrid", context="talk")
# Calculate subplot grid size
n = len(CompoundsList)
cols = 4
rows = int(np.ceil(n / cols))
# Create figure and axes
fig, axes = plt.subplots(rows, cols, figsize=(cols * 5, rows * 5))
fig.suptitle(f"Agonist vs Antagonist logFC Comparison Across lines ({DS}, {celltype})", fontsize=20, y=1.02)
axes = axes.flatten()
# Loop through compounds and plot
for idx, i in enumerate(CompoundsList):
ax = axes[idx]
try:
CTL04Eres = PerLineAgonistVsAntagonist[f"{i}_CTL04E"].copy()
CTL08Ares = PerLineAgonistVsAntagonist[f"{i}_CTL08A"].copy()
CTL04Eres = CTL04Eres[["logFC"]].rename(columns={"logFC":"CTL04E logFC"})
CTL08Ares = CTL08Ares[["logFC"]].rename(columns={"logFC":"CTL08A logFC"})
merged = pd.concat([CTL04Eres, CTL08Ares], axis=1)
# Scatterplot
sns.scatterplot(data=merged, x="CTL04E logFC", y="CTL08A logFC", ax=ax, s=20, alpha=0.7)
# Diagonal line
lims = [
np.min([merged["CTL04E logFC"].min(), merged["CTL08A logFC"].min()]),
np.max([merged["CTL04E logFC"].max(), merged["CTL08A logFC"].max()]),
]
ax.plot(lims, lims, "--", color="gray", linewidth=1)
ax.set_xlim(lims)
ax.set_ylim(lims)
# Titles and labels
ax.set_title(i, fontsize=14)
ax.set_xlabel("CTL04E logFC")
ax.set_ylabel("CTL08A logFC")
except KeyError as e:
ax.set_visible(False) # Skip if data is missing
# Hide any unused subplots
for j in range(idx + 1, len(axes)):
axes[j].set_visible(False)
plt.tight_layout()
plt.show()
%%R -o DifferentialEffectCompoundVsDMSO
AllGenes <- dim(Counts)[1]
DifferentialEffectCompoundVsDMSO <- list()
for (condition in setdiff(unique(MD[["condition"]]), "DMSO")) {
# Only process conditions containing "AGONIST"
# Replace "AGONIST" with "ANTAGONIST" in the condition string
condAntagonist <- gsub("AGONIST", "ANTAGONIST", condition)
contrastExpression <- paste("(CTL04E.", condition, "-", "CTL04E.DMSO)-(CTL08A.", condition, "-", "CTL08A.DMSO)",
sep = "")
print(contrastExpression)
mycontrast <- makeContrasts(contrasts = contrastExpression, levels = mm)
DifferentialEffectCompoundVsDMSO[[paste(condition, ".VS.DMSO.DifferentialCTL04E-CTL08A",sep = "") ]] <- topTags(glmQLFTest(fit, contrast = mycontrast), adjust.method = "fdr", n = AllGenes, sort.by = "PValue")$table
}
[1] "(CTL04E.ESTROGENAGONIST-CTL04E.DMSO)-(CTL08A.ESTROGENAGONIST-CTL08A.DMSO)" [1] "(CTL04E.ESTROGENANTAGONIST-CTL04E.DMSO)-(CTL08A.ESTROGENANTAGONIST-CTL08A.DMSO)" [1] "(CTL04E.ANDROGENAGONIST-CTL04E.DMSO)-(CTL08A.ANDROGENAGONIST-CTL08A.DMSO)" [1] "(CTL04E.ANDROGENANTAGONIST-CTL04E.DMSO)-(CTL08A.ANDROGENANTAGONIST-CTL08A.DMSO)" [1] "(CTL04E.ARYLHYDAGONIST-CTL04E.DMSO)-(CTL08A.ARYLHYDAGONIST-CTL08A.DMSO)" [1] "(CTL04E.ARYLHYDANTAGONIST-CTL04E.DMSO)-(CTL08A.ARYLHYDANTAGONIST-CTL08A.DMSO)" [1] "(CTL04E.LIVERXAGONIST-CTL04E.DMSO)-(CTL08A.LIVERXAGONIST-CTL08A.DMSO)" [1] "(CTL04E.LIVERXANTAGONIST-CTL04E.DMSO)-(CTL08A.LIVERXANTAGONIST-CTL08A.DMSO)" [1] "(CTL04E.GLUCOCORTAGONIST-CTL04E.DMSO)-(CTL08A.GLUCOCORTAGONIST-CTL08A.DMSO)" [1] "(CTL04E.GLUCOCORTANTAGONIST-CTL04E.DMSO)-(CTL08A.GLUCOCORTANTAGONIST-CTL08A.DMSO)" [1] "(CTL04E.THYROIDAGONIST-CTL04E.DMSO)-(CTL08A.THYROIDAGONIST-CTL08A.DMSO)" [1] "(CTL04E.THYROIDANTAGONIST-CTL04E.DMSO)-(CTL08A.THYROIDANTAGONIST-CTL08A.DMSO)" [1] "(CTL04E.RETAGONIST-CTL04E.DMSO)-(CTL08A.RETAGONIST-CTL08A.DMSO)" [1] "(CTL04E.RETANTAGONIST-CTL04E.DMSO)-(CTL08A.RETANTAGONIST-CTL08A.DMSO)"
FDR = 0.05
dir_path = "./{}_{}_DEGs".format(DS, celltype)
if not os.path.exists(dir_path):
os.makedirs(dir_path)
SharedConditionVsDmso_filtered = {}
for k in list(SharedConditionVsDmso.keys()):
DEGs = SharedConditionVsDmso[k]
DEGs[DEGs["FDR"] < FDR].to_excel("./{}_{}_DEGs/SharedCompoundVsDmso.{}_{}_{}.FDR{}.xlsx".format(DS, celltype,k, celltype, DS,FDR))
SharedConditionVsDmso_filtered[k] = DEGs[DEGs["FDR"] < FDR]
SharedAgonistVsAntagonist_filtered = {}
for k in list(SharedAgonistVsAntagonist.keys()):
DEGs = SharedAgonistVsAntagonist[k]
DEGs[DEGs["FDR"] < FDR].to_excel("./{}_{}_DEGs/SharedAgonistVsAntagonist.{}_{}_{}.FDR{}.xlsx".format(DS, celltype,k, celltype, DS,FDR))
SharedAgonistVsAntagonist_filtered[k] = DEGs[DEGs["FDR"] < FDR]
DifferentialEffectCompoundVsDMSO_filtered = {}
for k in list(DifferentialEffectCompoundVsDMSO.keys()):
DEGs = DifferentialEffectCompoundVsDMSO[k]
DEGs[DEGs["FDR"] < FDR].to_excel("./{}_{}_DEGs/DifferentialEffectCompoundVsDMSO.{}_{}_{}.FDR{}.xlsx".format(DS, celltype, k, celltype, DS,FDR))
DifferentialEffectCompoundVsDMSO_filtered[k] = DEGs[DEGs["FDR"] < FDR]
DifferentialEffectAgonistVsAntagonist_filtered = {}
for k in list(DifferentialEffectAgonistVsAntagonist.keys()):
DEGs = DifferentialEffectAgonistVsAntagonist[k]
DEGs[DEGs["FDR"] < FDR].to_excel("./{}_{}_DEGs/DifferentialEffectAgonistVsAntagonist.{}_{}_{}.FDR{}.xlsx".format(DS, celltype, k, celltype, DS,FDR))
DifferentialEffectAgonistVsAntagonist_filtered[k] = DEGs[DEGs["FDR"] < FDR]
PerLineAgonistVsAntagonist_filtered = {}
for k in list(PerLineAgonistVsAntagonist.keys()):
DEGs = PerLineAgonistVsAntagonist[k]
DEGs[DEGs["FDR"] < FDR].to_excel("./{}_{}_DEGs/DEGsPerLineAgonistVsAntagonist.{}_{}_{}.FDR{}.xlsx".format(DS, celltype,k, celltype, DS,FDR))
PerLineAgonistVsAntagonist_filtered["UP_{}".format(k)] = DEGs[(DEGs["FDR"] < FDR) & (DEGs["logFC"] > 2)]
PerLineAgonistVsAntagonist_filtered["DOWN_{}".format(k)] = DEGs[(DEGs["FDR"] < FDR) & (DEGs["logFC"] < -2)]
PerLineConditionVsDMSO_filtered = {}
for k in list(PerLineConditionVsDMSO.keys()):
DEGs = PerLineConditionVsDMSO[k]
DEGs[DEGs["FDR"] < FDR].to_excel("./{}_{}_DEGs/DEGsPerLineCompoundVsDMSO.{}_{}_{}.FDR{}.xlsx".format(DS, celltype,k, celltype, DS,FDR))
PerLineConditionVsDMSO_filtered["UP_{}".format(k)] = DEGs[(DEGs["FDR"] < FDR) & (DEGs["logFC"] > 2)]
PerLineConditionVsDMSO_filtered["DOWN_{}".format(k)] = DEGs[(DEGs["FDR"] < FDR) & (DEGs["logFC"] < -2)]
### AGONIST VS ANTAGONIST
### AGONIST VS ANTAGONIST
### AGONIST VS ANTAGONIST
### AGONIST VS ANTAGONIST
JaccardDF = pd.DataFrame(index = list(PerLineAgonistVsAntagonist_filtered.keys()), columns = list(PerLineAgonistVsAntagonist_filtered.keys()))
for combination in list(itertools.combinations(list(PerLineAgonistVsAntagonist_filtered.keys()),2)):
set0 = set(PerLineAgonistVsAntagonist_filtered[combination[0]].genes.tolist())
set1 = set(PerLineAgonistVsAntagonist_filtered[combination[1]].genes.tolist())
Int = set0.intersection(set1)
Union = set0.union(set1)
Jaccard = len(Int) / len(Union)
JaccardDF.loc[combination[0],combination[1]] = Jaccard
JaccardDF.loc[combination[1],combination[0]] = Jaccard
JaccardDF = JaccardDF.fillna(1)
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
# Assuming JaccardDF is your DataFrame of Jaccard similarities
# Convert similarity to distance: distance = 1 - similarity
distance_matrix = 1 - JaccardDF.astype(float)
# Optionally, sort the DataFrame for a canonical order
distance_matrix = distance_matrix.sort_index().sort_index(axis=1)
# Create a clustermap using the 'bwr' colormap with improved aesthetics.
g = sns.clustermap(
distance_matrix,
metric="euclidean", # or 'cityblock', 'cosine', etc.
method="average", # clustering method: single, complete, ward, etc.
cmap="bwr_r", # blue-white-red palette
figsize=(15, 15),
cbar_pos=(0.02, 0.8, 0.05, 0.18) # adjust colorbar position if needed
)
g.ax_row_dendrogram.set_visible(False)
g.ax_col_dendrogram.set_visible(False)
# The clustermap reorders rows/columns; get the reordered DataFrame.
reordered_data = distance_matrix.iloc[
g.dendrogram_row.reordered_ind, g.dendrogram_col.reordered_ind
]
# Annotate each cell with the corresponding Jaccard similarity value.
# Note: similarity = 1 - distance
for i, row in enumerate(reordered_data.index):
for j, col in enumerate(reordered_data.columns):
d = reordered_data.loc[row, col]
jaccard_sim = 1 - d # Convert back to similarity
g.ax_heatmap.text(
j + 0.5, i + 0.5,
f"{jaccard_sim:.2f}",
ha='center', va='center',
color='black', fontsize=8
)
# Remove grid lines from the heatmap
g.ax_heatmap.grid(False)
# Improve overall aesthetics
g.fig.suptitle("DEGs Jaccard similarity Agonist vs Antagonist", fontsize=16, y=1.05)
plt.setp(g.ax_heatmap.xaxis.get_majorticklabels(), rotation=90, fontsize=8)
plt.setp(g.ax_heatmap.yaxis.get_majorticklabels(), rotation=0, fontsize=8)
g.ax_heatmap.tick_params(axis='both', which='both', length=0)
g.cax.set_position([.92, .2, .03, .45]) # adjust the colorbar position if needed
plt.show()
### CONDITION VS DMSO
### CONDITION VS DMSO
### CONDITION VS DMSO
### CONDITION VS DMSO
JaccardDF = pd.DataFrame(index = list(PerLineConditionVsDMSO_filtered.keys()), columns = list(PerLineConditionVsDMSO_filtered.keys()))
for combination in list(itertools.combinations(list(PerLineConditionVsDMSO_filtered.keys()),2)):
set0 = set(PerLineConditionVsDMSO_filtered[combination[0]].genes.tolist())
set1 = set(PerLineConditionVsDMSO_filtered[combination[1]].genes.tolist())
Int = set0.intersection(set1)
Union = set0.union(set1)
Jaccard = len(Int) / len(Union)
JaccardDF.loc[combination[0],combination[1]] = Jaccard
JaccardDF.loc[combination[1],combination[0]] = Jaccard
JaccardDF = JaccardDF.fillna(1)
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
# Assuming JaccardDF is your DataFrame of Jaccard similarities
# Convert similarity to distance: distance = 1 - similarity
distance_matrix = 1 - JaccardDF.astype(float)
# Optionally, sort the DataFrame for a canonical order
distance_matrix = distance_matrix.sort_index().sort_index(axis=1)
# Create a clustermap using the 'bwr' colormap with improved aesthetics.
g = sns.clustermap(
distance_matrix,
metric="euclidean", # or 'cityblock', 'cosine', etc.
method="average", # clustering method: single, complete, ward, etc.
cmap="bwr_r", # blue-white-red palette
figsize=(20, 20),
cbar_pos=(0.02, 0.8, 0.05, 0.18) # adjust colorbar position if needed
)
# Hide the dendrograms (do not display them)
g.ax_row_dendrogram.set_visible(False)
g.ax_col_dendrogram.set_visible(False)
# The clustermap reorders rows/columns; get the reordered DataFrame.
reordered_data = distance_matrix.iloc[
g.dendrogram_row.reordered_ind, g.dendrogram_col.reordered_ind
]
# Annotate each cell with the corresponding Jaccard similarity value.
# Note: similarity = 1 - distance
for i, row in enumerate(reordered_data.index):
for j, col in enumerate(reordered_data.columns):
d = reordered_data.loc[row, col]
jaccard_sim = 1 - d # Convert back to similarity
g.ax_heatmap.text(
j + 0.5, i + 0.5,
f"{jaccard_sim:.2f}",
ha='center', va='center',
color='black', fontsize=5
)
# Remove grid lines from the heatmap
g.ax_heatmap.grid(False)
# Improve overall aesthetics
g.fig.suptitle("DEGs Jaccard similarity Compound vs DMSO", fontsize=16, y=1.05)
plt.setp(g.ax_heatmap.xaxis.get_majorticklabels(), rotation=90, fontsize=8)
plt.setp(g.ax_heatmap.yaxis.get_majorticklabels(), rotation=0, fontsize=8)
g.ax_heatmap.tick_params(axis='both', which='both', length=0)
g.cax.set_position([.92, .2, .03, .45]) # adjust the colorbar position if needed
plt.show()
CTL04Eresults = pd.concat([PerLineAgonistVsAntagonist_filtered[k].assign(Line="CTL04E").assign(Group=k) for k in list(PerLineAgonistVsAntagonist_filtered.keys()) if "_CTL04E" in k], ignore_index=True)
CTL04Eresults["Direction"] = CTL04Eresults.Group.str.split("_", expand=True)[0]
CTL04Eresults["Contrast"] = CTL04Eresults.Group.str.split("_", expand=True)[1].str.split(".", expand=True)[0]
CTL04Eresults.groupby(["Contrast","Direction"], as_index=False).size()
CTL04Eresults = CTL04Eresults.groupby('Contrast').apply(lambda group: pd.Series({
'Up': (group['Direction'] == 'UP').sum(),
'Down': (group['Direction'] == 'DOWN').sum(),
'Total': group.shape[0]
})).reset_index()
CTL08Aresults = pd.concat([PerLineAgonistVsAntagonist_filtered[k].assign(Line="CTL08A").assign(Group=k) for k in list(PerLineAgonistVsAntagonist_filtered.keys()) if "_CTL08A" in k], ignore_index=True)
CTL08Aresults["Direction"] = CTL08Aresults.Group.str.split("_", expand=True)[0]
CTL08Aresults["Contrast"] = CTL08Aresults.Group.str.split("_", expand=True)[1].str.split(".", expand=True)[0]
CTL08Aresults.groupby(["Contrast","Direction"], as_index=False).size()
CTL08Aresults = CTL08Aresults.groupby('Contrast').apply(lambda group: pd.Series({
'Up': (group['Direction'] == 'UP').sum(),
'Down': (group['Direction'] == 'DOWN').sum(),
'Total': group.shape[0]
})).reset_index()
import importlib
import matplotlib.pyplot as plt
import seaborn as sns
# Reload matplotlib.pyplot to restore its original state
# Prepare your summary DataFrame indexed by 'Contrast'
summary_results = {}
summary_results["CTL04E"] = CTL04Eresults.set_index('Contrast')[['Up', 'Down', 'Total']]
summary_results["CTL08A"] = CTL08Aresults.set_index('Contrast')[['Up', 'Down', 'Total']]
# Compute global minimum and maximum values across both DataFrames for uniform scaling
global_min = min(df.min().min() for df in summary_results.values())
global_max = max(df.max().max() for df in summary_results.values())
# Create a figure with 1 row and 2 columns of subplots
fig, axes = plt.subplots(1, 2, figsize=(20, 10))
# Loop through the dictionary keys and plot each heatmap on its corresponding axis
for n, k in enumerate(["CTL08A", "CTL04E"]):
ax = axes[n] # get the current axis
show_cbar = (n == 1)
sns.heatmap(
summary_results[k],
annot=True,
annot_kws={"size": 20}, # Increase text size here
fmt="d",
cmap='YlOrBr',
linewidths=0,
linecolor='none',
vmin=global_min, # set uniform minimum
vmax=global_max, # set uniform maximum
ax=ax,
cbar=show_cbar,
cbar_kws={'label': 'Count'} if show_cbar else None,
)
# Remove any visible grid lines by setting the QuadMesh edge color to the face color
ax.collections[0].set_edgecolor("face")
ax.grid(False)
# Set the axis labels and title using the axis methods
ax.tick_params(axis='x', labelsize=20)
ax.tick_params(axis='y', labelsize=20)
ax.set_xlabel(None)
ax.set_ylabel(None)
ax.set_title(k, fontdict={'size': 25})
# Adjust the layout and display the figure
plt.tight_layout()
plt.show()
CTL04Eresults = pd.concat([PerLineConditionVsDMSO_filtered[k].assign(Line="CTL04E").assign(Group=k) for k in list(PerLineConditionVsDMSO_filtered.keys()) if "_CTL04E" in k], ignore_index=True)
CTL04Eresults["Direction"] = CTL04Eresults.Group.str.split("_", expand=True)[0]
CTL04Eresults["Contrast"] = CTL04Eresults.Group.str.split("_", expand=True)[1].str.split(".", expand=True)[0]
CTL04Eresults.groupby(["Contrast","Direction"], as_index=False).size()
CTL04Eresults = CTL04Eresults.groupby('Contrast').apply(lambda group: pd.Series({
'Up': (group['Direction'] == 'UP').sum(),
'Down': (group['Direction'] == 'DOWN').sum(),
'Total': group.shape[0]
})).reset_index()
CTL08Aresults = pd.concat([PerLineConditionVsDMSO_filtered[k].assign(Line="CTL08A").assign(Group=k) for k in list(PerLineConditionVsDMSO_filtered.keys()) if "_CTL08A" in k], ignore_index=True)
CTL08Aresults["Direction"] = CTL08Aresults.Group.str.split("_", expand=True)[0]
CTL08Aresults["Contrast"] = CTL08Aresults.Group.str.split("_", expand=True)[1].str.split(".", expand=True)[0]
CTL08Aresults.groupby(["Contrast","Direction"], as_index=False).size()
CTL08Aresults = CTL08Aresults.groupby('Contrast').apply(lambda group: pd.Series({
'Up': (group['Direction'] == 'UP').sum(),
'Down': (group['Direction'] == 'DOWN').sum(),
'Total': group.shape[0]
})).reset_index()
# Reload matplotlib.pyplot to restore its original state
importlib.reload(plt)
# Prepare your summary DataFrame indexed by 'Contrast'
summary_results = {}
summary_results["CTL04E"] = CTL04Eresults.set_index('Contrast')[['Up', 'Down', 'Total']]
summary_results["CTL08A"] = CTL08Aresults.set_index('Contrast')[['Up', 'Down', 'Total']]
# Compute global minimum and maximum values across both DataFrames for uniform scaling
global_min = min(df.min().min() for df in summary_results.values())
global_max = max(df.max().max() for df in summary_results.values())
# Create a figure with 1 row and 2 columns of subplots
fig, axes = plt.subplots(1, 2, figsize=(20, 20))
# Loop through the dictionary keys and plot each heatmap on its corresponding axis
for n, k in enumerate(["CTL08A", "CTL04E"]):
ax = axes[n] # get the current axis
show_cbar = (n == 1)
sns.heatmap(
summary_results[k],
annot=True,
annot_kws={"size": 20}, # Increase text size here
fmt="d",
cmap='YlOrBr',
linewidths=0,
linecolor='none',
vmin=global_min, # set uniform minimum
vmax=global_max, # set uniform maximum
ax=ax,
cbar=show_cbar,
cbar_kws={'label': 'Count'} if show_cbar else None,
)
# Remove any visible grid lines by setting the QuadMesh edge color to the face color
ax.collections[0].set_edgecolor("face")
ax.grid(False)
# Set the axis labels and title using the axis methods
ax.tick_params(axis='x', labelsize=20)
ax.tick_params(axis='y', labelsize=20)
ax.set_xlabel(None)
ax.set_ylabel(None)
ax.set_title(k, fontdict={'size': 25})
# Adjust the layout and display the figure
plt.tight_layout()
plt.show()
ScatterploDict = {}
for k in list(PerLineConditionVsDMSO.keys()):
Compound = k.split(".")[0]
if Compound not in list(ScatterploDict.keys()):
ScatterploDict[Compound] = pd.DataFrame()
Line = k.split("_")[-1]
DF = PerLineConditionVsDMSO[k].assign(line=Line)
DifferentiallyResponding = DifferentialEffectCompoundVsDMSO_filtered[f"{Compound}.VS.DMSO.DifferentialCTL04E-CTL08A"]
DifferentiallyResponding = DifferentiallyResponding[abs(DifferentiallyResponding["logFC"]) >= 1.5]
DifferentiallyResponding = DifferentiallyResponding[DifferentiallyResponding["FDR"] <= 0.05].genes.tolist()
DF = DF[DF.genes.isin(DifferentiallyResponding)]
ScatterploDict[Compound] = pd.concat([ScatterploDict[Compound], DF], ignore_index=True)
ScatterploDict[Compound]
for Compound in list(ScatterploDict.keys()):
# Assuming ScatterploDict[Compound] is your DataFrame
df = ScatterploDict[Compound]
# Pivot the DataFrame so that each gene is a row and you have one column per line
df_pivot = df.pivot(index='genes', columns='line', values='logFC')
# Optionally, drop genes that don't have logFC values for both lines
df_pivot = df_pivot.fillna(0)
if df_pivot.shape[0] == 0:
print(f"No genes differentially regulated found for {Compound} according to selected filters")
continue
df_pivot["Delta"] = abs(df_pivot["CTL04E"] - df_pivot["CTL08A"])
plt.figure(figsize=(10, 10))
# Optionally scale the marker sizes (e.g., multiply Delta by a constant factor)
marker_sizes = df_pivot["Delta"] * 100 # Adjust scaling factor as needed
# Plot the scatter points using the correct parameter "s" for sizes
plt.scatter(df_pivot['CTL08A'], df_pivot['CTL04E'], alpha=0.2, color='blue', s=marker_sizes)
# Annotate each point with its gene name
for gene in df_pivot.index:
x = df_pivot.loc[gene, 'CTL08A']
y = df_pivot.loc[gene, 'CTL04E']
plt.text(x, y, gene, fontsize=8, alpha=0.8)
# Draw reference lines at 0 (for 0-centered values)
plt.axvline(0, color='gray', linestyle='--')
plt.axhline(0, color='gray', linestyle='--')
plt.xlabel('CTL08A logFC ', fontsize=25)
plt.ylabel('CTL04E logFC ', fontsize=25)
plt.title(Compound, fontsize=25)
plt.tight_layout()
plt.show()
No genes differentially regulated found for THYROIDANTAGONIST according to selected filters