import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib_venn import venn3, venn2
import seaborn as sns
import scanpy as sc
import glob
import sys
sys.path.append("./../../../../utilities_folder/")
from utilities import intTable
Set up the R environment with rpy2:
import rpy2.rinterface_lib.callbacks
import anndata2ri
import logging
from rpy2.robjects import pandas2ri
import rpy2.robjects as ro
from scipy import stats
# Only let ERROR-level (and above) records through rpy2's callback logger.
rpy2.rinterface_lib.callbacks.logger.setLevel(logging.ERROR)
# Activate the pandas and AnnData converters for rpy2
# (presumably needed for the `-i` transfers into the %%R cells — confirm).
pandas2ri.activate()
anndata2ri.activate()
# Load the rpy2 IPython extension; the %%R cell magic is used further down.
%load_ext rpy2.ipython
Set up parameters for Python plots:
%matplotlib inline
# Scanpy figure defaults: 600 dpi and font size 20.
sc.set_figure_params(dpi=600, fontsize=20)
# Font type used for text in exported PDFs.
plt.rcParams['pdf.fonttype'] = 'truetype'

# Light sequential colormaps for the up / down / all categories.
cmap_up, cmap_down, cmap_all = (
    sns.light_palette(colour, as_cmap=True)
    for colour in ("red", "blue", "seagreen")
)
Set folder paths
# Folder holding the result spreadsheets.
tables_folder = './tables/'

# The three filtered differential-expression tables; the first spreadsheet
# column becomes the index.
egVSipsc, egVSpgc, ipscVSpgc = (
    pd.read_excel(tables_folder + workbook, index_col=0)
    for workbook in (
        'Bulk_hEGCLCs_vs_hiPSC_filtered.xlsx',
        'Bulk_hPGCLCs_vs_EGCLC_filtered.xlsx',
        'Bulk_hPGCLCs_vs_hiPSC_filtered.xlsx',
    )
)

# Index labels of the rows with positive logFC in the hEGCLCs-vs-hiPSC table.
up_eg_vs_ipsc = egVSipsc.loc[egVSipsc['logFC'] > 0].index.tolist()
len(up_eg_vs_ipsc)
82
# Index labels of the rows with negative logFC in the hEGCLCs-vs-hiPSC table.
down_eg_vs_ipsc = egVSipsc.loc[egVSipsc['logFC'] < 0].index.tolist()
len(down_eg_vs_ipsc)
34
egVSpgc
logFC | logCPM | LR | PValue | FDR | Gene | -log10(FDR) | |
---|---|---|---|---|---|---|---|
IGF1 | 15.896291 | 6.846814 | 2094.605002 | 0.000000 | 1.671936e-316 | IGF1 | 315.776780 |
SOX17 | 13.895054 | 8.632823 | 2928.672152 | 0.000000 | 1.671936e-316 | SOX17 | 315.776780 |
CHI3L2 | 13.443653 | 6.417250 | 2160.317252 | 0.000000 | 1.671936e-316 | CHI3L2 | 315.776780 |
IRX6 | 12.703410 | 6.493450 | 2260.318922 | 0.000000 | 1.671936e-316 | IRX6 | 315.776780 |
NANOS3 | 12.560026 | 8.623343 | 1951.820313 | 0.000000 | 1.671936e-316 | NANOS3 | 315.776780 |
... | ... | ... | ... | ... | ... | ... | ... |
ATP4A | -2.398388 | -0.822223 | 8.925405 | 0.002812 | 4.787297e-03 | ATP4A | 2.319910 |
KCNE2 | -2.142631 | -0.700751 | 8.582153 | 0.003395 | 5.709500e-03 | KCNE2 | 2.243402 |
PPP1R36 | -2.143626 | 0.183762 | 8.424237 | 0.003703 | 6.191570e-03 | PPP1R36 | 2.208199 |
BDKRB2 | -2.144906 | -0.085745 | 8.318019 | 0.003925 | 6.543553e-03 | BDKRB2 | 2.184186 |
EXOC3L2 | -2.121139 | 1.862711 | 7.898945 | 0.004946 | 8.133181e-03 | EXOC3L2 | 2.089740 |
2663 rows × 7 columns
# Split the hPGCLCs-vs-EGCLC table by the sign of logFC.
up_pgc_vs_eg = egVSpgc.loc[egVSpgc['logFC'] > 0].index.tolist()
down_pgc_vs_eg = egVSpgc.loc[egVSpgc['logFC'] < 0].index.tolist()
len(up_pgc_vs_eg)
1024
len(down_pgc_vs_eg)
1639
ipscVSpgc
logFC | logCPM | LR | PValue | FDR | Gene | -log10(FDR) | |
---|---|---|---|---|---|---|---|
SOX17 | 17.549030 | 8.894461 | 2025.498153 | 0.000000 | 5.866289e-316 | SOX17 | 315.231637 |
IGF1 | 15.801469 | 7.113052 | 1664.385070 | 0.000000 | 5.866289e-316 | IGF1 | 315.231637 |
IRX6 | 15.460311 | 6.758986 | 1915.660894 | 0.000000 | 5.866289e-316 | IRX6 | 315.231637 |
WNT2 | 15.321818 | 6.614517 | 2056.095895 | 0.000000 | 5.866289e-316 | WNT2 | 315.231637 |
CHI3L2 | 12.600977 | 6.683063 | 1781.461462 | 0.000000 | 5.866289e-316 | CHI3L2 | 315.231637 |
... | ... | ... | ... | ... | ... | ... | ... |
LRRC73 | -2.415578 | -0.129566 | 11.514055 | 0.000691 | 1.260929e-03 | LRRC73 | 2.899309 |
CAPS2 | -2.001239 | -0.344132 | 11.222459 | 0.000808 | 1.464992e-03 | CAPS2 | 2.834165 |
ERBB4 | -2.021953 | -0.359235 | 10.757452 | 0.001039 | 1.856177e-03 | ERBB4 | 2.731381 |
OR14K1 | -2.033268 | -0.115466 | 9.782277 | 0.001762 | 3.062090e-03 | OR14K1 | 2.513982 |
ZNF540 | 2.169959 | 0.052795 | 7.592525 | 0.005861 | 9.460098e-03 | ZNF540 | 2.024104 |
2585 rows × 7 columns
# Split the hPGCLCs-vs-hiPSC table by the sign of logFC.
up_pgc_vs_ipsc = ipscVSpgc.loc[ipscVSpgc['logFC'] > 0].index.tolist()
down_pgc_vs_ipsc = ipscVSpgc.loc[ipscVSpgc['logFC'] < 0].index.tolist()
len(up_pgc_vs_ipsc)
953
len(down_pgc_vs_ipsc)
1632
# Three-way Venn diagram of the index labels of the three filtered tables.
_ = venn3(
    [set(table.index) for table in (egVSipsc, egVSpgc, ipscVSpgc)],
    set_labels=('hEGCLCs_vs_hiPSC', 'hPGCLCs_vs_hEGCLC', 'hPGCLCs_vs_hiPSC'),
)
Let's put it all together for the up-regulated genes:
# Three-way Venn diagram of the positive-logFC gene lists.
_ = venn3(
    [set(genes) for genes in (up_pgc_vs_eg, up_pgc_vs_ipsc, up_eg_vs_ipsc)],
    set_labels=('UP hPGCLCs_vs_hEGCLC', 'UP hPGCLCs_vs_hiPSC', 'UP hEGCLCs_vs_hiPSC'),
)
Let's put it all together for the down-regulated genes:
# Three-way Venn diagram of the negative-logFC gene lists.
_ = venn3(
    [set(genes) for genes in (down_pgc_vs_eg, down_pgc_vs_ipsc, down_eg_vs_ipsc)],
    set_labels=('DOWN hPGCLCs_vs_hEGCLC', 'DOWN hPGCLCs_vs_hiPSC', 'DOWN hEGCLCs_vs_hiPSC'),
)
venn2([set(up_pgc_vs_eg), set(up_pgc_vs_ipsc)], set_labels=('UP hPGCLCs_vs_hEGCLC', 'UP hPGCLCs_vs_hiPSC') )
<matplotlib_venn._common.VennDiagram at 0x7fa16cefc220>
venn2([set(up_pgc_vs_eg), set(up_eg_vs_ipsc)], set_labels=('UP hPGCLCs_vs_hEGCLC', 'UP hEGCLCs_vs_hiPSC') )
<matplotlib_venn._common.VennDiagram at 0x7fa16cf1c940>
venn2([set(up_pgc_vs_ipsc), set(up_eg_vs_ipsc)], set_labels=('UP hPGCLCs_vs_hiPSC', 'UP hEGCLCs_vs_hiPSC') )
<matplotlib_venn._common.VennDiagram at 0x7fa16ce75700>
venn2([set(up_eg_vs_ipsc), set(down_pgc_vs_eg)], set_labels=('UP hEGCLC_vs_hiPSC', 'UP hEGCLC_vs_hPGCLC') )
<matplotlib_venn._common.VennDiagram at 0x7fa16ce47070>
# Overlap between the positive-logFC genes of hEGCLCs-vs-hiPSC and the
# negative-logFC genes of hPGCLCs-vs-hiPSC.
# The second set is `down_pgc_vs_ipsc`, so it is labelled 'UP hiPSC_vs_hPGCLC',
# the label every other venn2 diagram of that set uses. The previous label,
# 'UP hPGCLC_vs_hiPSC', described the opposite gene list.
venn2([set(up_eg_vs_ipsc), set(down_pgc_vs_ipsc)], set_labels=('UP hEGCLC_vs_hiPSC', 'UP hiPSC_vs_hPGCLC') )
<matplotlib_venn._common.VennDiagram at 0x7fa16d559ac0>
venn2([set(down_eg_vs_ipsc), set(up_pgc_vs_eg)], set_labels=('UP hiPSC_vs_hEGCLC', 'UP hPGCLC_vs_hEGCLC') )
<matplotlib_venn._common.VennDiagram at 0x7fa16d027940>
venn2([set(down_eg_vs_ipsc), set(down_pgc_vs_eg)], set_labels=('UP hiPSC_vs_hEGCLC', 'UP hEGCLC_vs_hPGCLC') )
<matplotlib_venn._common.VennDiagram at 0x7fa151d75220>
venn2([set(down_eg_vs_ipsc), set(up_pgc_vs_ipsc)], set_labels=('UP hiPSC_vs_hEGCLC', 'UP hPGCLC_vs_hiPSC') )
<matplotlib_venn._common.VennDiagram at 0x7fa1675cef70>
venn2([set(down_eg_vs_ipsc), set(down_pgc_vs_ipsc)], set_labels=('UP hiPSC_vs_hEGCLC', 'UP hiPSC_vs_hPGCLC') )
<matplotlib_venn._common.VennDiagram at 0x7fa16cd6dfd0>
venn2([set(up_pgc_vs_eg), set(down_pgc_vs_ipsc)], set_labels=('UP hPGCLC_vs_hEGCLC', 'UP hiPSC_vs_hPGCLC') )
<matplotlib_venn._common.VennDiagram at 0x7fa16cd9b2e0>
venn2([set(down_pgc_vs_eg), set(up_pgc_vs_ipsc)], set_labels=('UP hEGCLC_vs_hPGCLC', 'UP hPGCLC_vs_hiPSC') )
<matplotlib_venn._common.VennDiagram at 0x7fa16cd63070>
venn2([set(down_pgc_vs_eg), set(down_pgc_vs_ipsc)], set_labels=('UP hEGCLC_vs_hPGCLC', 'UP hiPSC_vs_hPGCLC') )
<matplotlib_venn._common.VennDiagram at 0x7fa16ccb0610>
%%R -i up_eg_vs_ipsc -i down_eg_vs_ipsc -i up_pgc_vs_eg -i down_pgc_vs_eg -i up_pgc_vs_ipsc -i down_pgc_vs_ipsc
# Point R at the project library folder before loading GeneOverlap.
loc <- './../../../../R_loc' # pointing to the renv environment
.libPaths(loc)
library(GeneOverlap)

# Named lists of the up- and down-regulated gene sets, one entry per comparison.
up <- list(up_eg_vs_ipsc = up_eg_vs_ipsc,
           up_pgc_vs_eg = up_pgc_vs_eg,
           up_pgc_vs_ipsc = up_pgc_vs_ipsc)
down <- list(down_eg_vs_ipsc = down_eg_vs_ipsc,
             down_pgc_vs_eg = down_pgc_vs_eg,
             down_pgc_vs_ipsc = down_pgc_vs_ipsc)

# Overlap matrices: up against down, then each list against itself.
# NOTE(review): genome.size = 14582 is presumably the number of genes tested - confirm.
gom.obj_up_down <- newGOM(up, down, genome.size = 14582)
gom.obj_up_up <- newGOM(up, genome.size = 14582)
gom.obj_down_down <- newGOM(down, genome.size = 14582)
%%R
drawHeatmap(gom.obj_up_down, what = 'Jaccard', grid.col="Greens", note.col="white")
%%R
drawHeatmap(gom.obj_up_up, what = 'Jaccard', grid.col="Reds", note.col="white")
%%R
drawHeatmap(gom.obj_down_down, what = 'Jaccard', grid.col="Blues", note.col="white")
# Sorted, de-duplicated index labels of each comparison table.
l1 = sorted(set(egVSipsc.index))
l2 = sorted(set(egVSpgc.index))
l3 = sorted(set(ipscVSpgc.index))

# One column per comparison (the lists are stacked as rows, then transposed).
upsetplot_df = pd.DataFrame([l1, l2, l3]).transpose()
upsetplot_df.columns = ['hEGCLCs_vs_hiPSC', 'hPGCLCs_vs_hEGCLC', 'hPGCLCs_vs_hiPSC']
# Jaccard index (|A ∩ B| / |A ∪ B|) for every ordered pair of comparisons,
# stored under the key '<label A>-<label B>'.
labelled_sets = [
    (label, set(genes))
    for genes, label in zip([l1, l2, l3], upsetplot_df.columns)
]
stats_pos = {}
for label_a, genes_a in labelled_sets:
    for label_b, genes_b in labelled_sets:
        shared = len(genes_a & genes_b)
        combined = len(genes_a | genes_b)
        stats_pos[label_a + '-' + label_b] = shared / combined
stats_pos
{'hEGCLCs_vs_hiPSC-hEGCLCs_vs_hiPSC': 1.0, 'hEGCLCs_vs_hiPSC-hPGCLCs_vs_hEGCLC': 0.025082995204721504, 'hEGCLCs_vs_hiPSC-hPGCLCs_vs_hiPSC': 0.014650638617580767, 'hPGCLCs_vs_hEGCLC-hEGCLCs_vs_hiPSC': 0.025082995204721504, 'hPGCLCs_vs_hEGCLC-hPGCLCs_vs_hEGCLC': 1.0, 'hPGCLCs_vs_hEGCLC-hPGCLCs_vs_hiPSC': 0.7297297297297297, 'hPGCLCs_vs_hiPSC-hEGCLCs_vs_hiPSC': 0.014650638617580767, 'hPGCLCs_vs_hiPSC-hPGCLCs_vs_hEGCLC': 0.7297297297297297, 'hPGCLCs_vs_hiPSC-hPGCLCs_vs_hiPSC': 1.0}
# Long table of the pairwise values: the pair key ends up in column 'index'
# and the value in column 0.
df_to_plot = pd.DataFrame(stats_pos.values(), index=stats_pos.keys()).reset_index()

# Split each '<label A>-<label B>' key back into its two labels.
i1, i2 = (df_to_plot['index'].str.split('-').str.get(pos) for pos in (0, 1))
df_to_plot['i1'] = i1
df_to_plot['i2'] = i2

# Square matrix: rows are the first label, columns the second.
df_to_plot = df_to_plot.pivot(index='i1', columns='i2', values=0)
# Heatmap of the pairwise matrix on a 5x5 figure.
plt.figure(figsize=(5, 5))
# True on the diagonal and above: those cells are masked out of the plot.
mask = np.triu(np.ones_like(df_to_plot, dtype=bool))
heatmap_style = dict(square=True, linewidths=.5, cbar_kws={"shrink": .5}, cmap='PuBu')
sns.heatmap(df_to_plot, mask=mask, **heatmap_style)
<AxesSubplot:xlabel='i2', ylabel='i1'>
# GO enrichment tables from the '*_BP.xlsx' workbooks, one per comparison
# and gene set; the first spreadsheet column becomes the index.
egVSipsc_up, pgcVSeg_up, pgcVSipsc_up = (
    pd.read_excel(tables_folder + workbook, index_col=0)
    for workbook in (
        'GO_upregulated_hEGCLCs_vs_hiPSC_BP.xlsx',
        'GO_upregulated_hPGCLCs_vs_EGCLC_BP.xlsx',
        'GO_upregulated_hPGCLCs_vs_hiPSC_BP.xlsx',
    )
)
egVSipsc_down, pgcVSeg_down, pgcVSipsc_down = (
    pd.read_excel(tables_folder + workbook, index_col=0)
    for workbook in (
        'GO_downregulated_hEGCLCs_vs_hiPSC_BP.xlsx',
        'GO_downregulated_hPGCLCs_vs_EGCLC_BP.xlsx',
        'GO_downregulated_hPGCLCs_vs_hiPSC_BP.xlsx',
    )
)
egVSipsc_all, pgcVSeg_all, pgcVSipsc_all = (
    pd.read_excel(tables_folder + workbook, index_col=0)
    for workbook in (
        'GO_allSignificant_hEGCLCs_vs_hiPSC_BP.xlsx',
        'GO_allSignificant_hPGCLCs_vs_EGCLC_BP.xlsx',
        'GO_allSignificant_hPGCLCs_vs_hiPSC_BP.xlsx',
    )
)
len(egVSipsc_up)
15
len(pgcVSeg_up)
159
len(pgcVSipsc_up)
128
_ = venn3([set(egVSipsc_up['GO.ID']), set(pgcVSeg_up['GO.ID']), set(pgcVSipsc_up['GO.ID'])], set_labels=('hEGCLCs_vs_hiPSC', 'hPGCLCs_vs_hEGCLC', 'hPGCLCs_vs_hiPSC'))
pgcVSeg_up
GO.ID | Term | Annotated | Significant | Expected | Statistics | ER | -log10(pvalue) | Significant/Annotated | |
---|---|---|---|---|---|---|---|---|---|
1 | GO:0045165 | cell fate commitment | 170 | 46 | 12.49 | 8.400000e-08 | 3.68 | 7.075721 | 0.270588 |
2 | GO:0001709 | cell fate determination | 22 | 10 | 1.62 | 1.200000e-06 | 6.17 | 5.920819 | 0.454545 |
3 | GO:0003161 | cardiac conduction system development | 27 | 11 | 1.98 | 1.400000e-06 | 5.56 | 5.853872 | 0.407407 |
4 | GO:0060070 | canonical Wnt signaling pathway | 258 | 39 | 18.95 | 3.700000e-06 | 2.06 | 5.431798 | 0.151163 |
5 | GO:0007422 | peripheral nervous system development | 68 | 16 | 4.99 | 3.900000e-06 | 3.21 | 5.408935 | 0.235294 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
155 | GO:0019369 | arachidonic acid metabolic process | 33 | 7 | 2.42 | 8.900000e-03 | 2.89 | 2.050610 | 0.212121 |
156 | GO:0001947 | heart looping | 50 | 9 | 3.67 | 9.750000e-03 | 2.45 | 2.010995 | 0.180000 |
157 | GO:0055007 | cardiac muscle cell differentiation | 82 | 13 | 6.02 | 9.800000e-03 | 2.16 | 2.008774 | 0.158537 |
158 | GO:0031954 | positive regulation of protein autophosp... | 26 | 6 | 1.91 | 9.950000e-03 | 3.14 | 2.002177 | 0.230769 |
159 | GO:0051385 | response to mineralocorticoid | 26 | 6 | 1.91 | 9.950000e-03 | 3.14 | 2.002177 | 0.230769 |
159 rows × 9 columns
# Terms present in both up-regulated tables (hPGCLCs vs EGCLC and hPGCLCs vs
# hiPSC), ranked by the product of their two 'Statistics' values.
intersection = set(pgcVSeg_up['Term']).intersection(set(pgcVSipsc_up['Term']))
# A set is unordered and recent pandas rejects it as `index`; use a sorted
# list so the frame can be built and its row order is reproducible.
df = pd.DataFrame(index = sorted(intersection))
# Index both tables by term so the column assignments below align on df's index.
pgcVSeg_up.index = pgcVSeg_up.Term
pgcVSipsc_up.index = pgcVSipsc_up.Term
df['Scores_pgcVSeg_up'] = pgcVSeg_up[pgcVSeg_up.Term.isin(intersection)]['Statistics']
df['Scores_pgcVSipsc_up'] = pgcVSipsc_up[pgcVSipsc_up.Term.isin(intersection)]['Statistics']
# Product of the two values; the table is shown sorted ascending on it.
df['Combined'] = df['Scores_pgcVSeg_up'] * df['Scores_pgcVSipsc_up']
intTable(df.sort_values(by = 'Combined'), save = False)
set(egVSipsc_up['Term']).intersection(set(pgcVSipsc_up['Term']))
set()
set(egVSipsc_up['Term'])
{'DNA methylation', 'aging', 'aromatic compound catabolic process', 'cellular process involved in reproductio...', 'gene silencing by RNA', 'germ cell development', 'learning or memory', 'locomotory behavior', 'methylation', 'negative regulation of phosphorylation', 'negative regulation of protein-containin...', 'organic cyclic compound catabolic proces...', 'piRNA metabolic process', 'response to oxidative stress', 'spermatogenesis'}
_ = venn3([set(egVSipsc_down['GO.ID']), set(pgcVSeg_down['GO.ID']), set(pgcVSipsc_down['GO.ID'])], set_labels=('hEGCLCs_vs_hiPSC', 'hPGCLCs_vs_hEGCLC', 'hPGCLCs_vs_hiPSC'))
set(pgcVSeg_down['Term']).intersection(set(pgcVSipsc_down['Term'])).intersection(set(egVSipsc_down['Term']))
set()
set(pgcVSeg_down['Term']).intersection(set(pgcVSipsc_down['Term']))
{'B cell receptor signaling pathway', 'G protein-coupled receptor signaling pat...', 'activation of transmembrane receptor pro...', 'adenylate cyclase-activating G protein-c...', 'adult locomotory behavior', 'antimicrobial humoral immune response me...', 'behavioral fear response', 'calcium ion transmembrane import into cy...', 'calcium ion-regulated exocytosis of neur...', 'cellular response to calcium ion', 'cellular sodium ion homeostasis', 'chemokine-mediated signaling pathway', 'chloride transmembrane transport', 'chondrocyte proliferation', 'complement activation, classical pathway', 'detection of temperature stimulus', 'dopamine metabolic process', 'engulfment of apoptotic cell', 'excitatory postsynaptic potential', 'exploration behavior', 'eye photoreceptor cell development', 'gamma-aminobutyric acid signaling pathwa...', 'ionotropic glutamate receptor signaling ...', 'locomotory behavior', 'lymph vessel morphogenesis', 'membrane depolarization', 'memory', 'monocyte chemotaxis', 'multicellular organismal response to str...', 'negative regulation of angiogenesis', 'negative regulation of blood pressure', 'negative regulation of fibroblast growth...', 'negative regulation of leukocyte apoptot...', 'negative regulation of vascular permeabi...', 'neuron maturation', 'neuronal action potential', 'neuropeptide signaling pathway', 'neurotransmitter metabolic process', 'neurotransmitter transport', 'neurotransmitter-gated ion channel clust...', 'nitric oxide mediated signal transductio...', 'phagocytosis, engulfment', 'phagocytosis, recognition', 'positive regulation of ERK1 and ERK2 cas...', 'positive regulation of G protein-coupled...', 'positive regulation of calcium ion trans...', 'positive regulation of cytosolic calcium...', 'positive regulation of dendrite extensio...', 'positive regulation of developmental gro...', 'positive regulation of excitatory postsy...', 'positive regulation of phosphatidylinosi...', 'positive regulation of positive chemotax...', 
'positive regulation of protein kinase B ...', 'positive regulation of stem cell prolife...', 'positive regulation of synapse assembly', 'positive regulation of synaptic transmis...', 'potassium ion import across plasma membr...', 'potassium ion transmembrane transport', 'prostaglandin secretion', 'purinergic nucleotide receptor signaling...', 'receptor localization to synapse', 'regulation of AMPA receptor activity', 'regulation of action potential', 'regulation of cytosolic calcium ion conc...', 'regulation of dopamine secretion', 'regulation of humoral immune response', 'regulation of ion transmembrane transpor...', 'regulation of membrane potential', 'regulation of neuronal synaptic plastici...', 'regulation of neurotransmitter secretion', 'regulation of neutrophil chemotaxis', 'regulation of postsynaptic membrane pote...', 'regulation of postsynaptic neurotransmit...', 'regulation of presynapse assembly', 'regulation of ryanodine-sensitive calciu...', 'regulation of short-term neuronal synapt...', 'regulation of smooth muscle contraction', 'regulation of synaptic vesicle exocytosi...', 'regulation of ventricular cardiac muscle...', 'relaxation of muscle', 'sensory perception of smell', 'skeletal muscle contraction', 'sodium ion transmembrane transport', 'synaptic membrane adhesion', 'vascular endothelial growth factor signa...', 'visual learning'}
# Index both down-regulated tables by term BEFORE de-duplicating and scoring.
# With the integer index read from the spreadsheet, `duplicated()` removes
# nothing and the column assignments below cannot align with df's term index
# (every score becomes NaN).
pgcVSeg_down.index = pgcVSeg_down.Term
pgcVSipsc_down.index = pgcVSipsc_down.Term
# Keep the first row for any repeated term label; duplicate index labels
# would make the aligned assignments below fail.
pgcVSeg_down= pgcVSeg_down[~pgcVSeg_down.index.duplicated()]
pgcVSipsc_down= pgcVSipsc_down[~pgcVSipsc_down.index.duplicated()]
intersection = set(pgcVSeg_down['Term']).intersection(set(pgcVSipsc_down['Term']))
# A set is unordered and recent pandas rejects it as `index`; use a sorted list.
df = pd.DataFrame(index = sorted(intersection))
# Columns are named after the down-regulated tables they are taken from.
df['Scores_pgcVSeg_down'] = pgcVSeg_down[pgcVSeg_down.Term.isin(intersection)]['Statistics']
df['Scores_pgcVSipsc_down'] = pgcVSipsc_down[pgcVSipsc_down.Term.isin(intersection)]['Statistics']
# Product of the two values; the table is shown sorted ascending on it.
df['Combined'] = df['Scores_pgcVSeg_down'] * df['Scores_pgcVSipsc_down']
intTable(df.sort_values(by = 'Combined'), save = False)
set(egVSipsc_down['Term']).intersection(set(pgcVSipsc_down['Term']))
set()
# Terms present in both the hEGCLCs-vs-hiPSC and hPGCLCs-vs-hiPSC
# down-regulated tables, ranked by the product of their 'Statistics' values.
intersection = set(egVSipsc_down['Term']).intersection(set(pgcVSipsc_down['Term']))
# A set is unordered and recent pandas rejects it as `index` (even when it is
# empty); use a sorted list instead.
df = pd.DataFrame(index = sorted(intersection))
# Index both tables by term so the column assignments below align on df's index.
egVSipsc_down.index = egVSipsc_down.Term
pgcVSipsc_down.index = pgcVSipsc_down.Term
df['Scores_egVSipsc_down'] = egVSipsc_down[egVSipsc_down.Term.isin(intersection)]['Statistics']
df['Scores_pgcVSipsc_down'] = pgcVSipsc_down[pgcVSipsc_down.Term.isin(intersection)]['Statistics']
# Product of the two values; the table is shown sorted ascending on it.
df['Combined'] = df['Scores_egVSipsc_down'] * df['Scores_pgcVSipsc_down']
intTable(df.sort_values(by = 'Combined'), save = False)
_ = venn3([set(egVSipsc_all['GO.ID']), set(pgcVSeg_all['GO.ID']), set(pgcVSipsc_all['GO.ID'])], set_labels=('hEGCLCs_vs_hiPSC', 'hPGCLCs_vs_hEGCLC', 'hPGCLCs_vs_hiPSC'))
set(pgcVSeg_all['Term']).intersection(set(pgcVSipsc_all['Term'])).intersection(set(egVSipsc_all['Term']))
{'postsynaptic membrane'}
set(pgcVSeg_all['Term']).intersection(set(pgcVSipsc_all['Term']))
{'AMPA glutamate receptor complex', 'GABA-ergic synapse', 'Golgi lumen', 'anchored component of membrane', 'anchored component of plasma membrane', 'blood microparticle', 'chloride channel complex', 'collagen trimer', 'dendrite membrane', 'dense core granule', 'excitatory synapse', 'external side of plasma membrane', 'extracellular matrix', 'glial cell projection', 'hippocampal mossy fiber to CA3 synapse', 'integral component of postsynaptic densi...', 'integral component of postsynaptic speci...', 'integral component of presynaptic membra...', 'intrinsic component of synaptic vesicle ...', 'postsynaptic density membrane', 'postsynaptic membrane', 'receptor complex', 'sodium channel complex', 'terminal bouton', 'voltage-gated potassium channel complex'}
set(egVSipsc_all['Term']).intersection(set(pgcVSipsc_all['Term']))
{'postsynaptic membrane'}
# GO enrichment tables from the '*_MF.xlsx' workbooks, one per comparison
# and gene set; these replace the tables loaded earlier under the same names.
egVSipsc_up, pgcVSeg_up, pgcVSipsc_up = (
    pd.read_excel(tables_folder + workbook, index_col=0)
    for workbook in (
        'GO_upregulated_hEGCLCs_vs_hiPSC_MF.xlsx',
        'GO_upregulated_hPGCLCs_vs_EGCLC_MF.xlsx',
        'GO_upregulated_hPGCLCs_vs_hiPSC_MF.xlsx',
    )
)
egVSipsc_down, pgcVSeg_down, pgcVSipsc_down = (
    pd.read_excel(tables_folder + workbook, index_col=0)
    for workbook in (
        'GO_downregulated_hEGCLCs_vs_hiPSC_MF.xlsx',
        'GO_downregulated_hPGCLCs_vs_EGCLC_MF.xlsx',
        'GO_downregulated_hPGCLCs_vs_hiPSC_MF.xlsx',
    )
)
egVSipsc_all, pgcVSeg_all, pgcVSipsc_all = (
    pd.read_excel(tables_folder + workbook, index_col=0)
    for workbook in (
        'GO_allSignificant_hEGCLCs_vs_hiPSC_MF.xlsx',
        'GO_allSignificant_hPGCLCs_vs_EGCLC_MF.xlsx',
        'GO_allSignificant_hPGCLCs_vs_hiPSC_MF.xlsx',
    )
)
len(egVSipsc_up)
9
len(pgcVSeg_up)
22
len(pgcVSipsc_up)
19
_ = venn3([set(egVSipsc_up['GO.ID']), set(pgcVSeg_up['GO.ID']), set(pgcVSipsc_up['GO.ID'])], set_labels=('hEGCLCs_vs_hiPSC', 'hPGCLCs_vs_hEGCLC', 'hPGCLCs_vs_hiPSC'))
pgcVSeg_up
GO.ID | Term | Annotated | Significant | Expected | Statistics | ER | -log10(pvalue) | Significant/Annotated | |
---|---|---|---|---|---|---|---|---|---|
1 | GO:0001228 | DNA-binding transcription activator acti... | 329 | 57 | 24.10 | 7.200000e-10 | 2.37 | 9.142668 | 0.173252 |
2 | GO:0005109 | frizzled binding | 25 | 12 | 1.83 | 4.700000e-08 | 6.56 | 7.327902 | 0.480000 |
3 | GO:0005125 | cytokine activity | 102 | 20 | 7.47 | 4.700000e-05 | 2.68 | 4.327902 | 0.196078 |
4 | GO:0005201 | extracellular matrix structural constitu... | 123 | 24 | 9.01 | 5.400000e-05 | 2.66 | 4.267606 | 0.195122 |
5 | GO:0070851 | growth factor receptor binding | 97 | 18 | 7.10 | 1.500000e-04 | 2.54 | 3.823909 | 0.185567 |
6 | GO:0004896 | cytokine receptor activity | 50 | 12 | 3.66 | 2.000000e-04 | 3.28 | 3.698970 | 0.240000 |
7 | GO:0019955 | cytokine binding | 89 | 20 | 6.52 | 4.100000e-04 | 3.07 | 3.387216 | 0.224719 |
8 | GO:0008201 | heparin binding | 111 | 19 | 8.13 | 4.200000e-04 | 2.34 | 3.376751 | 0.171171 |
9 | GO:0008528 | G protein-coupled peptide receptor activ... | 66 | 13 | 4.83 | 7.100000e-04 | 2.69 | 3.148742 | 0.196970 |
10 | GO:0017046 | peptide hormone binding | 35 | 9 | 2.56 | 7.200000e-04 | 3.52 | 3.142668 | 0.257143 |
11 | GO:0002020 | protease binding | 109 | 18 | 7.98 | 9.000000e-04 | 2.26 | 3.045757 | 0.165138 |
12 | GO:0004623 | phospholipase A2 activity | 23 | 7 | 1.68 | 9.600000e-04 | 4.17 | 3.017729 | 0.304348 |
13 | GO:0140416 | transcription regulator inhibitor activi... | 18 | 6 | 1.32 | 1.310000e-03 | 4.55 | 2.882729 | 0.333333 |
14 | GO:0008373 | sialyltransferase activity | 18 | 6 | 1.32 | 1.310000e-03 | 4.55 | 2.882729 | 0.333333 |
15 | GO:0019838 | growth factor binding | 105 | 20 | 7.69 | 1.770000e-03 | 2.60 | 2.752027 | 0.190476 |
16 | GO:0004714 | transmembrane receptor protein tyrosine ... | 51 | 11 | 3.74 | 2.600000e-03 | 2.94 | 2.585027 | 0.215686 |
17 | GO:0005044 | scavenger receptor activity | 29 | 7 | 2.12 | 4.140000e-03 | 3.30 | 2.383000 | 0.241379 |
18 | GO:0004930 | G protein-coupled receptor activity | 196 | 31 | 14.36 | 5.960000e-03 | 2.16 | 2.224754 | 0.158163 |
19 | GO:0015144 | carbohydrate transmembrane transporter a... | 24 | 6 | 1.76 | 6.590000e-03 | 3.41 | 2.181115 | 0.250000 |
20 | GO:0008374 | O-acyltransferase activity | 45 | 7 | 3.30 | 7.990000e-03 | 2.12 | 2.097453 | 0.155556 |
21 | GO:0004622 | lysophospholipase activity | 18 | 5 | 1.32 | 7.990000e-03 | 3.79 | 2.097453 | 0.277778 |
22 | GO:0005126 | cytokine receptor binding | 147 | 22 | 10.77 | 8.590000e-03 | 2.04 | 2.066007 | 0.149660 |
pgcVSeg_up[pgcVSeg_up.Term.isin(intersection)]
GO.ID | Term | Annotated | Significant | Expected | Statistics | ER | -log10(pvalue) | Significant/Annotated |
---|
len(pgcVSeg_up[pgcVSeg_up.Term.isin(intersection)])
0
len(intersection)
0
pgcVSipsc_up[pgcVSipsc_up.Term.isin(intersection)]
GO.ID | Term | Annotated | Significant | Expected | Statistics | ER | -log10(pvalue) | Significant/Annotated |
---|
# Terms present in both up-regulated tables (hPGCLCs vs EGCLC and hPGCLCs vs
# hiPSC), ranked by the product of their two 'Statistics' values.
intersection = set(pgcVSeg_up['Term']).intersection(set(pgcVSipsc_up['Term']))
# A set is unordered and recent pandas rejects it as `index`; use a sorted
# list so the frame can be built and its row order is reproducible.
df = pd.DataFrame(index = sorted(intersection))
# Index both tables by term so the column assignments below align on df's index.
pgcVSeg_up.index = pgcVSeg_up.Term
pgcVSipsc_up.index = pgcVSipsc_up.Term
df['Scores_pgcVSeg_up'] = pgcVSeg_up[pgcVSeg_up.Term.isin(intersection)]['Statistics']
df['Scores_pgcVSipsc_up'] = pgcVSipsc_up[pgcVSipsc_up.Term.isin(intersection)]['Statistics']
# Product of the two values; the table is shown sorted ascending on it.
df['Combined'] = df['Scores_pgcVSeg_up'] * df['Scores_pgcVSipsc_up']
intTable(df.sort_values(by = 'Combined'), save = False)
set(egVSipsc_up['Term']).intersection(set(pgcVSipsc_up['Term']))
set()
set(egVSipsc_up['Term'])
{'DNA-binding transcription repressor acti...', 'active ion transmembrane transporter act...', 'active transmembrane transporter activit...', 'cation transmembrane transporter activit...', 'inorganic cation transmembrane transport...', 'mRNA binding', 'phosphoric ester hydrolase activity', 'structural molecule activity'}
_ = venn3([set(egVSipsc_down['GO.ID']), set(pgcVSeg_down['GO.ID']), set(pgcVSipsc_down['GO.ID'])], set_labels=('hEGCLCs_vs_hiPSC', 'hPGCLCs_vs_hEGCLC', 'hPGCLCs_vs_hiPSC'))
set(pgcVSeg_down['Term']).intersection(set(pgcVSipsc_down['Term'])).intersection(set(egVSipsc_down['Term']))
set()
set(pgcVSeg_down['Term']).intersection(set(pgcVSipsc_down['Term']))
{'CoA-ligase activity', 'G protein-coupled peptide receptor activ...', 'G protein-coupled receptor activity', 'G-protein beta-subunit binding', 'Wnt-protein binding', 'alkali metal ion binding', 'amyloid-beta binding', 'antigen binding', 'calcium channel regulator activity', 'calcium-dependent phospholipid binding', 'chloride channel activity', 'cytokine activity', 'cytokine receptor activity', 'excitatory extracellular ligand-gated io...', 'extracellular matrix binding', 'extracellular matrix structural constitu...', 'fibroblast growth factor receptor bindin...', 'glutamate receptor activity', 'growth factor activity', 'hormone activity', 'inward rectifier potassium channel activ...', 'ligand-gated cation channel activity', 'neuropeptide receptor activity', 'neuropeptide receptor binding', 'organic acid:sodium symporter activity', 'peptide hormone binding', 'phosphatidylserine binding', 'protein tyrosine kinase activator activi...', 'proteoglycan binding', 'serine-type endopeptidase activity', 'serine-type endopeptidase inhibitor acti...', 'sulfotransferase activity', 'transmembrane receptor protein tyrosine ...', 'transmembrane signaling receptor activit...', 'transmitter-gated ion channel activity i...', 'voltage-gated ion channel activity'}
# Index both down-regulated tables by term BEFORE de-duplicating and scoring.
# With the integer index read from the spreadsheet, `duplicated()` removes
# nothing and the column assignments below cannot align with df's term index
# (every score becomes NaN).
pgcVSeg_down.index = pgcVSeg_down.Term
pgcVSipsc_down.index = pgcVSipsc_down.Term
# Keep the first row for any repeated term label; duplicate index labels
# would make the aligned assignments below fail.
pgcVSeg_down= pgcVSeg_down[~pgcVSeg_down.index.duplicated()]
pgcVSipsc_down= pgcVSipsc_down[~pgcVSipsc_down.index.duplicated()]
intersection = set(pgcVSeg_down['Term']).intersection(set(pgcVSipsc_down['Term']))
# A set is unordered and recent pandas rejects it as `index`; use a sorted list.
df = pd.DataFrame(index = sorted(intersection))
# Columns are named after the down-regulated tables they are taken from.
df['Scores_pgcVSeg_down'] = pgcVSeg_down[pgcVSeg_down.Term.isin(intersection)]['Statistics']
df['Scores_pgcVSipsc_down'] = pgcVSipsc_down[pgcVSipsc_down.Term.isin(intersection)]['Statistics']
# Product of the two values; the table is shown sorted ascending on it.
df['Combined'] = df['Scores_pgcVSeg_down'] * df['Scores_pgcVSipsc_down']
intTable(df.sort_values(by = 'Combined'), save = False)
set(egVSipsc_down['Term']).intersection(set(pgcVSipsc_down['Term']))
set()
# Terms present in both the hEGCLCs-vs-hiPSC and hPGCLCs-vs-hiPSC
# down-regulated tables, ranked by the product of their 'Statistics' values.
intersection = set(egVSipsc_down['Term']).intersection(set(pgcVSipsc_down['Term']))
# A set is unordered and recent pandas rejects it as `index` (even when it is
# empty); use a sorted list instead.
df = pd.DataFrame(index = sorted(intersection))
# Index both tables by term so the column assignments below align on df's index.
egVSipsc_down.index = egVSipsc_down.Term
pgcVSipsc_down.index = pgcVSipsc_down.Term
df['Scores_egVSipsc_down'] = egVSipsc_down[egVSipsc_down.Term.isin(intersection)]['Statistics']
df['Scores_pgcVSipsc_down'] = pgcVSipsc_down[pgcVSipsc_down.Term.isin(intersection)]['Statistics']
# Product of the two values; the table is shown sorted ascending on it.
df['Combined'] = df['Scores_egVSipsc_down'] * df['Scores_pgcVSipsc_down']
intTable(df.sort_values(by = 'Combined'), save = False)
_ = venn3([set(egVSipsc_all['GO.ID']), set(pgcVSeg_all['GO.ID']), set(pgcVSipsc_all['GO.ID'])], set_labels=('hEGCLCs_vs_hiPSC', 'hPGCLCs_vs_hEGCLC', 'hPGCLCs_vs_hiPSC'))
set(pgcVSeg_all['Term']).intersection(set(pgcVSipsc_all['Term'])).intersection(set(egVSipsc_all['Term']))
{'G protein-coupled receptor activity'}
set(pgcVSeg_all['Term']).intersection(set(pgcVSipsc_all['Term']))
{'G protein-coupled peptide receptor activ...', 'G protein-coupled receptor activity', 'G-protein beta-subunit binding', 'Wnt-protein binding', 'antigen binding', 'calcium channel regulator activity', 'chloride channel activity', 'cholesterol binding', 'cytokine activity', 'cytokine binding', 'cytokine receptor activity', 'delayed rectifier potassium channel acti...', 'excitatory extracellular ligand-gated io...', 'extracellular matrix binding', 'extracellular matrix structural constitu...', 'fibroblast growth factor receptor bindin...', 'fibronectin binding', 'frizzled binding', 'glutamate receptor activity', 'growth factor activity', 'growth factor receptor binding', 'heparin binding', 'hormone activity', 'inward rectifier potassium channel activ...', 'ligand-gated cation channel activity', 'neuropeptide receptor activity', 'neuropeptide receptor binding', 'organic acid:sodium symporter activity', 'outward rectifier potassium channel acti...', 'peptide hormone binding', 'protein tyrosine kinase activator activi...', 'proteoglycan binding', 'scavenger receptor activity', 'serine-type endopeptidase inhibitor acti...', 'sialyltransferase activity', 'sulfotransferase activity', 'transmembrane receptor protein tyrosine ...', 'transmembrane signaling receptor activit...', 'transmitter-gated ion channel activity i...'}
set(egVSipsc_all['Term']).intersection(set(pgcVSipsc_all['Term']))
{'G protein-coupled receptor activity'}
# GO enrichment tables from the '*_CC.xlsx' workbooks, one per comparison
# and gene set; these replace the tables loaded earlier under the same names.
egVSipsc_up, pgcVSeg_up, pgcVSipsc_up = (
    pd.read_excel(tables_folder + workbook, index_col=0)
    for workbook in (
        'GO_upregulated_hEGCLCs_vs_hiPSC_CC.xlsx',
        'GO_upregulated_hPGCLCs_vs_EGCLC_CC.xlsx',
        'GO_upregulated_hPGCLCs_vs_hiPSC_CC.xlsx',
    )
)
egVSipsc_down, pgcVSeg_down, pgcVSipsc_down = (
    pd.read_excel(tables_folder + workbook, index_col=0)
    for workbook in (
        'GO_downregulated_hEGCLCs_vs_hiPSC_CC.xlsx',
        'GO_downregulated_hPGCLCs_vs_EGCLC_CC.xlsx',
        'GO_downregulated_hPGCLCs_vs_hiPSC_CC.xlsx',
    )
)
# The 'allSignificant' CC tables are not loaded here (kept commented out).
#egVSipsc_all = pd.read_excel(tables_folder + 'GO_allSignificant_hEGCLCs_vs_hiPSC_CC.xlsx', index_col = 0)
#pgcVSeg_all = pd.read_excel(tables_folder + 'GO_allSignificant_hPGCLCs_vs_EGCLC_CC.xlsx', index_col = 0)
#pgcVSipsc_all = pd.read_excel(tables_folder + 'GO_allSignificant_hPGCLCs_vs_hiPSC_CC.xlsx', index_col = 0)
len(egVSipsc_up)
6
len(pgcVSeg_up)
15
len(pgcVSipsc_up)
15
_ = venn3([set(egVSipsc_up['GO.ID']), set(pgcVSeg_up['GO.ID']), set(pgcVSipsc_up['GO.ID'])], set_labels=('hEGCLCs_vs_hiPSC', 'hPGCLCs_vs_hEGCLC', 'hPGCLCs_vs_hiPSC'))
# Terms present in both up-regulated tables (hPGCLCs vs EGCLC and hPGCLCs vs
# hiPSC), ranked by the product of their two 'Statistics' values.
intersection = set(pgcVSeg_up['Term']).intersection(set(pgcVSipsc_up['Term']))
# A set is unordered and recent pandas rejects it as `index`; use a sorted
# list so the frame can be built and its row order is reproducible.
df = pd.DataFrame(index = sorted(intersection))
# Index both tables by term so the column assignments below align on df's index.
pgcVSeg_up.index = pgcVSeg_up.Term
pgcVSipsc_up.index = pgcVSipsc_up.Term
df['Scores_pgcVSeg_up'] = pgcVSeg_up[pgcVSeg_up.Term.isin(intersection)]['Statistics']
df['Scores_pgcVSipsc_up'] = pgcVSipsc_up[pgcVSipsc_up.Term.isin(intersection)]['Statistics']
# Product of the two values; show the ten rows with the smallest product.
df['Combined'] = df['Scores_pgcVSeg_up'] * df['Scores_pgcVSipsc_up']
df.sort_values(by = 'Combined').head(10)
Scores_pgcVSeg_up | Scores_pgcVSipsc_up | Combined | |
---|---|---|---|
external side of plasma membrane | 1.600000e-07 | 5.300000e-07 | 8.480000e-14 |
collagen-containing extracellular matrix | 4.900000e-05 | 2.400000e-05 | 1.176000e-09 |
intermediate filament | 2.600000e-04 | 1.700000e-05 | 4.420000e-09 |
extracellular matrix | 1.100000e-05 | 3.680000e-03 | 4.048000e-08 |
basal plasma membrane | 1.000000e-04 | 4.800000e-04 | 4.800000e-08 |
blood microparticle | 3.660000e-03 | 3.800000e-04 | 1.390800e-06 |
brush border membrane | 1.299000e-02 | 9.300000e-03 | 1.208070e-04 |
Golgi lumen | 1.688000e-02 | 7.920000e-03 | 1.336896e-04 |
collagen trimer | 1.083000e-02 | 1.994000e-02 | 2.159502e-04 |
desmosome | 4.764000e-02 | 6.440000e-03 | 3.068016e-04 |
set(egVSipsc_up['Term']).intersection(set(pgcVSipsc_up['Term']))
set()
_ = venn3([set(egVSipsc_down['GO.ID']), set(pgcVSeg_down['GO.ID']), set(pgcVSipsc_down['GO.ID'])], set_labels=('hEGCLCs_vs_hiPSC', 'hPGCLCs_vs_hEGCLC', 'hPGCLCs_vs_hiPSC'))
/usr/local/lib/python3.8/dist-packages/matplotlib_venn/_venn3.py:53: UserWarning: Circle A has zero area
set(pgcVSeg_down['Term']).intersection(set(pgcVSipsc_down['Term'])).intersection(set(egVSipsc_down['Term']))
set()
egVSipsc_down
GO.ID | Term | Annotated | Significant | Expected | Statistics | ER | -log10(pvalue) | Significant/Annotated |
---|
set(pgcVSeg_down['Term']).intersection(set(pgcVSipsc_down['Term']))
{'AMPA glutamate receptor complex', 'GABA-ergic synapse', 'Golgi lumen', 'T-tubule', 'acrosomal membrane', 'anchored component of membrane', 'anchored component of plasma membrane', 'axon terminus', 'cation channel complex', 'chloride channel complex', 'collagen trimer', 'dendrite membrane', 'dendritic spine', 'dense core granule', 'excitatory synapse', 'external side of plasma membrane', 'extracellular matrix', 'glial cell projection', 'hippocampal mossy fiber to CA3 synapse', 'inhibitory synapse', 'integral component of postsynaptic densi...', 'integral component of postsynaptic speci...', 'integral component of presynaptic membra...', 'integral component of synaptic vesicle m...', 'intrinsic component of synaptic vesicle ...', 'ionotropic glutamate receptor complex', 'plasma membrane signaling receptor compl...', 'postsynaptic density membrane', 'postsynaptic membrane', 'receptor complex', 'sarcoplasmic reticulum membrane', 'sodium channel complex', 'voltage-gated potassium channel complex'}
# Terms present in both down-regulated tables (hPGCLCs vs EGCLC and hPGCLCs
# vs hiPSC), ranked by the product of their two 'Statistics' values.
intersection = set(pgcVSeg_down['Term']).intersection(set(pgcVSipsc_down['Term']))
# A set is unordered and recent pandas rejects it as `index`; use a sorted
# list so the frame can be built and its row order is reproducible.
df = pd.DataFrame(index = sorted(intersection))
# Index both tables by term so the column assignments below align on df's index.
pgcVSeg_down.index = pgcVSeg_down.Term
pgcVSipsc_down.index = pgcVSipsc_down.Term
# Columns are named after the down-regulated tables they are taken from.
df['Scores_pgcVSeg_down'] = pgcVSeg_down[pgcVSeg_down.Term.isin(intersection)]['Statistics']
df['Scores_pgcVSipsc_down'] = pgcVSipsc_down[pgcVSipsc_down.Term.isin(intersection)]['Statistics']
# Product of the two values; the table is shown sorted ascending on it.
df['Combined'] = df['Scores_pgcVSeg_down'] * df['Scores_pgcVSipsc_down']
intTable(df.sort_values(by = 'Combined'), save = False)
set(egVSipsc_down['Term']).intersection(set(pgcVSipsc_down['Term']))
set()
# Enrichment tables from the '*_reactome.xlsx' workbooks, one per comparison
# and gene set; these replace the tables loaded earlier under the same names.
egVSipsc_up, pgcVSeg_up, pgcVSipsc_up = (
    pd.read_excel(tables_folder + workbook, index_col=0)
    for workbook in (
        'GO_up_hEGCLCs_vs_hiPSC_reactome.xlsx',
        'GO_up_hPGCLCs_vs_EGCLC_reactome.xlsx',
        'GO_up_hPGCLCs_vs_hiPSC_reactome.xlsx',
    )
)
egVSipsc_down, pgcVSeg_down, pgcVSipsc_down = (
    pd.read_excel(tables_folder + workbook, index_col=0)
    for workbook in (
        'GO_down_hEGCLCs_vs_hiPSC_reactome.xlsx',
        'GO_down_hPGCLCs_vs_EGCLC_reactome.xlsx',
        'GO_down_hPGCLCs_vs_hiPSC_reactome.xlsx',
    )
)
egVSipsc_all, pgcVSeg_all, pgcVSipsc_all = (
    pd.read_excel(tables_folder + workbook, index_col=0)
    for workbook in (
        'GO_all_hEGCLCs_vs_hiPSC_reactome.xlsx',
        'GO_all_hPGCLCs_vs_EGCLC_reactome.xlsx',
        'GO_all_hPGCLCs_vs_hiPSC_reactome.xlsx',
    )
)
# Preview the table loaded from 'GO_up_hPGCLCs_vs_hiPSC_reactome.xlsx'
# (the hEGCLCs-vs-hiPSC preview below is left disabled).
#egVSipsc_up
pgcVSipsc_up
estimate | pvals | log10_pval | gene_annotated | n_gene_annotated | gene_significant | n_gene_significant | ER | genes_score | |
---|---|---|---|---|---|---|---|---|---|
source | |||||||||
REACTOME_GPCR_LIGAND_BINDING | inf | 0.000000e+00 | inf | ['GPR183', 'CCL2', 'CXCL3', 'CXCL10', 'CXCL6',... | 463 | ['GCGR', 'WNT2B', 'TRH', 'APLNR', 'NMUR1', 'PT... | 38 | 0.082073 | [39.49588643299154, 33.297928178377134, 40.062... |
REACTOME_EXTRACELLULAR_MATRIX_ORGANIZATION | 42.519016 | 3.026805e-43 | 42.519016 | ['BMP2', 'ICAM1', 'TNC', 'SDC4', 'SERPINE1', '... | 300 | ['HAPLN1', 'COL23A1', 'PLOD1', 'DMD', 'CTSL', ... | 32 | 0.106667 | [65.7986538088462, 103.36318933859657, 69.0811... |
REACTOME_RHO_GTPASE_CYCLE | 34.512749 | 3.070788e-35 | 34.512749 | ['JAG1', 'TRIP10', 'RHOB', 'CAVIN1', 'AKAP12',... | 450 | ['ARMCX3', 'PCDH7', 'ACTC1', 'DIAPH2', 'DSP', ... | 26 | 0.057778 | [54.70057406879335, 13.91909396274346, 84.6465... |
REACTOME_SIGNALING_BY_INTERLEUKINS | 33.179810 | 6.609813e-34 | 33.179810 | ['RIPK2', 'CCL2', 'MAP3K8', 'VEGFA', 'DUSP4', ... | 461 | ['IL4R', 'CSF3R', 'JUNB', 'NLRC5', 'NFKB2', 'F... | 25 | 0.054230 | [23.204773309513616, 81.91921767972629, 6.9696... |
REACTOME_NEUTROPHIL_DEGRANULATION | 33.179810 | 6.609813e-34 | 33.179810 | ['PTX3', 'PLAU', 'NFKB1', 'SLC2A3', 'CXCL1', '... | 478 | ['FUCA1', 'DSP', 'CAMP', 'BST1', 'HGSNAT', 'AG... | 25 | 0.052301 | [39.4499262012086, 39.92293469792166, 27.98103... |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
REACTOME_REGULATION_OF_TP53_ACTIVITY | 6.614033 | 2.432021e-07 | 6.614033 | ['SGK1', 'TP53INP1', 'AURKA', 'CDK1', 'TPX2', ... | 160 | ['CHD3', 'PRDM1', 'POU4F1', 'SGK1', 'SMYD2'] | 5 | 0.031250 | [142.44874207108666, 30.476909844062614, 20.59... |
REACTOME_DISORDERS_OF_TRANSMEMBRANE_TRANSPORTERS | 6.614033 | 2.432021e-07 | 6.614033 | ['ABCA1', 'SLC2A1', 'CP', 'HK1', 'GCK', 'AVPR1... | 177 | ['GCKR', 'SLC2A9', 'SLCO2A1', 'ABCA1', 'SLC6A2'] | 5 | 0.028249 | [19.426605437676702, 29.234868870075427, 24.49... |
REACTOME_CILIUM_ASSEMBLY | 6.614033 | 2.432021e-07 | 6.614033 | ['TUBB2A', 'KIF3B', 'TUBA4A', 'DYNLL2', 'HDAC6... | 202 | ['KIF3A', 'PRKAR2B', 'CYS1', 'IFT172', 'KIFAP3'] | 5 | 0.024752 | [38.279452481075246, 90.7403994556073, 28.8877... |
REACTOME_NEDDYLATION | 6.614033 | 2.432021e-07 | 6.614033 | ['SPSB1', 'NFE2L2', 'CDKN1A', 'SOCS3', 'SQSTM1... | 245 | ['SPSB2', 'EPAS1', 'FBXO10', 'COMMD3', 'SPSB1'] | 5 | 0.020408 | [13.38792493848568, 6.904907507229172, 24.8095... |
REACTOME_ORGANELLE_BIOGENESIS_AND_MAINTENANCE | 6.614033 | 2.432021e-07 | 6.614033 | ['TUBB2A', 'SOD2', 'PPARGC1A', 'ACSS2', 'KIF3B... | 296 | ['KIF3A', 'PRKAR2B', 'CYS1', 'IFT172', 'KIFAP3'] | 5 | 0.016892 | [38.279452481075246, 90.7403994556073, 28.8877... |
189 rows × 9 columns
# Overlap of the row labels (reactome pathways) of the three `*_up` tables.
# The first circle is built from the loaded hEGCLCs-vs-hiPSC table instead of a
# hard-coded empty set, so the figure always reflects the data behind its label.
# If that table is empty the circle still has zero area, as before.
_ = venn3(
    [set(egVSipsc_up.index), set(pgcVSeg_up.index), set(pgcVSipsc_up.index)],
    set_labels=('hEGCLCs_vs_hiPSC', 'hPGCLCs_vs_hEGCLC', 'hPGCLCs_vs_hiPSC'),
)
/usr/local/lib/python3.8/dist-packages/matplotlib_venn/_venn3.py:53: UserWarning: Circle A has zero area
# Pathways present in BOTH hPGCLC `*_up` reactome tables (vs hEGCLC and vs hiPSC),
# with each table's 'pvals' value side by side.
intersection = set(pgcVSeg_up.index).intersection(set(pgcVSipsc_up.index))
# A set has no defined order; sort it so the rows come out the same on every run.
df = pd.DataFrame(index = sorted(intersection))
# Both tables are already indexed by pathway name, so the assignments align on it.
df['Scores_pgcVSeg_up'] = pgcVSeg_up.loc[pgcVSeg_up.index.isin(intersection), 'pvals']
df['Scores_pgcVSipsc_up'] = pgcVSipsc_up.loc[pgcVSipsc_up.index.isin(intersection), 'pvals']
# Product of the two per-table values, used only to rank the shared pathways.
df['Combined'] = df['Scores_pgcVSeg_up'] * df['Scores_pgcVSipsc_up']
intTable(df.sort_values(by = 'Combined'), save = False)
# Pathways shared by the hEGCLCs-vs-hiPSC and hPGCLCs-vs-hiPSC `*_up` tables.
set(egVSipsc_up.index) & set(pgcVSipsc_up.index)
set()
# Overlap of the row labels (reactome pathways) of the three `*_down` tables.
# The first circle is built from the loaded hEGCLCs-vs-hiPSC table instead of a
# hard-coded empty set, so the figure always reflects the data behind its label.
# If that table is empty the circle still has zero area, as before.
_ = venn3(
    [set(egVSipsc_down.index), set(pgcVSeg_down.index), set(pgcVSipsc_down.index)],
    set_labels=('hEGCLCs_vs_hiPSC', 'hPGCLCs_vs_hEGCLC', 'hPGCLCs_vs_hiPSC'),
)
/usr/local/lib/python3.8/dist-packages/matplotlib_venn/_venn3.py:53: UserWarning: Circle A has zero area
# Pathways present in all three `*_down` tables at once.
set(pgcVSeg_down.index) & set(pgcVSipsc_down.index) & set(egVSipsc_down.index)
set()
#set(pgcVSeg_down['source']).intersection(set(pgcVSipsc_down['source']))