import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib_venn import venn3, venn2
import seaborn as sns
import scanpy as sc
import glob

import sys

sys.path.append("./../../../../utilities_folder/")

from utilities import intTable


import rpy2.rinterface_lib.callbacks
import anndata2ri
import logging

from rpy2.robjects import pandas2ri
import rpy2.robjects as ro
from scipy import stats

rpy2.rinterface_lib.callbacks.logger.setLevel(logging.ERROR)

pandas2ri.activate()
anndata2ri.activate()

%load_ext rpy2.ipython


%matplotlib inline
sc.set_figure_params(dpi = 600, fontsize = 20)

plt.rcParams['pdf.fonttype'] = 'truetype'

cmap_up = sns.light_palette("red", as_cmap=True)
cmap_down = sns.light_palette("blue", as_cmap=True)
cmap_all = sns.light_palette("seagreen", as_cmap=True)


# Folder the spreadsheets below are read from.
tables_folder = './tables/'


# One table per pairwise contrast; the first spreadsheet column becomes the
# row index of each frame.
egVSipsc, egVSpgc, ipscVSpgc = (
    pd.read_excel(tables_folder + workbook, index_col=0)
    for workbook in (
        'Bulk_hEGCLCs_vs_hiPSC_filtered.xlsx',
        'Bulk_hPGCLCs_vs_EGCLC_filtered.xlsx',
        'Bulk_hPGCLCs_vs_hiPSC_filtered.xlsx',
    )
)


# Row labels of the hEGCLCs-vs-hiPSC table whose logFC is strictly positive.
up_eg_vs_ipsc = egVSipsc.loc[lambda tbl: tbl['logFC'] > 0].index.tolist()
len(up_eg_vs_ipsc)

82


# Row labels of the hEGCLCs-vs-hiPSC table whose logFC is strictly negative.
down_eg_vs_ipsc = egVSipsc.loc[lambda tbl: tbl['logFC'] < 0].index.tolist()
len(down_eg_vs_ipsc)

34


egVSpgc


# Split the hPGCLCs-vs-EGCLC table by the sign of logFC; rows with logFC == 0
# end up in neither list.
up_pgc_vs_eg = egVSpgc.loc[egVSpgc['logFC'] > 0].index.tolist()
down_pgc_vs_eg = egVSpgc.loc[egVSpgc['logFC'] < 0].index.tolist()

len(up_pgc_vs_eg)

1024


len(down_pgc_vs_eg)

1639


ipscVSpgc


# Split the hPGCLCs-vs-hiPSC table by the sign of logFC; rows with logFC == 0
# end up in neither list.
up_pgc_vs_ipsc = ipscVSpgc.loc[ipscVSpgc['logFC'] > 0].index.tolist()
down_pgc_vs_ipsc = ipscVSpgc.loc[ipscVSpgc['logFC'] < 0].index.tolist()

len(up_pgc_vs_ipsc)

953


len(down_pgc_vs_ipsc)

1632


# All row labels of the three contrast tables, regardless of direction.
_ = venn3(
    [set(egVSipsc.index), set(egVSpgc.index), set(ipscVSpgc.index)],
    set_labels=('hEGCLCs_vs_hiPSC', 'hPGCLCs_vs_hEGCLC', 'hPGCLCs_vs_hiPSC'),
)


# Positive-logFC lists of the three contrasts.
_ = venn3(
    [set(up_pgc_vs_eg), set(up_pgc_vs_ipsc), set(up_eg_vs_ipsc)],
    set_labels=('UP hPGCLCs_vs_hEGCLC', 'UP hPGCLCs_vs_hiPSC', 'UP hEGCLCs_vs_hiPSC'),
)


# Negative-logFC lists of the three contrasts.
_ = venn3(
    [set(down_pgc_vs_eg), set(down_pgc_vs_ipsc), set(down_eg_vs_ipsc)],
    set_labels=('DOWN hPGCLCs_vs_hEGCLC', 'DOWN hPGCLCs_vs_hiPSC', 'DOWN hEGCLCs_vs_hiPSC'),
)


# Pairwise view of the two hPGCLC positive-logFC lists.
venn2(
    [set(up_pgc_vs_eg), set(up_pgc_vs_ipsc)],
    set_labels=('UP hPGCLCs_vs_hEGCLC', 'UP hPGCLCs_vs_hiPSC'),
)

<matplotlib_venn._common.VennDiagram at 0x7fa16cefc220>


# Pairwise overlaps between direction-specific gene lists. Where a `down_X_vs_Y`
# list is plotted, its label is written as the reversed contrast ("UP Y_vs_X").
venn2([set(up_pgc_vs_eg), set(up_eg_vs_ipsc)], set_labels=('UP hPGCLCs_vs_hEGCLC', 'UP hEGCLCs_vs_hiPSC') )

<matplotlib_venn._common.VennDiagram at 0x7fa16cf1c940>


venn2([set(up_pgc_vs_ipsc), set(up_eg_vs_ipsc)], set_labels=('UP hPGCLCs_vs_hiPSC', 'UP hEGCLCs_vs_hiPSC') )

<matplotlib_venn._common.VennDiagram at 0x7fa16ce75700>


venn2([set(up_eg_vs_ipsc), set(down_pgc_vs_eg)], set_labels=('UP hEGCLC_vs_hiPSC', 'UP hEGCLC_vs_hPGCLC') )

<matplotlib_venn._common.VennDiagram at 0x7fa16ce47070>


# down_pgc_vs_ipsc holds the rows with logFC < 0 in the hPGCLCs-vs-hiPSC table,
# so the second circle is labelled with the reversed contrast (UP hiPSC_vs_hPGCLC),
# matching how the other venn2 cells label this list.
venn2([set(up_eg_vs_ipsc), set(down_pgc_vs_ipsc)], set_labels=('UP hEGCLC_vs_hiPSC', 'UP hiPSC_vs_hPGCLC') )

<matplotlib_venn._common.VennDiagram at 0x7fa16d559ac0>


# Remaining pairwise overlaps. Every `down_X_vs_Y` list is labelled as the
# reversed contrast ("UP Y_vs_X"); `up_*` lists keep their own contrast name.
venn2([set(down_eg_vs_ipsc), set(up_pgc_vs_eg)], set_labels=('UP hiPSC_vs_hEGCLC', 'UP hPGCLC_vs_hEGCLC') )

<matplotlib_venn._common.VennDiagram at 0x7fa16d027940>


venn2([set(down_eg_vs_ipsc), set(down_pgc_vs_eg)], set_labels=('UP hiPSC_vs_hEGCLC', 'UP hEGCLC_vs_hPGCLC') )

<matplotlib_venn._common.VennDiagram at 0x7fa151d75220>


venn2([set(down_eg_vs_ipsc), set(up_pgc_vs_ipsc)], set_labels=('UP hiPSC_vs_hEGCLC', 'UP hPGCLC_vs_hiPSC') )

<matplotlib_venn._common.VennDiagram at 0x7fa1675cef70>


venn2([set(down_eg_vs_ipsc), set(down_pgc_vs_ipsc)], set_labels=('UP hiPSC_vs_hEGCLC', 'UP hiPSC_vs_hPGCLC') )

<matplotlib_venn._common.VennDiagram at 0x7fa16cd6dfd0>


venn2([set(up_pgc_vs_eg), set(down_pgc_vs_ipsc)], set_labels=('UP hPGCLC_vs_hEGCLC', 'UP hiPSC_vs_hPGCLC') )

<matplotlib_venn._common.VennDiagram at 0x7fa16cd9b2e0>


venn2([set(down_pgc_vs_eg), set(up_pgc_vs_ipsc)], set_labels=('UP hEGCLC_vs_hPGCLC', 'UP hPGCLC_vs_hiPSC') )

<matplotlib_venn._common.VennDiagram at 0x7fa16cd63070>


venn2([set(down_pgc_vs_eg), set(down_pgc_vs_ipsc)], set_labels=('UP hEGCLC_vs_hPGCLC', 'UP hiPSC_vs_hPGCLC') )

<matplotlib_venn._common.VennDiagram at 0x7fa16ccb0610>


%%R -i up_eg_vs_ipsc -i down_eg_vs_ipsc -i up_pgc_vs_eg -i down_pgc_vs_eg -i up_pgc_vs_ipsc -i down_pgc_vs_ipsc

# Point R at the project-local library (renv environment) before loading packages.
loc <- './../../../../R_loc'
.libPaths(loc)

library(GeneOverlap)

# Named gene lists imported from Python, grouped by direction.
up <- list(
    up_eg_vs_ipsc = up_eg_vs_ipsc,
    up_pgc_vs_eg = up_pgc_vs_eg,
    up_pgc_vs_ipsc = up_pgc_vs_ipsc
)

down <- list(
    down_eg_vs_ipsc = down_eg_vs_ipsc,
    down_pgc_vs_eg = down_pgc_vs_eg,
    down_pgc_vs_ipsc = down_pgc_vs_ipsc
)

# Overlap objects: up against down, up against up, down against down.
# All three use the same genome.size.
gom.obj_up_down <- newGOM(up, down, genome.size = 14582)
gom.obj_up_up <- newGOM(up, genome.size = 14582)
gom.obj_down_down <- newGOM(down, genome.size = 14582)


%%R
# Jaccard heatmap for the up-vs-down overlap object.
drawHeatmap(gom.obj_up_down, what = 'Jaccard', grid.col="Greens", note.col="white")


%%R
# Jaccard heatmap for the up-vs-up overlap object.
drawHeatmap(gom.obj_up_up, what = 'Jaccard', grid.col="Reds", note.col="white")


%%R
# Jaccard heatmap for the down-vs-down overlap object.
drawHeatmap(gom.obj_down_down, what = 'Jaccard', grid.col="Blues",  note.col="white")


# Sorted, de-duplicated row labels of each contrast table.
l1 = sorted(set(egVSipsc.index))

l2 = sorted(set(egVSpgc.index))

l3 = sorted(set(ipscVSpgc.index))


# One column per contrast, built row-wise and then transposed.
upsetplot_df = pd.DataFrame(
    [l1, l2, l3],
    index=['hEGCLCs_vs_hiPSC', 'hPGCLCs_vs_hEGCLC', 'hPGCLCs_vs_hiPSC'],
).transpose()


# Pairwise Jaccard index (|A & B| / |A | B|) between the three gene lists,
# stored under the key "<contrast A>-<contrast B>".
stats_pos = {}

named_lists = list(zip(upsetplot_df.columns, [l1, l2, l3]))
for n, i in named_lists:
    set1 = set(i)
    for m, j in named_lists:
        set2 = set(j)
        intersection = len(set1 & set2)
        union = len(set1 | set2)
        key = f'{n}-{m}'
        stats_pos[key] = intersection / union


stats_pos

{'hEGCLCs_vs_hiPSC-hEGCLCs_vs_hiPSC': 1.0,
 'hEGCLCs_vs_hiPSC-hPGCLCs_vs_hEGCLC': 0.025082995204721504,
 'hEGCLCs_vs_hiPSC-hPGCLCs_vs_hiPSC': 0.014650638617580767,
 'hPGCLCs_vs_hEGCLC-hEGCLCs_vs_hiPSC': 0.025082995204721504,
 'hPGCLCs_vs_hEGCLC-hPGCLCs_vs_hEGCLC': 1.0,
 'hPGCLCs_vs_hEGCLC-hPGCLCs_vs_hiPSC': 0.7297297297297297,
 'hPGCLCs_vs_hiPSC-hEGCLCs_vs_hiPSC': 0.014650638617580767,
 'hPGCLCs_vs_hiPSC-hPGCLCs_vs_hEGCLC': 0.7297297297297297,
 'hPGCLCs_vs_hiPSC-hPGCLCs_vs_hiPSC': 1.0}


# Turn the "<A>-<B>" keyed scores into a square table: rows are the first
# contrast name (i1), columns the second (i2).
pair_labels = pd.Series(list(stats_pos))
i1 = pair_labels.str.split('-').str[0]
i2 = pair_labels.str.split('-').str[1]
df_to_plot = pd.DataFrame({'i1': i1, 'i2': i2, 0: list(stats_pos.values())})
df_to_plot = df_to_plot.pivot(index='i1', columns='i2', values=0)


plt.figure(figsize=(5, 5))
# Mask the upper triangle including the diagonal, leaving the strict lower
# triangle visible.
mask = np.triu(np.ones(df_to_plot.shape, dtype=bool))
sns.heatmap(
    df_to_plot,
    mask=mask,
    cmap='PuBu',
    square=True,
    linewidths=.5,
    cbar_kws={"shrink": .5},
)

<AxesSubplot:xlabel='i2', ylabel='i1'>


# BP enrichment tables for the upregulated gene lists of each contrast.
egVSipsc_up, pgcVSeg_up, pgcVSipsc_up = (
    pd.read_excel(tables_folder + workbook, index_col=0)
    for workbook in (
        'GO_upregulated_hEGCLCs_vs_hiPSC_BP.xlsx',
        'GO_upregulated_hPGCLCs_vs_EGCLC_BP.xlsx',
        'GO_upregulated_hPGCLCs_vs_hiPSC_BP.xlsx',
    )
)


# BP enrichment tables for the downregulated gene lists.
egVSipsc_down, pgcVSeg_down, pgcVSipsc_down = (
    pd.read_excel(tables_folder + workbook, index_col=0)
    for workbook in (
        'GO_downregulated_hEGCLCs_vs_hiPSC_BP.xlsx',
        'GO_downregulated_hPGCLCs_vs_EGCLC_BP.xlsx',
        'GO_downregulated_hPGCLCs_vs_hiPSC_BP.xlsx',
    )
)


# BP enrichment tables for all significant genes.
egVSipsc_all, pgcVSeg_all, pgcVSipsc_all = (
    pd.read_excel(tables_folder + workbook, index_col=0)
    for workbook in (
        'GO_allSignificant_hEGCLCs_vs_hiPSC_BP.xlsx',
        'GO_allSignificant_hPGCLCs_vs_EGCLC_BP.xlsx',
        'GO_allSignificant_hPGCLCs_vs_hiPSC_BP.xlsx',
    )
)


len(egVSipsc_up)

15


len(pgcVSeg_up)

159


len(pgcVSipsc_up)

128


# GO.ID overlap between the three "upregulated" enrichment tables.
_ = venn3(
    [set(egVSipsc_up['GO.ID']), set(pgcVSeg_up['GO.ID']), set(pgcVSipsc_up['GO.ID'])],
    set_labels=('hEGCLCs_vs_hiPSC', 'hPGCLCs_vs_hEGCLC', 'hPGCLCs_vs_hiPSC'),
)


pgcVSeg_up


# Terms present in both hPGCLC tables, with each table's 'Statistics' value and
# their product as a combined score.
intersection = set(pgcVSeg_up['Term']).intersection(set(pgcVSipsc_up['Term']))
df = pd.DataFrame(index = intersection)
# Re-index both tables by Term so the column assignments align on term name.
pgcVSeg_up.index = pgcVSeg_up['Term']
pgcVSipsc_up.index = pgcVSipsc_up['Term']
df['Scores_pgcVSeg_up'] = pgcVSeg_up.loc[pgcVSeg_up['Term'].isin(intersection), 'Statistics']
df['Scores_pgcVSipsc_up'] = pgcVSipsc_up.loc[pgcVSipsc_up['Term'].isin(intersection), 'Statistics']
df['Combined'] = df['Scores_pgcVSeg_up'].mul(df['Scores_pgcVSipsc_up'])


intTable(df.sort_values(by = 'Combined'), save = False)


set(egVSipsc_up['Term']).intersection(set(pgcVSipsc_up['Term']))

set()


set(egVSipsc_up['Term'])

{'DNA methylation',
 'aging',
 'aromatic compound catabolic process',
 'cellular process involved in reproductio...',
 'gene silencing by RNA',
 'germ cell development',
 'learning or memory',
 'locomotory behavior',
 'methylation',
 'negative regulation of phosphorylation',
 'negative regulation of protein-containin...',
 'organic cyclic compound catabolic proces...',
 'piRNA metabolic process',
 'response to oxidative stress',
 'spermatogenesis'}


_ = venn3([set(egVSipsc_down['GO.ID']), set(pgcVSeg_down['GO.ID']), set(pgcVSipsc_down['GO.ID'])], set_labels=('hEGCLCs_vs_hiPSC', 'hPGCLCs_vs_hEGCLC', 'hPGCLCs_vs_hiPSC'))


set(pgcVSeg_down['Term']).intersection(set(pgcVSipsc_down['Term'])).intersection(set(egVSipsc_down['Term']))

set()


set(pgcVSeg_down['Term']).intersection(set(pgcVSipsc_down['Term']))

{'B cell receptor signaling pathway',
 'G protein-coupled receptor signaling pat...',
 'activation of transmembrane receptor pro...',
 'adenylate cyclase-activating G protein-c...',
 'adult locomotory behavior',
 'antimicrobial humoral immune response me...',
 'behavioral fear response',
 'calcium ion transmembrane import into cy...',
 'calcium ion-regulated exocytosis of neur...',
 'cellular response to calcium ion',
 'cellular sodium ion homeostasis',
 'chemokine-mediated signaling pathway',
 'chloride transmembrane transport',
 'chondrocyte proliferation',
 'complement activation, classical pathway',
 'detection of temperature stimulus',
 'dopamine metabolic process',
 'engulfment of apoptotic cell',
 'excitatory postsynaptic potential',
 'exploration behavior',
 'eye photoreceptor cell development',
 'gamma-aminobutyric acid signaling pathwa...',
 'ionotropic glutamate receptor signaling ...',
 'locomotory behavior',
 'lymph vessel morphogenesis',
 'membrane depolarization',
 'memory',
 'monocyte chemotaxis',
 'multicellular organismal response to str...',
 'negative regulation of angiogenesis',
 'negative regulation of blood pressure',
 'negative regulation of fibroblast growth...',
 'negative regulation of leukocyte apoptot...',
 'negative regulation of vascular permeabi...',
 'neuron maturation',
 'neuronal action potential',
 'neuropeptide signaling pathway',
 'neurotransmitter metabolic process',
 'neurotransmitter transport',
 'neurotransmitter-gated ion channel clust...',
 'nitric oxide mediated signal transductio...',
 'phagocytosis, engulfment',
 'phagocytosis, recognition',
 'positive regulation of ERK1 and ERK2 cas...',
 'positive regulation of G protein-coupled...',
 'positive regulation of calcium ion trans...',
 'positive regulation of cytosolic calcium...',
 'positive regulation of dendrite extensio...',
 'positive regulation of developmental gro...',
 'positive regulation of excitatory postsy...',
 'positive regulation of phosphatidylinosi...',
 'positive regulation of positive chemotax...',
 'positive regulation of protein kinase B ...',
 'positive regulation of stem cell prolife...',
 'positive regulation of synapse assembly',
 'positive regulation of synaptic transmis...',
 'potassium ion import across plasma membr...',
 'potassium ion transmembrane transport',
 'prostaglandin secretion',
 'purinergic nucleotide receptor signaling...',
 'receptor localization to synapse',
 'regulation of AMPA receptor activity',
 'regulation of action potential',
 'regulation of cytosolic calcium ion conc...',
 'regulation of dopamine secretion',
 'regulation of humoral immune response',
 'regulation of ion transmembrane transpor...',
 'regulation of membrane potential',
 'regulation of neuronal synaptic plastici...',
 'regulation of neurotransmitter secretion',
 'regulation of neutrophil chemotaxis',
 'regulation of postsynaptic membrane pote...',
 'regulation of postsynaptic neurotransmit...',
 'regulation of presynapse assembly',
 'regulation of ryanodine-sensitive calciu...',
 'regulation of short-term neuronal synapt...',
 'regulation of smooth muscle contraction',
 'regulation of synaptic vesicle exocytosi...',
 'regulation of ventricular cardiac muscle...',
 'relaxation of muscle',
 'sensory perception of smell',
 'skeletal muscle contraction',
 'sodium ion transmembrane transport',
 'synaptic membrane adhesion',
 'vascular endothelial growth factor signa...',
 'visual learning'}


pgcVSeg_down = pgcVSeg_down[~pgcVSeg_down.index.duplicated()]
pgcVSipsc_down = pgcVSipsc_down[~pgcVSipsc_down.index.duplicated()]


# Terms present in both hPGCLC "downregulated" tables, with each table's
# 'Statistics' value and their product as a combined score.
intersection = set(pgcVSeg_down['Term']).intersection(set(pgcVSipsc_down['Term']))
df = pd.DataFrame(index = intersection)
# df is indexed by Term, while the tables still carry the row index they were
# loaded with. Assigning their 'Statistics' column directly would align on that
# row index and leave every score NaN. Build Term-indexed lookups instead;
# repeated Term labels are collapsed to their first row so the alignment
# cannot fail on duplicate labels.
df['Scores_pgcVSeg_down'] = pgcVSeg_down.drop_duplicates(subset='Term').set_index('Term')['Statistics']
df['Scores_pgcVSipsc_down'] = pgcVSipsc_down.drop_duplicates(subset='Term').set_index('Term')['Statistics']
df['Combined'] = df['Scores_pgcVSeg_down'] * df['Scores_pgcVSipsc_down']


intTable(df.sort_values(by = 'Combined'), save = False)


set(egVSipsc_down['Term']).intersection(set(pgcVSipsc_down['Term']))

set()


# Terms shared by the hEGCLCs-vs-hiPSC and hPGCLCs-vs-hiPSC "downregulated"
# tables, scored by the product of their 'Statistics' values.
intersection = set(egVSipsc_down['Term']).intersection(set(pgcVSipsc_down['Term']))
df = pd.DataFrame(index = intersection)
# Re-index both tables by Term so the column assignments align on term name.
egVSipsc_down.index = egVSipsc_down['Term']
pgcVSipsc_down.index = pgcVSipsc_down['Term']
df['Scores_egVSipsc_down'] = egVSipsc_down.loc[egVSipsc_down['Term'].isin(intersection), 'Statistics']
df['Scores_pgcVSipsc_down'] = pgcVSipsc_down.loc[pgcVSipsc_down['Term'].isin(intersection), 'Statistics']
df['Combined'] = df['Scores_egVSipsc_down'].mul(df['Scores_pgcVSipsc_down'])


intTable(df.sort_values(by = 'Combined'), save = False)


_ = venn3([set(egVSipsc_all['GO.ID']), set(pgcVSeg_all['GO.ID']), set(pgcVSipsc_all['GO.ID'])], set_labels=('hEGCLCs_vs_hiPSC', 'hPGCLCs_vs_hEGCLC', 'hPGCLCs_vs_hiPSC'))


set(pgcVSeg_all['Term']).intersection(set(pgcVSipsc_all['Term'])).intersection(set(egVSipsc_all['Term']))

{'postsynaptic membrane'}


set(pgcVSeg_all['Term']).intersection(set(pgcVSipsc_all['Term']))

{'AMPA glutamate receptor complex',
 'GABA-ergic synapse',
 'Golgi lumen',
 'anchored component of membrane',
 'anchored component of plasma membrane',
 'blood microparticle',
 'chloride channel complex',
 'collagen trimer',
 'dendrite membrane',
 'dense core granule',
 'excitatory synapse',
 'external side of plasma membrane',
 'extracellular matrix',
 'glial cell projection',
 'hippocampal mossy fiber to CA3 synapse',
 'integral component of postsynaptic densi...',
 'integral component of postsynaptic speci...',
 'integral component of presynaptic membra...',
 'intrinsic component of synaptic vesicle ...',
 'postsynaptic density membrane',
 'postsynaptic membrane',
 'receptor complex',
 'sodium channel complex',
 'terminal bouton',
 'voltage-gated potassium channel complex'}


set(egVSipsc_all['Term']).intersection(set(pgcVSipsc_all['Term']))

{'postsynaptic membrane'}


# MF enrichment tables; these rebind the same variable names used for the BP
# tables earlier in the notebook.
egVSipsc_up, pgcVSeg_up, pgcVSipsc_up = [
    pd.read_excel(tables_folder + workbook, index_col=0)
    for workbook in (
        'GO_upregulated_hEGCLCs_vs_hiPSC_MF.xlsx',
        'GO_upregulated_hPGCLCs_vs_EGCLC_MF.xlsx',
        'GO_upregulated_hPGCLCs_vs_hiPSC_MF.xlsx',
    )
]


egVSipsc_down, pgcVSeg_down, pgcVSipsc_down = [
    pd.read_excel(tables_folder + workbook, index_col=0)
    for workbook in (
        'GO_downregulated_hEGCLCs_vs_hiPSC_MF.xlsx',
        'GO_downregulated_hPGCLCs_vs_EGCLC_MF.xlsx',
        'GO_downregulated_hPGCLCs_vs_hiPSC_MF.xlsx',
    )
]


egVSipsc_all, pgcVSeg_all, pgcVSipsc_all = [
    pd.read_excel(tables_folder + workbook, index_col=0)
    for workbook in (
        'GO_allSignificant_hEGCLCs_vs_hiPSC_MF.xlsx',
        'GO_allSignificant_hPGCLCs_vs_EGCLC_MF.xlsx',
        'GO_allSignificant_hPGCLCs_vs_hiPSC_MF.xlsx',
    )
]


len(egVSipsc_up)

9


len(pgcVSeg_up)

22


len(pgcVSipsc_up)

19


# GO.ID overlap between the three "upregulated" MF enrichment tables.
_ = venn3([set(egVSipsc_up['GO.ID']), set(pgcVSeg_up['GO.ID']), set(pgcVSipsc_up['GO.ID'])], set_labels=('hEGCLCs_vs_hiPSC', 'hPGCLCs_vs_hEGCLC', 'hPGCLCs_vs_hiPSC'))


pgcVSeg_up


# NOTE(review): `intersection` is not recomputed for the MF tables until a later
# cell. Read in file order, the most recent assignment before this point is the
# BP egVSipsc_down / pgcVSipsc_down one, so this filter uses that older set.
# Confirm this is intended, or move these checks below the recomputation.
pgcVSeg_up[pgcVSeg_up.Term.isin(intersection)]


len(pgcVSeg_up[pgcVSeg_up.Term.isin(intersection)])

0


len(intersection)

0


pgcVSipsc_up[pgcVSipsc_up.Term.isin(intersection)]


# Terms present in both hPGCLC "upregulated" tables, with each table's
# 'Statistics' value and their product as a combined score.
intersection = set(pgcVSeg_up['Term']).intersection(set(pgcVSipsc_up['Term']))
df = pd.DataFrame(index = intersection)
# Re-index both tables by Term so the column assignments align on term name.
pgcVSeg_up.index = pgcVSeg_up['Term']
pgcVSipsc_up.index = pgcVSipsc_up['Term']
df['Scores_pgcVSeg_up'] = pgcVSeg_up.loc[pgcVSeg_up['Term'].isin(intersection), 'Statistics']
df['Scores_pgcVSipsc_up'] = pgcVSipsc_up.loc[pgcVSipsc_up['Term'].isin(intersection), 'Statistics']
df['Combined'] = df['Scores_pgcVSeg_up'].mul(df['Scores_pgcVSipsc_up'])


intTable(df.sort_values(by = 'Combined'), save = False)


set(egVSipsc_up['Term']).intersection(set(pgcVSipsc_up['Term']))

set()


set(egVSipsc_up['Term'])

{'DNA-binding transcription repressor acti...',
 'active ion transmembrane transporter act...',
 'active transmembrane transporter activit...',
 'cation transmembrane transporter activit...',
 'inorganic cation transmembrane transport...',
 'mRNA binding',
 'phosphoric ester hydrolase activity',
 'structural molecule activity'}


_ = venn3([set(egVSipsc_down['GO.ID']), set(pgcVSeg_down['GO.ID']), set(pgcVSipsc_down['GO.ID'])], set_labels=('hEGCLCs_vs_hiPSC', 'hPGCLCs_vs_hEGCLC', 'hPGCLCs_vs_hiPSC'))


set(pgcVSeg_down['Term']).intersection(set(pgcVSipsc_down['Term'])).intersection(set(egVSipsc_down['Term']))

set()


set(pgcVSeg_down['Term']).intersection(set(pgcVSipsc_down['Term']))

{'CoA-ligase activity',
 'G protein-coupled peptide receptor activ...',
 'G protein-coupled receptor activity',
 'G-protein beta-subunit binding',
 'Wnt-protein binding',
 'alkali metal ion binding',
 'amyloid-beta binding',
 'antigen binding',
 'calcium channel regulator activity',
 'calcium-dependent phospholipid binding',
 'chloride channel activity',
 'cytokine activity',
 'cytokine receptor activity',
 'excitatory extracellular ligand-gated io...',
 'extracellular matrix binding',
 'extracellular matrix structural constitu...',
 'fibroblast growth factor receptor bindin...',
 'glutamate receptor activity',
 'growth factor activity',
 'hormone activity',
 'inward rectifier potassium channel activ...',
 'ligand-gated cation channel activity',
 'neuropeptide receptor activity',
 'neuropeptide receptor binding',
 'organic acid:sodium symporter activity',
 'peptide hormone binding',
 'phosphatidylserine binding',
 'protein tyrosine kinase activator activi...',
 'proteoglycan binding',
 'serine-type endopeptidase activity',
 'serine-type endopeptidase inhibitor acti...',
 'sulfotransferase activity',
 'transmembrane receptor protein tyrosine ...',
 'transmembrane signaling receptor activit...',
 'transmitter-gated ion channel activity i...',
 'voltage-gated ion channel activity'}


pgcVSeg_down = pgcVSeg_down[~pgcVSeg_down.index.duplicated()]
pgcVSipsc_down = pgcVSipsc_down[~pgcVSipsc_down.index.duplicated()]


# Terms present in both hPGCLC "downregulated" tables, with each table's
# 'Statistics' value and their product as a combined score.
intersection = set(pgcVSeg_down['Term']).intersection(set(pgcVSipsc_down['Term']))
df = pd.DataFrame(index = intersection)

# df is indexed by Term, while the tables still carry the row index they were
# loaded with. Assigning their 'Statistics' column directly would align on that
# row index and leave every score NaN. Build Term-indexed lookups instead;
# repeated Term labels are collapsed to their first row so the alignment
# cannot fail on duplicate labels.
df['Scores_pgcVSeg_down'] = pgcVSeg_down.drop_duplicates(subset='Term').set_index('Term')['Statistics']
df['Scores_pgcVSipsc_down'] = pgcVSipsc_down.drop_duplicates(subset='Term').set_index('Term')['Statistics']
df['Combined'] = df['Scores_pgcVSeg_down'] * df['Scores_pgcVSipsc_down']


intTable(df.sort_values(by = 'Combined'), save = False)


set(egVSipsc_down['Term']).intersection(set(pgcVSipsc_down['Term']))

set()


# Terms shared by the hEGCLCs-vs-hiPSC and hPGCLCs-vs-hiPSC "downregulated"
# tables, scored by the product of their 'Statistics' values.
intersection = set(egVSipsc_down['Term']).intersection(set(pgcVSipsc_down['Term']))
df = pd.DataFrame(index = intersection)
# Re-index both tables by Term so the column assignments align on term name.
egVSipsc_down.index = egVSipsc_down['Term']
pgcVSipsc_down.index = pgcVSipsc_down['Term']
df['Scores_egVSipsc_down'] = egVSipsc_down.loc[egVSipsc_down['Term'].isin(intersection), 'Statistics']
df['Scores_pgcVSipsc_down'] = pgcVSipsc_down.loc[pgcVSipsc_down['Term'].isin(intersection), 'Statistics']
df['Combined'] = df['Scores_egVSipsc_down'].mul(df['Scores_pgcVSipsc_down'])


intTable(df.sort_values(by = 'Combined'), save = False)


_ = venn3([set(egVSipsc_all['GO.ID']), set(pgcVSeg_all['GO.ID']), set(pgcVSipsc_all['GO.ID'])], set_labels=('hEGCLCs_vs_hiPSC', 'hPGCLCs_vs_hEGCLC', 'hPGCLCs_vs_hiPSC'))


set(pgcVSeg_all['Term']).intersection(set(pgcVSipsc_all['Term'])).intersection(set(egVSipsc_all['Term']))

{'G protein-coupled receptor activity'}


set(pgcVSeg_all['Term']).intersection(set(pgcVSipsc_all['Term']))

{'G protein-coupled peptide receptor activ...',
 'G protein-coupled receptor activity',
 'G-protein beta-subunit binding',
 'Wnt-protein binding',
 'antigen binding',
 'calcium channel regulator activity',
 'chloride channel activity',
 'cholesterol binding',
 'cytokine activity',
 'cytokine binding',
 'cytokine receptor activity',
 'delayed rectifier potassium channel acti...',
 'excitatory extracellular ligand-gated io...',
 'extracellular matrix binding',
 'extracellular matrix structural constitu...',
 'fibroblast growth factor receptor bindin...',
 'fibronectin binding',
 'frizzled binding',
 'glutamate receptor activity',
 'growth factor activity',
 'growth factor receptor binding',
 'heparin binding',
 'hormone activity',
 'inward rectifier potassium channel activ...',
 'ligand-gated cation channel activity',
 'neuropeptide receptor activity',
 'neuropeptide receptor binding',
 'organic acid:sodium symporter activity',
 'outward rectifier potassium channel acti...',
 'peptide hormone binding',
 'protein tyrosine kinase activator activi...',
 'proteoglycan binding',
 'scavenger receptor activity',
 'serine-type endopeptidase inhibitor acti...',
 'sialyltransferase activity',
 'sulfotransferase activity',
 'transmembrane receptor protein tyrosine ...',
 'transmembrane signaling receptor activit...',
 'transmitter-gated ion channel activity i...'}


set(egVSipsc_all['Term']).intersection(set(pgcVSipsc_all['Term']))

{'G protein-coupled receptor activity'}


# CC enrichment tables; these rebind the same variable names used for the
# earlier ontologies.
egVSipsc_up, pgcVSeg_up, pgcVSipsc_up = (
    pd.read_excel(tables_folder + workbook, index_col=0)
    for workbook in (
        'GO_upregulated_hEGCLCs_vs_hiPSC_CC.xlsx',
        'GO_upregulated_hPGCLCs_vs_EGCLC_CC.xlsx',
        'GO_upregulated_hPGCLCs_vs_hiPSC_CC.xlsx',
    )
)


egVSipsc_down, pgcVSeg_down, pgcVSipsc_down = (
    pd.read_excel(tables_folder + workbook, index_col=0)
    for workbook in (
        'GO_downregulated_hEGCLCs_vs_hiPSC_CC.xlsx',
        'GO_downregulated_hPGCLCs_vs_EGCLC_CC.xlsx',
        'GO_downregulated_hPGCLCs_vs_hiPSC_CC.xlsx',
    )
)


#egVSipsc_all = pd.read_excel(tables_folder + 'GO_allSignificant_hEGCLCs_vs_hiPSC_CC.xlsx', index_col = 0)
#pgcVSeg_all = pd.read_excel(tables_folder + 'GO_allSignificant_hPGCLCs_vs_EGCLC_CC.xlsx', index_col = 0)
#pgcVSipsc_all = pd.read_excel(tables_folder + 'GO_allSignificant_hPGCLCs_vs_hiPSC_CC.xlsx', index_col = 0)


len(egVSipsc_up)

6


len(pgcVSeg_up)

15


len(pgcVSipsc_up)

15


# GO.ID overlap between the three "upregulated" CC enrichment tables.
_ = venn3(
    [set(egVSipsc_up['GO.ID']), set(pgcVSeg_up['GO.ID']), set(pgcVSipsc_up['GO.ID'])],
    set_labels=('hEGCLCs_vs_hiPSC', 'hPGCLCs_vs_hEGCLC', 'hPGCLCs_vs_hiPSC'),
)


# Terms present in both hPGCLC tables, with each table's 'Statistics' value and
# their product as a combined score.
intersection = set(pgcVSeg_up['Term']).intersection(set(pgcVSipsc_up['Term']))
df = pd.DataFrame(index = intersection)
# Re-index both tables by Term so the column assignments align on term name.
pgcVSeg_up.index = pgcVSeg_up['Term']
pgcVSipsc_up.index = pgcVSipsc_up['Term']
df['Scores_pgcVSeg_up'] = pgcVSeg_up.loc[pgcVSeg_up['Term'].isin(intersection), 'Statistics']
df['Scores_pgcVSipsc_up'] = pgcVSipsc_up.loc[pgcVSipsc_up['Term'].isin(intersection), 'Statistics']
df['Combined'] = df['Scores_pgcVSeg_up'].mul(df['Scores_pgcVSipsc_up'])


# Ten rows with the smallest combined score.
df.sort_values(by = 'Combined').head(10)


set(egVSipsc_up['Term']).intersection(set(pgcVSipsc_up['Term']))

set()


_ = venn3([set(egVSipsc_down['GO.ID']), set(pgcVSeg_down['GO.ID']), set(pgcVSipsc_down['GO.ID'])], set_labels=('hEGCLCs_vs_hiPSC', 'hPGCLCs_vs_hEGCLC', 'hPGCLCs_vs_hiPSC'))

/usr/local/lib/python3.8/dist-packages/matplotlib_venn/_venn3.py:53: UserWarning:

Circle A has zero area


set(pgcVSeg_down['Term']).intersection(set(pgcVSipsc_down['Term'])).intersection(set(egVSipsc_down['Term']))

set()


egVSipsc_down


set(pgcVSeg_down['Term']).intersection(set(pgcVSipsc_down['Term']))

{'AMPA glutamate receptor complex',
 'GABA-ergic synapse',
 'Golgi lumen',
 'T-tubule',
 'acrosomal membrane',
 'anchored component of membrane',
 'anchored component of plasma membrane',
 'axon terminus',
 'cation channel complex',
 'chloride channel complex',
 'collagen trimer',
 'dendrite membrane',
 'dendritic spine',
 'dense core granule',
 'excitatory synapse',
 'external side of plasma membrane',
 'extracellular matrix',
 'glial cell projection',
 'hippocampal mossy fiber to CA3 synapse',
 'inhibitory synapse',
 'integral component of postsynaptic densi...',
 'integral component of postsynaptic speci...',
 'integral component of presynaptic membra...',
 'integral component of synaptic vesicle m...',
 'intrinsic component of synaptic vesicle ...',
 'ionotropic glutamate receptor complex',
 'plasma membrane signaling receptor compl...',
 'postsynaptic density membrane',
 'postsynaptic membrane',
 'receptor complex',
 'sarcoplasmic reticulum membrane',
 'sodium channel complex',
 'voltage-gated potassium channel complex'}


# Terms present in both hPGCLC "downregulated" tables, with each table's
# 'Statistics' value and their product as a combined score.
intersection = set(pgcVSeg_down['Term']).intersection(set(pgcVSipsc_down['Term']))
df = pd.DataFrame(index = intersection)
# Re-index both tables by Term so the column assignments align on term name.
pgcVSeg_down.index = pgcVSeg_down.Term
pgcVSipsc_down.index = pgcVSipsc_down.Term
# Columns are named *_down because the values come from the downregulated
# tables (they were previously labelled *_up).
df['Scores_pgcVSeg_down'] = pgcVSeg_down[pgcVSeg_down.Term.isin(intersection)]['Statistics']
df['Scores_pgcVSipsc_down'] = pgcVSipsc_down[pgcVSipsc_down.Term.isin(intersection)]['Statistics']
df['Combined'] = df['Scores_pgcVSeg_down'] * df['Scores_pgcVSipsc_down']


intTable(df.sort_values(by = 'Combined'), save = False)


set(egVSipsc_down['Term']).intersection(set(pgcVSipsc_down['Term']))

set()


# Reactome enrichment tables; these rebind the same variable names used for
# the GO tables earlier in the notebook.
egVSipsc_up, pgcVSeg_up, pgcVSipsc_up = (
    pd.read_excel(tables_folder + workbook, index_col=0)
    for workbook in (
        'GO_up_hEGCLCs_vs_hiPSC_reactome.xlsx',
        'GO_up_hPGCLCs_vs_EGCLC_reactome.xlsx',
        'GO_up_hPGCLCs_vs_hiPSC_reactome.xlsx',
    )
)


egVSipsc_down, pgcVSeg_down, pgcVSipsc_down = (
    pd.read_excel(tables_folder + workbook, index_col=0)
    for workbook in (
        'GO_down_hEGCLCs_vs_hiPSC_reactome.xlsx',
        'GO_down_hPGCLCs_vs_EGCLC_reactome.xlsx',
        'GO_down_hPGCLCs_vs_hiPSC_reactome.xlsx',
    )
)


egVSipsc_all, pgcVSeg_all, pgcVSipsc_all = (
    pd.read_excel(tables_folder + workbook, index_col=0)
    for workbook in (
        'GO_all_hEGCLCs_vs_hiPSC_reactome.xlsx',
        'GO_all_hPGCLCs_vs_EGCLC_reactome.xlsx',
        'GO_all_hPGCLCs_vs_hiPSC_reactome.xlsx',
    )
)


#egVSipsc_up


pgcVSipsc_up


# Overlap of the row indices of the reactome "up" tables.
# NOTE(review): the hEGCLCs_vs_hiPSC circle is a hard-coded empty set rather than
# set(egVSipsc_up.index) — presumably because that table has no usable rows;
# confirm, since a non-empty table would be silently left out of the diagram.
_ = venn3([set(), set(pgcVSeg_up.index.tolist()), set(pgcVSipsc_up.index.tolist())], set_labels=('hEGCLCs_vs_hiPSC', 'hPGCLCs_vs_hEGCLC', 'hPGCLCs_vs_hiPSC'))

/usr/local/lib/python3.8/dist-packages/matplotlib_venn/_venn3.py:53: UserWarning:

Circle A has zero area


# Row labels present in both hPGCLC reactome "up" tables, with each table's
# 'pvals' value and their product as a combined score.
intersection = set(pgcVSeg_up.index).intersection(set(pgcVSipsc_up.index))
df = pd.DataFrame(index = intersection)
# The intersection is taken on the tables' own index, so the assignments below
# already align with df; the former `x.index = x.index` self-assignments were
# no-ops and have been dropped.
df['Scores_pgcVSeg_up'] = pgcVSeg_up[pgcVSeg_up.index.isin(intersection)]['pvals']
df['Scores_pgcVSipsc_up'] = pgcVSipsc_up[pgcVSipsc_up.index.isin(intersection)]['pvals']
df['Combined'] = df['Scores_pgcVSeg_up'] * df['Scores_pgcVSipsc_up']


intTable(df.sort_values(by = 'Combined'), save = False)


set(egVSipsc_up.index).intersection(set(pgcVSipsc_up.index))

set()


# Overlap of the row indices of the reactome "down" tables.
# NOTE(review): the hEGCLCs_vs_hiPSC circle is a hard-coded empty set rather than
# set(egVSipsc_down.index) — presumably because that table has no usable rows;
# confirm, since a non-empty table would be silently left out of the diagram.
_ = venn3([set(), set(pgcVSeg_down.index), set(pgcVSipsc_down.index)], set_labels=('hEGCLCs_vs_hiPSC', 'hPGCLCs_vs_hEGCLC', 'hPGCLCs_vs_hiPSC'))

/usr/local/lib/python3.8/dist-packages/matplotlib_venn/_venn3.py:53: UserWarning:

Circle A has zero area


set(pgcVSeg_down.index).intersection(set(pgcVSipsc_down.index)).intersection(set(egVSipsc_down.index))

set()


#set(pgcVSeg_down['source']).intersection(set(pgcVSipsc_down['source']))

	logFC	logCPM	LR	PValue	FDR	Gene	-log10(FDR)
IGF1	15.896291	6.846814	2094.605002	0.000000	1.671936e-316	IGF1	315.776780
SOX17	13.895054	8.632823	2928.672152	0.000000	1.671936e-316	SOX17	315.776780
CHI3L2	13.443653	6.417250	2160.317252	0.000000	1.671936e-316	CHI3L2	315.776780
IRX6	12.703410	6.493450	2260.318922	0.000000	1.671936e-316	IRX6	315.776780
NANOS3	12.560026	8.623343	1951.820313	0.000000	1.671936e-316	NANOS3	315.776780
...	...	...	...	...	...	...	...
ATP4A	-2.398388	-0.822223	8.925405	0.002812	4.787297e-03	ATP4A	2.319910
KCNE2	-2.142631	-0.700751	8.582153	0.003395	5.709500e-03	KCNE2	2.243402
PPP1R36	-2.143626	0.183762	8.424237	0.003703	6.191570e-03	PPP1R36	2.208199
BDKRB2	-2.144906	-0.085745	8.318019	0.003925	6.543553e-03	BDKRB2	2.184186
EXOC3L2	-2.121139	1.862711	7.898945	0.004946	8.133181e-03	EXOC3L2	2.089740

	logFC	logCPM	LR	PValue	FDR	Gene	-log10(FDR)
SOX17	17.549030	8.894461	2025.498153	0.000000	5.866289e-316	SOX17	315.231637
IGF1	15.801469	7.113052	1664.385070	0.000000	5.866289e-316	IGF1	315.231637
IRX6	15.460311	6.758986	1915.660894	0.000000	5.866289e-316	IRX6	315.231637
WNT2	15.321818	6.614517	2056.095895	0.000000	5.866289e-316	WNT2	315.231637
CHI3L2	12.600977	6.683063	1781.461462	0.000000	5.866289e-316	CHI3L2	315.231637
...	...	...	...	...	...	...	...
LRRC73	-2.415578	-0.129566	11.514055	0.000691	1.260929e-03	LRRC73	2.899309
CAPS2	-2.001239	-0.344132	11.222459	0.000808	1.464992e-03	CAPS2	2.834165
ERBB4	-2.021953	-0.359235	10.757452	0.001039	1.856177e-03	ERBB4	2.731381
OR14K1	-2.033268	-0.115466	9.782277	0.001762	3.062090e-03	OR14K1	2.513982
ZNF540	2.169959	0.052795	7.592525	0.005861	9.460098e-03	ZNF540	2.024104

	GO.ID	Term	Annotated	Significant	Expected	Statistics	ER	-log10(pvalue)	Significant/Annotated
1	GO:0045165	cell fate commitment	170	46	12.49	8.400000e-08	3.68	7.075721	0.270588
2	GO:0001709	cell fate determination	22	10	1.62	1.200000e-06	6.17	5.920819	0.454545
3	GO:0003161	cardiac conduction system development	27	11	1.98	1.400000e-06	5.56	5.853872	0.407407
4	GO:0060070	canonical Wnt signaling pathway	258	39	18.95	3.700000e-06	2.06	5.431798	0.151163
5	GO:0007422	peripheral nervous system development	68	16	4.99	3.900000e-06	3.21	5.408935	0.235294
...	...	...	...	...	...	...	...	...	...
155	GO:0019369	arachidonic acid metabolic process	33	7	2.42	8.900000e-03	2.89	2.050610	0.212121
156	GO:0001947	heart looping	50	9	3.67	9.750000e-03	2.45	2.010995	0.180000
157	GO:0055007	cardiac muscle cell differentiation	82	13	6.02	9.800000e-03	2.16	2.008774	0.158537
158	GO:0031954	positive regulation of protein autophosp...	26	6	1.91	9.950000e-03	3.14	2.002177	0.230769
159	GO:0051385	response to mineralocorticoid	26	6	1.91	9.950000e-03	3.14	2.002177	0.230769

	GO.ID	Term	Annotated	Significant	Expected	Statistics	ER	-log10(pvalue)	Significant/Annotated
1	GO:0001228	DNA-binding transcription activator acti...	329	57	24.10	7.200000e-10	2.37	9.142668	0.173252
2	GO:0005109	frizzled binding	25	12	1.83	4.700000e-08	6.56	7.327902	0.480000
3	GO:0005125	cytokine activity	102	20	7.47	4.700000e-05	2.68	4.327902	0.196078
4	GO:0005201	extracellular matrix structural constitu...	123	24	9.01	5.400000e-05	2.66	4.267606	0.195122
5	GO:0070851	growth factor receptor binding	97	18	7.10	1.500000e-04	2.54	3.823909	0.185567
6	GO:0004896	cytokine receptor activity	50	12	3.66	2.000000e-04	3.28	3.698970	0.240000
7	GO:0019955	cytokine binding	89	20	6.52	4.100000e-04	3.07	3.387216	0.224719
8	GO:0008201	heparin binding	111	19	8.13	4.200000e-04	2.34	3.376751	0.171171
9	GO:0008528	G protein-coupled peptide receptor activ...	66	13	4.83	7.100000e-04	2.69	3.148742	0.196970
10	GO:0017046	peptide hormone binding	35	9	2.56	7.200000e-04	3.52	3.142668	0.257143
11	GO:0002020	protease binding	109	18	7.98	9.000000e-04	2.26	3.045757	0.165138
12	GO:0004623	phospholipase A2 activity	23	7	1.68	9.600000e-04	4.17	3.017729	0.304348
13	GO:0140416	transcription regulator inhibitor activi...	18	6	1.32	1.310000e-03	4.55	2.882729	0.333333
14	GO:0008373	sialyltransferase activity	18	6	1.32	1.310000e-03	4.55	2.882729	0.333333
15	GO:0019838	growth factor binding	105	20	7.69	1.770000e-03	2.60	2.752027	0.190476
16	GO:0004714	transmembrane receptor protein tyrosine ...	51	11	3.74	2.600000e-03	2.94	2.585027	0.215686
17	GO:0005044	scavenger receptor activity	29	7	2.12	4.140000e-03	3.30	2.383000	0.241379
18	GO:0004930	G protein-coupled receptor activity	196	31	14.36	5.960000e-03	2.16	2.224754	0.158163
19	GO:0015144	carbohydrate transmembrane transporter a...	24	6	1.76	6.590000e-03	3.41	2.181115	0.250000
20	GO:0008374	O-acyltransferase activity	45	7	3.30	7.990000e-03	2.12	2.097453	0.155556
21	GO:0004622	lysophospholipase activity	18	5	1.32	7.990000e-03	3.79	2.097453	0.277778
22	GO:0005126	cytokine receptor binding	147	22	10.77	8.590000e-03	2.04	2.066007	0.149660

	Scores_pgcVSeg_up	Scores_pgcVSipsc_up	Combined
external side of plasma membrane	1.600000e-07	5.300000e-07	8.480000e-14
collagen-containing extracellular matrix	4.900000e-05	2.400000e-05	1.176000e-09
intermediate filament	2.600000e-04	1.700000e-05	4.420000e-09
extracellular matrix	1.100000e-05	3.680000e-03	4.048000e-08
basal plasma membrane	1.000000e-04	4.800000e-04	4.800000e-08
blood microparticle	3.660000e-03	3.800000e-04	1.390800e-06
brush border membrane	1.299000e-02	9.300000e-03	1.208070e-04
Golgi lumen	1.688000e-02	7.920000e-03	1.336896e-04
collagen trimer	1.083000e-02	1.994000e-02	2.159502e-04
desmosome	4.764000e-02	6.440000e-03	3.068016e-04

Prepare environment¶

Compare DEGs¶

Venn diagrams¶

All¶

All positive¶

All negative¶

Pairwise comparisons¶

Up in PGC vs EGCLC and up in PGC vs iPSC¶

Up in PGC vs EGCLC and up in EG vs iPSC¶

Up in PGC vs iPSC and up in EG vs iPSC¶

Up in EG vs iPSC and up in EG vs PGC¶

Up in EG vs iPSC and up in iPSC vs PGC¶

Up in iPSC vs EG and up in PGC vs EG¶

Up in iPSC vs EG and up in EG vs PGC¶

Up in iPSC vs EG and up in PGC vs iPSC¶

Up in iPSC vs EG and up in iPSC vs PGC¶

Up in PGC vs EG and up in iPSC vs PGC¶

Up in EG vs PGC and up in PGC vs iPSC¶

Up in EG vs PGC and up in iPSC vs PGC¶

Statistical overlaps¶

Jaccard index - precise numbers¶

Comparisons of GO terms¶

GO BP¶

Venn diagrams¶

Terms enriched in upregulated genes¶

Terms enriched in downregulated genes¶

Terms enriched in all genes¶

GO MF¶

Venn diagrams¶

Terms enriched in upregulated genes¶

Terms enriched in downregulated genes¶

Terms enriched in all genes¶

GO CC¶

Venn diagrams¶

Terms enriched in upregulated genes¶

Terms enriched in downregulated genes¶

Reactome¶

Venn diagrams¶

Terms enriched in upregulated genes¶

Terms enriched in downregulated genes¶

	estimate	pvals	log10_pval	gene_annotated	n_gene_annotated	gene_significant	n_gene_significant	ER	genes_score
source
REACTOME_GPCR_LIGAND_BINDING	inf	0.000000e+00	inf	['GPR183', 'CCL2', 'CXCL3', 'CXCL10', 'CXCL6',...	463	['GCGR', 'WNT2B', 'TRH', 'APLNR', 'NMUR1', 'PT...	38	0.082073	[39.49588643299154, 33.297928178377134, 40.062...
REACTOME_EXTRACELLULAR_MATRIX_ORGANIZATION	42.519016	3.026805e-43	42.519016	['BMP2', 'ICAM1', 'TNC', 'SDC4', 'SERPINE1', '...	300	['HAPLN1', 'COL23A1', 'PLOD1', 'DMD', 'CTSL', ...	32	0.106667	[65.7986538088462, 103.36318933859657, 69.0811...
REACTOME_RHO_GTPASE_CYCLE	34.512749	3.070788e-35	34.512749	['JAG1', 'TRIP10', 'RHOB', 'CAVIN1', 'AKAP12',...	450	['ARMCX3', 'PCDH7', 'ACTC1', 'DIAPH2', 'DSP', ...	26	0.057778	[54.70057406879335, 13.91909396274346, 84.6465...
REACTOME_SIGNALING_BY_INTERLEUKINS	33.179810	6.609813e-34	33.179810	['RIPK2', 'CCL2', 'MAP3K8', 'VEGFA', 'DUSP4', ...	461	['IL4R', 'CSF3R', 'JUNB', 'NLRC5', 'NFKB2', 'F...	25	0.054230	[23.204773309513616, 81.91921767972629, 6.9696...
REACTOME_NEUTROPHIL_DEGRANULATION	33.179810	6.609813e-34	33.179810	['PTX3', 'PLAU', 'NFKB1', 'SLC2A3', 'CXCL1', '...	478	['FUCA1', 'DSP', 'CAMP', 'BST1', 'HGSNAT', 'AG...	25	0.052301	[39.4499262012086, 39.92293469792166, 27.98103...
...	...	...	...	...	...	...	...	...	...
REACTOME_REGULATION_OF_TP53_ACTIVITY	6.614033	2.432021e-07	6.614033	['SGK1', 'TP53INP1', 'AURKA', 'CDK1', 'TPX2', ...	160	['CHD3', 'PRDM1', 'POU4F1', 'SGK1', 'SMYD2']	5	0.031250	[142.44874207108666, 30.476909844062614, 20.59...
REACTOME_DISORDERS_OF_TRANSMEMBRANE_TRANSPORTERS	6.614033	2.432021e-07	6.614033	['ABCA1', 'SLC2A1', 'CP', 'HK1', 'GCK', 'AVPR1...	177	['GCKR', 'SLC2A9', 'SLCO2A1', 'ABCA1', 'SLC6A2']	5	0.028249	[19.426605437676702, 29.234868870075427, 24.49...
REACTOME_CILIUM_ASSEMBLY	6.614033	2.432021e-07	6.614033	['TUBB2A', 'KIF3B', 'TUBA4A', 'DYNLL2', 'HDAC6...	202	['KIF3A', 'PRKAR2B', 'CYS1', 'IFT172', 'KIFAP3']	5	0.024752	[38.279452481075246, 90.7403994556073, 28.8877...
REACTOME_NEDDYLATION	6.614033	2.432021e-07	6.614033	['SPSB1', 'NFE2L2', 'CDKN1A', 'SOCS3', 'SQSTM1...	245	['SPSB2', 'EPAS1', 'FBXO10', 'COMMD3', 'SPSB1']	5	0.020408	[13.38792493848568, 6.904907507229172, 24.8095...
REACTOME_ORGANELLE_BIOGENESIS_AND_MAINTENANCE	6.614033	2.432021e-07	6.614033	['TUBB2A', 'SOD2', 'PPARGC1A', 'ACSS2', 'KIF3B...	296	['KIF3A', 'PRKAR2B', 'CYS1', 'IFT172', 'KIFAP3']	5	0.016892	[38.279452481075246, 90.7403994556073, 28.8877...