1 Load environment

Code

import warnings

warnings.filterwarnings("ignore")

import matplotlib.pyplot as plt
import seaborn as sns
import scanpy as sc
import pandas as pd
import numpy as np
import random
import itertools

from tqdm import tqdm

import decoupler as dc
import sys

sys.setrecursionlimit(20000)

sys.path.append("./../../../../utilities_folder")
from utilities import load_object, intTable, plotGenesInTerm, getAnnGenes, run_ora_catchErrors

Set R environment with rpy2:

Code

import rpy2.rinterface_lib.callbacks
import anndata2ri
import logging

from rpy2.robjects import pandas2ri
import rpy2.robjects as ro

sc.settings.verbosity = 0
rpy2.rinterface_lib.callbacks.logger.setLevel(logging.ERROR)

pandas2ri.activate()
anndata2ri.activate()

%load_ext rpy2.ipython

Set up of graphical parameters for Python plots:

Code

%matplotlib inline
sc.set_figure_params(dpi = 300, fontsize = 20)

plt.rcParams['svg.fonttype'] = 'none'

cmap_up = sns.light_palette("red", as_cmap=True)
cmap_down = sns.light_palette("blue", as_cmap=True)
cmap_all = sns.light_palette("seagreen", as_cmap=True)

Set up of graphical parameters for R plots:

Code

default_units = 'in' 
default_res = 300
default_width = 10
default_height = 9

import rpy2
old_setup_graphics = rpy2.ipython.rmagic.RMagics.setup_graphics

def new_setup_graphics(self, args):
    if getattr(args, 'units') is not None:
        if args.units != default_units:
            return old_setup_graphics(self, args)
    args.units = default_units
    if getattr(args, 'res') is None:
        args.res = default_res
    if getattr(args, 'width') is None:
        args.width = default_width
    if getattr(args, 'height') is None:
        args.height = default_height        
    return old_setup_graphics(self, args)


rpy2.ipython.rmagic.RMagics.setup_graphics = new_setup_graphics

Here the cell were we inject the parameters using Quarto renderer:

Code

# Injected Parameters
N = 3

Code

# Injected Parameters
N = 13

Import R libraries:

Code

%%R
source('./../../../../utilities_folder/GO_helper.r')
loc <- './../../../../R_loc' # pointing to the renv environment

.libPaths(loc)

library('topGO')
library('org.Hs.eg.db')
library(dplyr)
library(ggplot2)

Set output folders:

Code

output_folder = './'
folder = './tables/cluster_' + str(N) + '/'

2 Load data

Here we load the dataframe:

Code

markers = pd.read_excel(folder + 'genes_in_cluster_' + str(N) + '.xlsx', index_col = 0)
markers

	logFC.celltypes_leveledhEGCLC	logFC.celltypes_levelediMeLC	logCPM	LR	PValue	FDR	clusters
ACTA1	11.493834	13.291811	2.882094	511.356736	1.649850e-110	2.513090e-109	13
SPIB	12.364728	11.586142	2.885391	486.441048	4.140128e-105	5.818964e-104	13
SERTAD4	10.616763	12.800036	2.279025	413.057395	3.286390e-89	3.487758e-88	13
CACNG4	11.786840	11.698635	2.475937	406.995026	6.760063e-88	7.025198e-87	13
EMILIN1	11.896756	10.995019	2.425626	404.247221	2.661738e-87	2.735346e-86	13
POPDC3	11.216938	11.819366	2.133988	353.461856	2.655021e-76	2.210001e-75	13
MLKL	10.852160	12.158127	2.061818	352.706863	3.868575e-76	3.213461e-75	13
AC007906.2	10.481626	12.280169	1.950036	350.659466	1.073694e-75	8.863514e-75	13
FBXO48	11.367965	11.230980	2.070526	342.220777	7.212057e-74	5.744282e-73	13
CCDC3	10.619262	12.101212	1.921517	339.272319	3.136467e-73	2.469474e-72	13
KCNH8	11.338643	11.048491	2.009845	335.210888	2.375579e-72	1.834882e-71	13
ANGPT1	10.617072	11.965527	1.864253	328.988159	5.284128e-71	3.976514e-70	13
DYRK3	10.986880	11.658736	1.942958	328.902937	5.513445e-71	4.146484e-70	13
RASGEF1B	10.876823	11.775267	1.920243	328.638295	6.290941e-71	4.719385e-70	13
MET	11.166657	11.239120	1.937661	325.275316	3.363236e-70	2.479663e-69	13
SHISAL1	11.176399	11.206281	1.938033	325.158890	3.564196e-70	2.626215e-69	13
HIST1H4I	10.859842	11.656711	1.871779	321.243636	2.509192e-69	1.822010e-68	13
CCDC160	11.047945	11.260604	1.863750	316.592564	2.548906e-68	1.821103e-67	13
C12orf60	11.044474	11.265925	1.859689	316.397164	2.809644e-68	2.006196e-67	13
VWA2	10.838110	11.545149	1.817438	314.126288	8.713880e-68	6.174303e-67	13
LRRC8C	10.944186	11.228256	1.784143	307.639474	2.209546e-66	1.531246e-65	13
PRR5L	10.747022	11.484815	1.741472	305.705429	5.793153e-66	3.996277e-65	13
CTRL	10.965316	11.088085	1.761878	304.594342	1.007857e-65	6.936529e-65	13
BMP1	10.639870	11.475658	1.678547	299.339185	1.382889e-64	9.340920e-64	13
SKIDA1	10.676182	11.409880	1.677247	298.038824	2.643736e-64	1.774763e-63	13
RAB30	10.927523	10.850198	1.676287	294.937649	1.239863e-63	8.263226e-63	13
GPLD1	10.824435	10.932813	1.626964	289.220189	2.141323e-62	1.394590e-61	13
LRRC3	10.756358	11.076017	1.619414	288.951581	2.447988e-62	1.590861e-61	13
PCDHA13	10.738773	11.094081	1.614907	288.458574	3.129653e-62	2.031651e-61	13
SLC41A2	10.802256	10.892097	1.602220	286.396618	8.743739e-62	5.639500e-61	13
HCN3	10.606034	11.225528	1.574186	285.417684	1.424071e-61	9.140706e-61	13
KHDC1	10.730836	10.936155	1.564449	282.486777	6.134006e-61	3.889407e-60	13
ST8SIA3	10.476971	11.023575	1.433392	269.377827	4.207969e-58	2.556085e-57	13
UNC79	10.329794	10.878265	1.296046	255.141290	5.055867e-55	2.920384e-54	13

Code

allGenes_series = pd.read_csv('./tables/all_bkg_genes.csv')
allGenes = allGenes_series['0'].tolist()

Here we load the dictionary that associates to each GO term its genes:

Code

GO2gene = load_object('./../../../../data/GO2gene_complete.pickle')

3 Markers of cluster

We filter genes for the cluster under investigation based on the p-value adjusted that we then convert in -log(p-value adjusted):

Code


markers = markers[markers.FDR < 0.01]
markers['-log10(FDR)'] = -np.log10(markers.FDR)
markers = markers.replace(np.inf, markers[markers['-log10(FDR)'] != np.inf]['-log10(FDR)'].max())
markers

	logFC.celltypes_leveledhEGCLC	logFC.celltypes_levelediMeLC	logCPM	LR	PValue	FDR	clusters	-log10(FDR)
ACTA1	11.493834	13.291811	2.882094	511.356736	1.649850e-110	2.513090e-109	13	108.599792
SPIB	12.364728	11.586142	2.885391	486.441048	4.140128e-105	5.818964e-104	13	103.235154
SERTAD4	10.616763	12.800036	2.279025	413.057395	3.286390e-89	3.487758e-88	13	87.457454
CACNG4	11.786840	11.698635	2.475937	406.995026	6.760063e-88	7.025198e-87	13	86.153341
EMILIN1	11.896756	10.995019	2.425626	404.247221	2.661738e-87	2.735346e-86	13	85.562988
POPDC3	11.216938	11.819366	2.133988	353.461856	2.655021e-76	2.210001e-75	13	74.655607
MLKL	10.852160	12.158127	2.061818	352.706863	3.868575e-76	3.213461e-75	13	74.493027
AC007906.2	10.481626	12.280169	1.950036	350.659466	1.073694e-75	8.863514e-75	13	74.052394
FBXO48	11.367965	11.230980	2.070526	342.220777	7.212057e-74	5.744282e-73	13	72.240764
CCDC3	10.619262	12.101212	1.921517	339.272319	3.136467e-73	2.469474e-72	13	71.607396
KCNH8	11.338643	11.048491	2.009845	335.210888	2.375579e-72	1.834882e-71	13	70.736392
ANGPT1	10.617072	11.965527	1.864253	328.988159	5.284128e-71	3.976514e-70	13	69.400498
DYRK3	10.986880	11.658736	1.942958	328.902937	5.513445e-71	4.146484e-70	13	69.382320
RASGEF1B	10.876823	11.775267	1.920243	328.638295	6.290941e-71	4.719385e-70	13	69.326115
MET	11.166657	11.239120	1.937661	325.275316	3.363236e-70	2.479663e-69	13	68.605607
SHISAL1	11.176399	11.206281	1.938033	325.158890	3.564196e-70	2.626215e-69	13	68.580670
HIST1H4I	10.859842	11.656711	1.871779	321.243636	2.509192e-69	1.822010e-68	13	67.739449
CCDC160	11.047945	11.260604	1.863750	316.592564	2.548906e-68	1.821103e-67	13	66.739666
C12orf60	11.044474	11.265925	1.859689	316.397164	2.809644e-68	2.006196e-67	13	66.697627
VWA2	10.838110	11.545149	1.817438	314.126288	8.713880e-68	6.174303e-67	13	66.209412
LRRC8C	10.944186	11.228256	1.784143	307.639474	2.209546e-66	1.531246e-65	13	64.814955
PRR5L	10.747022	11.484815	1.741472	305.705429	5.793153e-66	3.996277e-65	13	64.398344
CTRL	10.965316	11.088085	1.761878	304.594342	1.007857e-65	6.936529e-65	13	64.158858
BMP1	10.639870	11.475658	1.678547	299.339185	1.382889e-64	9.340920e-64	13	63.029610
SKIDA1	10.676182	11.409880	1.677247	298.038824	2.643736e-64	1.774763e-63	13	62.750860
RAB30	10.927523	10.850198	1.676287	294.937649	1.239863e-63	8.263226e-63	13	62.082850
GPLD1	10.824435	10.932813	1.626964	289.220189	2.141323e-62	1.394590e-61	13	60.855553
LRRC3	10.756358	11.076017	1.619414	288.951581	2.447988e-62	1.590861e-61	13	60.798368
PCDHA13	10.738773	11.094081	1.614907	288.458574	3.129653e-62	2.031651e-61	13	60.692151
SLC41A2	10.802256	10.892097	1.602220	286.396618	8.743739e-62	5.639500e-61	13	60.248759
HCN3	10.606034	11.225528	1.574186	285.417684	1.424071e-61	9.140706e-61	13	60.039020
KHDC1	10.730836	10.936155	1.564449	282.486777	6.134006e-61	3.889407e-60	13	59.410117
ST8SIA3	10.476971	11.023575	1.433392	269.377827	4.207969e-58	2.556085e-57	13	56.592425
UNC79	10.329794	10.878265	1.296046	255.141290	5.055867e-55	2.920384e-54	13	53.534560

3.0.1 All regulated

Code

all_sign = markers.index.tolist()
allSelected = allGenes_series['0'].isin(all_sign).astype('int').tolist()

4 topGO

4.1 All significant

Code

%%R -i allSelected -i allGenes

allGenes_v <- c(allSelected)
#print(allGenes_v)
names(allGenes_v) <- allGenes
allGenes_v <- unlist(allGenes_v)

geneNames <- c(allGenes)

ann_org_BP <- topGO::annFUN.org(whichOnto='BP', feasibleGenes=names(allGenes_v), 
                           mapping='org.Hs.eg', ID='symbol')

ann_org_MF <- topGO::annFUN.org(whichOnto='MF', feasibleGenes=names(allGenes_v), 
                           mapping='org.Hs.eg', ID='symbol')

ann_org_CC <- topGO::annFUN.org(whichOnto='CC', feasibleGenes=names(allGenes_v), 
                           mapping='org.Hs.eg', ID='symbol')

selection <- function(allScores){return (as.logical(allScores))}

Code

%%R
#print(lapply(ann_org_BP, count_genes))

GOdata <- new("topGOdata",
  ontology="BP",
  allGenes=allGenes_v,
  annot=annFUN.GO2genes,
  GO2genes=ann_org_BP,
  geneSel = selection,
  nodeSize=10)

Code

%%R -o results

results <- runTest(GOdata, algorithm="weight01",statistic="fisher")

Code

scores = ro.r.score(results)
score_names = ro.r(
'''
names(results@score)
'''
)
go_data = ro.r.GOdata

genesData = ro.r(
'''
geneData(results)
'''
)
genesData

array([10903,    25,    10,  1082], dtype=int32)

Code

#num_summarize = min(100, len(score_names))
results_table = ro.r.GenTable(go_data, weight=results,
        orderBy="weight", topNodes=len(scores))

Code

results_table_py = ro.conversion.rpy2py(results_table)

Code

scores_py = ro.conversion.rpy2py(scores)
score_names = [i for i in score_names]

Code

scores_df = pd.DataFrame({'Scores': scores_py, 'GO.ID': score_names})
results_table_py = results_table_py.merge(scores_df, left_on = 'GO.ID', right_on = 'GO.ID')
results_table_py

	GO.ID	Term	Annotated	Significant	Expected	weight	Scores
0	GO:0050918	positive chemotaxis	33	2	0.08	0.0026	0.002552
1	GO:0014068	positive regulation of phosphatidylinosi...	45	2	0.10	0.0047	0.004704
2	GO:0030199	collagen fibril organization	45	2	0.10	0.0047	0.004704
3	GO:0090130	tissue migration	235	4	0.54	0.0062	0.006173
4	GO:0051897	positive regulation of protein kinase B ...	71	2	0.16	0.0114	0.011387
...	...	...	...	...	...	...	...
5697	GO:2001251	negative regulation of chromosome organi...	88	0	0.20	1.0000	1.000000
5698	GO:2001252	positive regulation of chromosome organi...	93	0	0.21	1.0000	1.000000
5699	GO:2001256	regulation of store-operated calcium ent...	11	0	0.03	1.0000	1.000000
5700	GO:2001258	negative regulation of cation channel ac...	23	0	0.05	1.0000	1.000000
5701	GO:2001267	regulation of cysteine-type endopeptidas...	13	0	0.03	1.0000	1.000000

5702 rows × 7 columns

Code

results_table_py = results_table_py[results_table_py['Scores'] < 0.05]
results_table_py = results_table_py[results_table_py['Annotated'] < 200]
results_table_py = results_table_py[results_table_py['Annotated'] > 15]

Code

results_table_py['-log10(pvalue)'] = - np.log10(results_table_py.Scores)
results_table_py['Significant/Annotated'] = results_table_py['Significant'] / results_table_py['Annotated']

Code

intTable(results_table_py, folder = folder, fileName = 'GO_BP_all.xlsx', save = True)

Code

%%R -i folder
Res <- GenTable(GOdata, weight=results,
        orderBy="weight", topNodes=length(score(results)))
#print(Res[0:10,])
colnames(Res) <- c("GO.ID", "Term", "Annotated", "Significant", "Expected", "Statistics")
Res$ER <- Res$Significant / Res$Expected

image = bubbleplot(Res, Ont = 'BP', fillCol = 'forestgreen')
ggsave(file=paste0(folder, "TopGO_results_BP.pdf"), plot=image, width=12, height=4)

bubbleplot(Res, Ont = 'BP', fillCol = 'forestgreen')

Code

%%R -i markers
image = plotGenesInTerm_v1(Res, GOdata, SE = markers, nterms=15, ngenes=12,
                             fillCol='forestgreen', log = TRUE)

ggsave(file=paste0(folder, "Genes_in_Term_results_BP.pdf"), plot=image, width=12, height=4)

plotGenesInTerm_v1(Res, GOdata, SE = markers, nterms=15, ngenes=12,
                             fillCol='forestgreen', log = TRUE)

Code

%%R -i markers -i folder
saveGenesInTerm(Res, GOdata, nterms = 20, path = paste0(folder,'GO_BP_genesInTerm_all.xlsx'), SE = markers)

Code

%%R

GOdata <- new("topGOdata",
  ontology="MF",
  allGenes=allGenes_v,
  annot=annFUN.GO2genes,
  GO2genes=ann_org_MF,
  geneSel = selection,
  nodeSize=10)

Code

%%R -o results

results <- runTest(GOdata, algorithm="weight01",statistic="fisher")

Code

scores = ro.r.score(results)
score_names = ro.r(
'''
names(results@score)
'''
)
go_data = ro.r.GOdata

genesData = ro.r(
'''
geneData(results)
'''
)
genesData

array([11203,    28,    10,   150], dtype=int32)

Code

#num_summarize = min(100, len(score_names))
results_table = ro.r.GenTable(go_data, weight=results,
        orderBy="weight", topNodes=len(scores))

Code

results_table_py = ro.conversion.rpy2py(results_table)

Code

scores_py = ro.conversion.rpy2py(scores)
score_names = [i for i in score_names]

Code

scores_df = pd.DataFrame({'Scores': scores_py, 'GO.ID': score_names})
results_table_py = results_table_py.merge(scores_df, left_on = 'GO.ID', right_on = 'GO.ID')
results_table_py = results_table_py[results_table_py['Scores'] < 0.05]
results_table_py = results_table_py[results_table_py['Annotated'] < 200]
results_table_py = results_table_py[results_table_py['Annotated'] > 15]

intTable(results_table_py, folder = folder, fileName = 'GO_MF_all.xlsx', save = True)

Code

%%R
Res <- GenTable(GOdata, weight=results,
        orderBy="weight", topNodes=length(score(results)))
#print(Res[0:10,])
colnames(Res) <- c("GO.ID", "Term", "Annotated", "Significant", "Expected", "Statistics")
Res$ER <- Res$Significant / Res$Expected

image = bubbleplot(Res, Ont = 'MF', fillCol = 'forestgreen')

ggsave(file=paste0(folder, "TopGO_results_MF.pdf"), plot=image, width=12, height=4)

bubbleplot(Res, Ont = 'MF', fillCol = 'forestgreen')

Code

%%R -i markers
image = plotGenesInTerm_v1(Res, GOdata, SE = markers, nterms=15, ngenes=12,
                             fillCol='forestgreen', log = TRUE)

ggsave(file=paste0(folder, "Genes_in_Term_results_MF.pdf"), plot=image, width=12, height=4)

plotGenesInTerm_v1(Res, GOdata, SE = markers, nterms=15, ngenes=12,
                             fillCol='forestgreen', log = TRUE)

Code

%%R -i markers -i folder
saveGenesInTerm(Res, GOdata, nterms = 20, path = paste0(folder,'GO_MF_genesInTerm_all.xlsx'), SE = markers)

Code

%%R

GOdata <- new("topGOdata",
  ontology="CC",
  allGenes=allGenes_v,
  annot=annFUN.GO2genes,
  GO2genes=ann_org_CC,
  geneSel = selection,
  nodeSize=10)

Code

%%R -o results

results <- runTest(GOdata, algorithm="weight01",statistic="fisher")

Code

scores = ro.r.score(results)
score_names = ro.r(
'''
names(results@score)
'''
)
go_data = ro.r.GOdata

genesData = ro.r(
'''
geneData(results)
'''
)
genesData

array([11323,    29,    10,   170], dtype=int32)

Code

#num_summarize = min(100, len(score_names))
results_table = ro.r.GenTable(go_data, weight=results,
        orderBy="weight", topNodes=len(scores))

Code

results_table_py = ro.conversion.rpy2py(results_table)

Code

scores_py = ro.conversion.rpy2py(scores)
score_names = [i for i in score_names]

Code

scores_df = pd.DataFrame({'Scores': scores_py, 'GO.ID': score_names})
results_table_py = results_table_py.merge(scores_df, left_on = 'GO.ID', right_on = 'GO.ID')

Code

results_table_py = results_table_py[results_table_py['Scores'] < 0.05]
results_table_py = results_table_py[results_table_py['Annotated'] < 200]
results_table_py = results_table_py[results_table_py['Annotated'] > 15]

intTable(results_table_py, folder = folder, fileName = 'GO_CC_all.xlsx', save = True)

Code

%%R
Res <- GenTable(GOdata, weight=results,
        orderBy="weight", topNodes=length(score(results)))
#print(Res[0:10,])
colnames(Res) <- c("GO.ID", "Term", "Annotated", "Significant", "Expected", "Statistics")
Res$ER <- Res$Significant / Res$Expected
image = bubbleplot(Res, Ont = 'CC', fillCol = 'forestgreen')

ggsave(file=paste0(folder, "TopGO_results_CC.pdf"), plot=image, width=12, height=4)

bubbleplot(Res, Ont = 'CC', fillCol = 'forestgreen')

Code

%%R -i markers
image = plotGenesInTerm_v1(Res, GOdata, SE = markers, nterms=12, ngenes=12,
                             fillCol='forestgreen', log = TRUE)

ggsave(file=paste0(folder, "Genes:_in_Term_results_CC.pdf"), plot=image, width=12, height=4)

plotGenesInTerm_v1(Res, GOdata, SE = markers, nterms=12, ngenes=12,
                             fillCol='forestgreen', log = TRUE)

Code

%%R -i markers -i folder
saveGenesInTerm(Res, GOdata, nterms = 20, path = paste0(folder,'GO_CC_genesInTerm_all.xlsx'), SE = markers)

4.1.0.1 Reactome

Code

curated = msigdb[msigdb['collection'].isin(['reactome_pathways'])]
curated = curated[~curated.duplicated(['geneset', 'genesymbol'])]

aggregated = curated[["geneset", "genesymbol"]].groupby("geneset").count().rename(columns={"genesymbol": "gene_count"})
curated = curated[~curated.geneset.isin(aggregated[aggregated.gene_count > 200].index.tolist())].copy()
curated = curated[~curated.geneset.isin(aggregated[aggregated.gene_count < 15].index.tolist())].copy()

Code

rank = pd.DataFrame(markers['-log10(FDR)'])

rank_copy = rank.copy()
rank_copy['pval'] = markers.loc[rank.index].FDR

Code

rank_copy

	-log10(FDR)	pval
ACTA1	108.599792	2.513090e-109
SPIB	103.235154	5.818964e-104
SERTAD4	87.457454	3.487758e-88
CACNG4	86.153341	7.025198e-87
EMILIN1	85.562988	2.735346e-86
POPDC3	74.655607	2.210001e-75
MLKL	74.493027	3.213461e-75
AC007906.2	74.052394	8.863514e-75
FBXO48	72.240764	5.744282e-73
CCDC3	71.607396	2.469474e-72
KCNH8	70.736392	1.834882e-71
ANGPT1	69.400498	3.976514e-70
DYRK3	69.382320	4.146484e-70
RASGEF1B	69.326115	4.719385e-70
MET	68.605607	2.479663e-69
SHISAL1	68.580670	2.626215e-69
HIST1H4I	67.739449	1.822010e-68
CCDC160	66.739666	1.821103e-67
C12orf60	66.697627	2.006196e-67
VWA2	66.209412	6.174303e-67
LRRC8C	64.814955	1.531246e-65
PRR5L	64.398344	3.996277e-65
CTRL	64.158858	6.936529e-65
BMP1	63.029610	9.340920e-64
SKIDA1	62.750860	1.774763e-63
RAB30	62.082850	8.263226e-63
GPLD1	60.855553	1.394590e-61
LRRC3	60.798368	1.590861e-61
PCDHA13	60.692151	2.031651e-61
SLC41A2	60.248759	5.639500e-61
HCN3	60.039020	9.140706e-61
KHDC1	59.410117	3.889407e-60
ST8SIA3	56.592425	2.556085e-57
UNC79	53.534560	2.920384e-54

Code

results_table_py = run_ora_catchErrors(mat=rank.T, net=curated, source='geneset', target='genesymbol', verbose=False, n_up=len(rank), n_bottom=0)
len(results_table_py)

No significant term was found

Code

intTable(results_table_py, folder = folder, fileName = 'Reactome_all.xlsx', save = True)

Code

if len(results_table_py) > 0:
    results_table_py = getAnnGenes(results_table_py, GO2gene['reactome_pathways'], rank_copy)
    _, df = plotGenesInTerm(results = results_table_py, GO2gene = GO2gene['reactome_pathways'], DEGs = rank_copy, n_top_terms = 10, cmap = cmap_all)

Code

if len(results_table_py) > 0:
    intTable(df, folder = folder, fileName = 'genesInTerm_Reactome_all.xlsx', save = True)

4.1.0.2 KEGG

Code

curated = msigdb[msigdb['collection'].isin(['kegg_pathways'])]
curated = curated[~curated.duplicated(['geneset', 'genesymbol'])]

aggregated = curated[["geneset", "genesymbol"]].groupby("geneset").count().rename(columns={"genesymbol": "gene_count"})
curated = curated[~curated.geneset.isin(aggregated[aggregated.gene_count > 200].index.tolist())].copy()
curated = curated[~curated.geneset.isin(aggregated[aggregated.gene_count < 15].index.tolist())].copy()

Code

results_table_py = run_ora_catchErrors(mat=rank.T, net=curated, source='geneset', target='genesymbol', verbose=False, n_up=len(rank), n_bottom=0)

No significant term was found

Code

intTable(results_table_py, folder = folder, fileName = 'KEGG_all.xlsx', save = True)

Code

if len(results_table_py) > 0:
    results_table_py = getAnnGenes(results_table_py, GO2gene['kegg_pathways'], rank_copy)
    _, df = plotGenesInTerm(results_table_py, GO2gene['kegg_pathways'], rank_copy, n_top_terms = 10, n_top_genes = 15, cmap = cmap_all)

Code

if len(results_table_py) > 0:
    intTable(df, folder = folder, fileName = 'genesInTerm_KEGG_all.xlsx', save = True)