1 Load environment

Code
import warnings

warnings.filterwarnings("ignore")

import matplotlib.pyplot as plt
import seaborn as sns
import scanpy as sc
import pandas as pd
import numpy as np
import random
import itertools

from tqdm import tqdm

import decoupler as dc
import sys

sys.setrecursionlimit(20000)

sys.path.append("./../../../../utilities_folder")
from utilities import load_object, intTable, plotGenesInTerm, getAnnGenes, run_ora_catchErrors

Set R environment with rpy2:

Code
import rpy2.rinterface_lib.callbacks
import anndata2ri
import logging

from rpy2.robjects import pandas2ri
import rpy2.robjects as ro

sc.settings.verbosity = 0
rpy2.rinterface_lib.callbacks.logger.setLevel(logging.ERROR)

pandas2ri.activate()
anndata2ri.activate()

%load_ext rpy2.ipython

Set up of graphical parameters for Python plots:

Code
%matplotlib inline
sc.set_figure_params(dpi = 300, fontsize = 20)

plt.rcParams['svg.fonttype'] = 'none'

cmap_up = sns.light_palette("red", as_cmap=True)
cmap_down = sns.light_palette("blue", as_cmap=True)
cmap_all = sns.light_palette("seagreen", as_cmap=True)

Set up of graphical parameters for R plots:

Code
default_units = 'in' 
default_res = 300
default_width = 10
default_height = 9

import rpy2
old_setup_graphics = rpy2.ipython.rmagic.RMagics.setup_graphics

def new_setup_graphics(self, args):
    if getattr(args, 'units') is not None:
        if args.units != default_units:
            return old_setup_graphics(self, args)
    args.units = default_units
    if getattr(args, 'res') is None:
        args.res = default_res
    if getattr(args, 'width') is None:
        args.width = default_width
    if getattr(args, 'height') is None:
        args.height = default_height        
    return old_setup_graphics(self, args)


rpy2.ipython.rmagic.RMagics.setup_graphics = new_setup_graphics

Here the cell were we inject the parameters using Quarto renderer:

Code
# Injected Parameters
N = 3
Code
# Injected Parameters
N = 13

Import R libraries:

Code
%%R
source('./../../../../utilities_folder/GO_helper.r')
loc <- './../../../../R_loc' # pointing to the renv environment

.libPaths(loc)

library('topGO')
library('org.Hs.eg.db')
library(dplyr)
library(ggplot2)

Set output folders:

Code
output_folder = './'
folder = './tables/cluster_' + str(N) + '/'

2 Load data

Here we load the dataframe:

Code
markers = pd.read_excel(folder + 'genes_in_cluster_' + str(N) + '.xlsx', index_col = 0)
markers
logFC.celltypes_leveledhEGCLC logFC.celltypes_leveledhPGCLC logFC.celltypes_levelediMeLC logCPM LR PValue FDR clusters
ACTA1 11.493834 0 13.291811 2.882094 511.356736 1.649850e-110 2.513090e-109 13
SPIB 12.364728 0 11.586142 2.885391 486.441048 4.140128e-105 5.818964e-104 13
SERTAD4 10.616763 0 12.800036 2.279025 413.057395 3.286390e-89 3.487758e-88 13
CACNG4 11.786840 0 11.698635 2.475937 406.995026 6.760063e-88 7.025198e-87 13
EMILIN1 11.896756 0 10.995019 2.425626 404.247221 2.661738e-87 2.735346e-86 13
POPDC3 11.216938 0 11.819366 2.133988 353.461856 2.655021e-76 2.210001e-75 13
MLKL 10.852160 0 12.158127 2.061818 352.706863 3.868575e-76 3.213461e-75 13
AC007906.2 10.481626 0 12.280169 1.950036 350.659466 1.073694e-75 8.863514e-75 13
FBXO48 11.367965 0 11.230980 2.070526 342.220777 7.212057e-74 5.744282e-73 13
CCDC3 10.619262 0 12.101212 1.921517 339.272319 3.136467e-73 2.469474e-72 13
KCNH8 11.338643 0 11.048491 2.009845 335.210888 2.375579e-72 1.834882e-71 13
ANGPT1 10.617072 0 11.965527 1.864253 328.988159 5.284128e-71 3.976514e-70 13
DYRK3 10.986880 0 11.658736 1.942958 328.902937 5.513445e-71 4.146484e-70 13
RASGEF1B 10.876823 0 11.775267 1.920243 328.638295 6.290941e-71 4.719385e-70 13
MET 11.166657 0 11.239120 1.937661 325.275316 3.363236e-70 2.479663e-69 13
SHISAL1 11.176399 0 11.206281 1.938033 325.158890 3.564196e-70 2.626215e-69 13
HIST1H4I 10.859842 0 11.656711 1.871779 321.243636 2.509192e-69 1.822010e-68 13
CCDC160 11.047945 0 11.260604 1.863750 316.592564 2.548906e-68 1.821103e-67 13
C12orf60 11.044474 0 11.265925 1.859689 316.397164 2.809644e-68 2.006196e-67 13
VWA2 10.838110 0 11.545149 1.817438 314.126288 8.713880e-68 6.174303e-67 13
LRRC8C 10.944186 0 11.228256 1.784143 307.639474 2.209546e-66 1.531246e-65 13
PRR5L 10.747022 0 11.484815 1.741472 305.705429 5.793153e-66 3.996277e-65 13
CTRL 10.965316 0 11.088085 1.761878 304.594342 1.007857e-65 6.936529e-65 13
BMP1 10.639870 0 11.475658 1.678547 299.339185 1.382889e-64 9.340920e-64 13
SKIDA1 10.676182 0 11.409880 1.677247 298.038824 2.643736e-64 1.774763e-63 13
RAB30 10.927523 0 10.850198 1.676287 294.937649 1.239863e-63 8.263226e-63 13
GPLD1 10.824435 0 10.932813 1.626964 289.220189 2.141323e-62 1.394590e-61 13
LRRC3 10.756358 0 11.076017 1.619414 288.951581 2.447988e-62 1.590861e-61 13
PCDHA13 10.738773 0 11.094081 1.614907 288.458574 3.129653e-62 2.031651e-61 13
SLC41A2 10.802256 0 10.892097 1.602220 286.396618 8.743739e-62 5.639500e-61 13
HCN3 10.606034 0 11.225528 1.574186 285.417684 1.424071e-61 9.140706e-61 13
KHDC1 10.730836 0 10.936155 1.564449 282.486777 6.134006e-61 3.889407e-60 13
ST8SIA3 10.476971 0 11.023575 1.433392 269.377827 4.207969e-58 2.556085e-57 13
UNC79 10.329794 0 10.878265 1.296046 255.141290 5.055867e-55 2.920384e-54 13
Code
allGenes_series = pd.read_csv('./tables/all_bkg_genes.csv')
allGenes = allGenes_series['0'].tolist()

Here we load the dictionary that associates to each GO term its genes:

Code
GO2gene = load_object('./../../../../data/GO2gene_complete.pickle')

3 Markers of cluster

We filter genes for the cluster under investigation based on the p-value adjusted that we then convert in -log(p-value adjusted):

Code

markers = markers[markers.FDR < 0.01]
markers['-log10(FDR)'] = -np.log10(markers.FDR)
markers = markers.replace(np.inf, markers[markers['-log10(FDR)'] != np.inf]['-log10(FDR)'].max())
markers
logFC.celltypes_leveledhEGCLC logFC.celltypes_leveledhPGCLC logFC.celltypes_levelediMeLC logCPM LR PValue FDR clusters -log10(FDR)
ACTA1 11.493834 0 13.291811 2.882094 511.356736 1.649850e-110 2.513090e-109 13 108.599792
SPIB 12.364728 0 11.586142 2.885391 486.441048 4.140128e-105 5.818964e-104 13 103.235154
SERTAD4 10.616763 0 12.800036 2.279025 413.057395 3.286390e-89 3.487758e-88 13 87.457454
CACNG4 11.786840 0 11.698635 2.475937 406.995026 6.760063e-88 7.025198e-87 13 86.153341
EMILIN1 11.896756 0 10.995019 2.425626 404.247221 2.661738e-87 2.735346e-86 13 85.562988
POPDC3 11.216938 0 11.819366 2.133988 353.461856 2.655021e-76 2.210001e-75 13 74.655607
MLKL 10.852160 0 12.158127 2.061818 352.706863 3.868575e-76 3.213461e-75 13 74.493027
AC007906.2 10.481626 0 12.280169 1.950036 350.659466 1.073694e-75 8.863514e-75 13 74.052394
FBXO48 11.367965 0 11.230980 2.070526 342.220777 7.212057e-74 5.744282e-73 13 72.240764
CCDC3 10.619262 0 12.101212 1.921517 339.272319 3.136467e-73 2.469474e-72 13 71.607396
KCNH8 11.338643 0 11.048491 2.009845 335.210888 2.375579e-72 1.834882e-71 13 70.736392
ANGPT1 10.617072 0 11.965527 1.864253 328.988159 5.284128e-71 3.976514e-70 13 69.400498
DYRK3 10.986880 0 11.658736 1.942958 328.902937 5.513445e-71 4.146484e-70 13 69.382320
RASGEF1B 10.876823 0 11.775267 1.920243 328.638295 6.290941e-71 4.719385e-70 13 69.326115
MET 11.166657 0 11.239120 1.937661 325.275316 3.363236e-70 2.479663e-69 13 68.605607
SHISAL1 11.176399 0 11.206281 1.938033 325.158890 3.564196e-70 2.626215e-69 13 68.580670
HIST1H4I 10.859842 0 11.656711 1.871779 321.243636 2.509192e-69 1.822010e-68 13 67.739449
CCDC160 11.047945 0 11.260604 1.863750 316.592564 2.548906e-68 1.821103e-67 13 66.739666
C12orf60 11.044474 0 11.265925 1.859689 316.397164 2.809644e-68 2.006196e-67 13 66.697627
VWA2 10.838110 0 11.545149 1.817438 314.126288 8.713880e-68 6.174303e-67 13 66.209412
LRRC8C 10.944186 0 11.228256 1.784143 307.639474 2.209546e-66 1.531246e-65 13 64.814955
PRR5L 10.747022 0 11.484815 1.741472 305.705429 5.793153e-66 3.996277e-65 13 64.398344
CTRL 10.965316 0 11.088085 1.761878 304.594342 1.007857e-65 6.936529e-65 13 64.158858
BMP1 10.639870 0 11.475658 1.678547 299.339185 1.382889e-64 9.340920e-64 13 63.029610
SKIDA1 10.676182 0 11.409880 1.677247 298.038824 2.643736e-64 1.774763e-63 13 62.750860
RAB30 10.927523 0 10.850198 1.676287 294.937649 1.239863e-63 8.263226e-63 13 62.082850
GPLD1 10.824435 0 10.932813 1.626964 289.220189 2.141323e-62 1.394590e-61 13 60.855553
LRRC3 10.756358 0 11.076017 1.619414 288.951581 2.447988e-62 1.590861e-61 13 60.798368
PCDHA13 10.738773 0 11.094081 1.614907 288.458574 3.129653e-62 2.031651e-61 13 60.692151
SLC41A2 10.802256 0 10.892097 1.602220 286.396618 8.743739e-62 5.639500e-61 13 60.248759
HCN3 10.606034 0 11.225528 1.574186 285.417684 1.424071e-61 9.140706e-61 13 60.039020
KHDC1 10.730836 0 10.936155 1.564449 282.486777 6.134006e-61 3.889407e-60 13 59.410117
ST8SIA3 10.476971 0 11.023575 1.433392 269.377827 4.207969e-58 2.556085e-57 13 56.592425
UNC79 10.329794 0 10.878265 1.296046 255.141290 5.055867e-55 2.920384e-54 13 53.534560

3.0.1 All regulated

Code
all_sign = markers.index.tolist()
allSelected = allGenes_series['0'].isin(all_sign).astype('int').tolist()

4 topGO

4.1 All significant

Code
%%R -i allSelected -i allGenes

allGenes_v <- c(allSelected)
#print(allGenes_v)
names(allGenes_v) <- allGenes
allGenes_v <- unlist(allGenes_v)

geneNames <- c(allGenes)

ann_org_BP <- topGO::annFUN.org(whichOnto='BP', feasibleGenes=names(allGenes_v), 
                           mapping='org.Hs.eg', ID='symbol')

ann_org_MF <- topGO::annFUN.org(whichOnto='MF', feasibleGenes=names(allGenes_v), 
                           mapping='org.Hs.eg', ID='symbol')

ann_org_CC <- topGO::annFUN.org(whichOnto='CC', feasibleGenes=names(allGenes_v), 
                           mapping='org.Hs.eg', ID='symbol')

selection <- function(allScores){return (as.logical(allScores))}
Code
%%R
#print(lapply(ann_org_BP, count_genes))

GOdata <- new("topGOdata",
  ontology="BP",
  allGenes=allGenes_v,
  annot=annFUN.GO2genes,
  GO2genes=ann_org_BP,
  geneSel = selection,
  nodeSize=10)
Code
%%R -o results

results <- runTest(GOdata, algorithm="weight01",statistic="fisher")
Code
scores = ro.r.score(results)
score_names = ro.r(
'''
names(results@score)
'''
)
go_data = ro.r.GOdata

genesData = ro.r(
'''
geneData(results)
'''
)
genesData
array([10903,    25,    10,  1082], dtype=int32)
Code
#num_summarize = min(100, len(score_names))
results_table = ro.r.GenTable(go_data, weight=results,
        orderBy="weight", topNodes=len(scores))
Code
results_table_py = ro.conversion.rpy2py(results_table)
Code
scores_py = ro.conversion.rpy2py(scores)
score_names = [i for i in score_names]
Code
scores_df = pd.DataFrame({'Scores': scores_py, 'GO.ID': score_names})
results_table_py = results_table_py.merge(scores_df, left_on = 'GO.ID', right_on = 'GO.ID')
results_table_py
GO.ID Term Annotated Significant Expected weight Scores
0 GO:0050918 positive chemotaxis 33 2 0.08 0.0026 0.002552
1 GO:0014068 positive regulation of phosphatidylinosi... 45 2 0.10 0.0047 0.004704
2 GO:0030199 collagen fibril organization 45 2 0.10 0.0047 0.004704
3 GO:0090130 tissue migration 235 4 0.54 0.0062 0.006173
4 GO:0051897 positive regulation of protein kinase B ... 71 2 0.16 0.0114 0.011387
... ... ... ... ... ... ... ...
5697 GO:2001251 negative regulation of chromosome organi... 88 0 0.20 1.0000 1.000000
5698 GO:2001252 positive regulation of chromosome organi... 93 0 0.21 1.0000 1.000000
5699 GO:2001256 regulation of store-operated calcium ent... 11 0 0.03 1.0000 1.000000
5700 GO:2001258 negative regulation of cation channel ac... 23 0 0.05 1.0000 1.000000
5701 GO:2001267 regulation of cysteine-type endopeptidas... 13 0 0.03 1.0000 1.000000

5702 rows × 7 columns

Code
results_table_py = results_table_py[results_table_py['Scores'] < 0.05]
results_table_py = results_table_py[results_table_py['Annotated'] < 200]
results_table_py = results_table_py[results_table_py['Annotated'] > 15]
Code
results_table_py['-log10(pvalue)'] = - np.log10(results_table_py.Scores)
results_table_py['Significant/Annotated'] = results_table_py['Significant'] / results_table_py['Annotated']
Code
intTable(results_table_py, folder = folder, fileName = 'GO_BP_all.xlsx', save = True)
Code
%%R -i folder
Res <- GenTable(GOdata, weight=results,
        orderBy="weight", topNodes=length(score(results)))
#print(Res[0:10,])
colnames(Res) <- c("GO.ID", "Term", "Annotated", "Significant", "Expected", "Statistics")
Res$ER <- Res$Significant / Res$Expected

image = bubbleplot(Res, Ont = 'BP', fillCol = 'forestgreen')
ggsave(file=paste0(folder, "TopGO_results_BP.pdf"), plot=image, width=12, height=4)

bubbleplot(Res, Ont = 'BP', fillCol = 'forestgreen')

Code
%%R -i markers
image = plotGenesInTerm_v1(Res, GOdata, SE = markers, nterms=15, ngenes=12,
                             fillCol='forestgreen', log = TRUE)

ggsave(file=paste0(folder, "Genes_in_Term_results_BP.pdf"), plot=image, width=12, height=4)

plotGenesInTerm_v1(Res, GOdata, SE = markers, nterms=15, ngenes=12,
                             fillCol='forestgreen', log = TRUE)

Code
%%R -i markers -i folder
saveGenesInTerm(Res, GOdata, nterms = 20, path = paste0(folder,'GO_BP_genesInTerm_all.xlsx'), SE = markers)
Code
%%R

GOdata <- new("topGOdata",
  ontology="MF",
  allGenes=allGenes_v,
  annot=annFUN.GO2genes,
  GO2genes=ann_org_MF,
  geneSel = selection,
  nodeSize=10)
Code
%%R -o results

results <- runTest(GOdata, algorithm="weight01",statistic="fisher")
Code
scores = ro.r.score(results)
score_names = ro.r(
'''
names(results@score)
'''
)
go_data = ro.r.GOdata

genesData = ro.r(
'''
geneData(results)
'''
)
genesData
array([11203,    28,    10,   150], dtype=int32)
Code
#num_summarize = min(100, len(score_names))
results_table = ro.r.GenTable(go_data, weight=results,
        orderBy="weight", topNodes=len(scores))
Code
results_table_py = ro.conversion.rpy2py(results_table)
Code
scores_py = ro.conversion.rpy2py(scores)
score_names = [i for i in score_names]
Code
scores_df = pd.DataFrame({'Scores': scores_py, 'GO.ID': score_names})
results_table_py = results_table_py.merge(scores_df, left_on = 'GO.ID', right_on = 'GO.ID')
results_table_py = results_table_py[results_table_py['Scores'] < 0.05]
results_table_py = results_table_py[results_table_py['Annotated'] < 200]
results_table_py = results_table_py[results_table_py['Annotated'] > 15]

intTable(results_table_py, folder = folder, fileName = 'GO_MF_all.xlsx', save = True)
Code
%%R
Res <- GenTable(GOdata, weight=results,
        orderBy="weight", topNodes=length(score(results)))
#print(Res[0:10,])
colnames(Res) <- c("GO.ID", "Term", "Annotated", "Significant", "Expected", "Statistics")
Res$ER <- Res$Significant / Res$Expected

image = bubbleplot(Res, Ont = 'MF', fillCol = 'forestgreen')

ggsave(file=paste0(folder, "TopGO_results_MF.pdf"), plot=image, width=12, height=4)

bubbleplot(Res, Ont = 'MF', fillCol = 'forestgreen')

Code
%%R -i markers
image = plotGenesInTerm_v1(Res, GOdata, SE = markers, nterms=15, ngenes=12,
                             fillCol='forestgreen', log = TRUE)

ggsave(file=paste0(folder, "Genes_in_Term_results_MF.pdf"), plot=image, width=12, height=4)

plotGenesInTerm_v1(Res, GOdata, SE = markers, nterms=15, ngenes=12,
                             fillCol='forestgreen', log = TRUE)

Code
%%R -i markers -i folder
saveGenesInTerm(Res, GOdata, nterms = 20, path = paste0(folder,'GO_MF_genesInTerm_all.xlsx'), SE = markers)
Code
%%R

GOdata <- new("topGOdata",
  ontology="CC",
  allGenes=allGenes_v,
  annot=annFUN.GO2genes,
  GO2genes=ann_org_CC,
  geneSel = selection,
  nodeSize=10)
Code
%%R -o results

results <- runTest(GOdata, algorithm="weight01",statistic="fisher")
Code
scores = ro.r.score(results)
score_names = ro.r(
'''
names(results@score)
'''
)
go_data = ro.r.GOdata

genesData = ro.r(
'''
geneData(results)
'''
)
genesData
array([11323,    29,    10,   170], dtype=int32)
Code
#num_summarize = min(100, len(score_names))
results_table = ro.r.GenTable(go_data, weight=results,
        orderBy="weight", topNodes=len(scores))
Code
results_table_py = ro.conversion.rpy2py(results_table)
Code
scores_py = ro.conversion.rpy2py(scores)
score_names = [i for i in score_names]
Code
scores_df = pd.DataFrame({'Scores': scores_py, 'GO.ID': score_names})
results_table_py = results_table_py.merge(scores_df, left_on = 'GO.ID', right_on = 'GO.ID')
Code
results_table_py = results_table_py[results_table_py['Scores'] < 0.05]
results_table_py = results_table_py[results_table_py['Annotated'] < 200]
results_table_py = results_table_py[results_table_py['Annotated'] > 15]

intTable(results_table_py, folder = folder, fileName = 'GO_CC_all.xlsx', save = True)
Code
%%R
Res <- GenTable(GOdata, weight=results,
        orderBy="weight", topNodes=length(score(results)))
#print(Res[0:10,])
colnames(Res) <- c("GO.ID", "Term", "Annotated", "Significant", "Expected", "Statistics")
Res$ER <- Res$Significant / Res$Expected
image = bubbleplot(Res, Ont = 'CC', fillCol = 'forestgreen')

ggsave(file=paste0(folder, "TopGO_results_CC.pdf"), plot=image, width=12, height=4)

bubbleplot(Res, Ont = 'CC', fillCol = 'forestgreen')

Code
%%R -i markers
image = plotGenesInTerm_v1(Res, GOdata, SE = markers, nterms=12, ngenes=12,
                             fillCol='forestgreen', log = TRUE)

ggsave(file=paste0(folder, "Genes:_in_Term_results_CC.pdf"), plot=image, width=12, height=4)

plotGenesInTerm_v1(Res, GOdata, SE = markers, nterms=12, ngenes=12,
                             fillCol='forestgreen', log = TRUE)

Code
%%R -i markers -i folder
saveGenesInTerm(Res, GOdata, nterms = 20, path = paste0(folder,'GO_CC_genesInTerm_all.xlsx'), SE = markers)

4.1.0.1 Reactome

Code
curated = msigdb[msigdb['collection'].isin(['reactome_pathways'])]
curated = curated[~curated.duplicated(['geneset', 'genesymbol'])]

aggregated = curated[["geneset", "genesymbol"]].groupby("geneset").count().rename(columns={"genesymbol": "gene_count"})
curated = curated[~curated.geneset.isin(aggregated[aggregated.gene_count > 200].index.tolist())].copy()
curated = curated[~curated.geneset.isin(aggregated[aggregated.gene_count < 15].index.tolist())].copy()
Code
rank = pd.DataFrame(markers['-log10(FDR)'])

rank_copy = rank.copy()
rank_copy['pval'] = markers.loc[rank.index].FDR
Code
rank_copy
-log10(FDR) pval
ACTA1 108.599792 2.513090e-109
SPIB 103.235154 5.818964e-104
SERTAD4 87.457454 3.487758e-88
CACNG4 86.153341 7.025198e-87
EMILIN1 85.562988 2.735346e-86
POPDC3 74.655607 2.210001e-75
MLKL 74.493027 3.213461e-75
AC007906.2 74.052394 8.863514e-75
FBXO48 72.240764 5.744282e-73
CCDC3 71.607396 2.469474e-72
KCNH8 70.736392 1.834882e-71
ANGPT1 69.400498 3.976514e-70
DYRK3 69.382320 4.146484e-70
RASGEF1B 69.326115 4.719385e-70
MET 68.605607 2.479663e-69
SHISAL1 68.580670 2.626215e-69
HIST1H4I 67.739449 1.822010e-68
CCDC160 66.739666 1.821103e-67
C12orf60 66.697627 2.006196e-67
VWA2 66.209412 6.174303e-67
LRRC8C 64.814955 1.531246e-65
PRR5L 64.398344 3.996277e-65
CTRL 64.158858 6.936529e-65
BMP1 63.029610 9.340920e-64
SKIDA1 62.750860 1.774763e-63
RAB30 62.082850 8.263226e-63
GPLD1 60.855553 1.394590e-61
LRRC3 60.798368 1.590861e-61
PCDHA13 60.692151 2.031651e-61
SLC41A2 60.248759 5.639500e-61
HCN3 60.039020 9.140706e-61
KHDC1 59.410117 3.889407e-60
ST8SIA3 56.592425 2.556085e-57
UNC79 53.534560 2.920384e-54
Code
results_table_py = run_ora_catchErrors(mat=rank.T, net=curated, source='geneset', target='genesymbol', verbose=False, n_up=len(rank), n_bottom=0)
len(results_table_py)
No significant term was found
0
Code
intTable(results_table_py, folder = folder, fileName = 'Reactome_all.xlsx', save = True)
Code
if len(results_table_py) > 0:
    results_table_py = getAnnGenes(results_table_py, GO2gene['reactome_pathways'], rank_copy)
    _, df = plotGenesInTerm(results = results_table_py, GO2gene = GO2gene['reactome_pathways'], DEGs = rank_copy, n_top_terms = 10, cmap = cmap_all)
Code
if len(results_table_py) > 0:
    intTable(df, folder = folder, fileName = 'genesInTerm_Reactome_all.xlsx', save = True)

4.1.0.2 KEGG

Code
curated = msigdb[msigdb['collection'].isin(['kegg_pathways'])]
curated = curated[~curated.duplicated(['geneset', 'genesymbol'])]

aggregated = curated[["geneset", "genesymbol"]].groupby("geneset").count().rename(columns={"genesymbol": "gene_count"})
curated = curated[~curated.geneset.isin(aggregated[aggregated.gene_count > 200].index.tolist())].copy()
curated = curated[~curated.geneset.isin(aggregated[aggregated.gene_count < 15].index.tolist())].copy()
Code
results_table_py = run_ora_catchErrors(mat=rank.T, net=curated, source='geneset', target='genesymbol', verbose=False, n_up=len(rank), n_bottom=0)
No significant term was found
Code
intTable(results_table_py, folder = folder, fileName = 'KEGG_all.xlsx', save = True)
Code
if len(results_table_py) > 0:
    results_table_py = getAnnGenes(results_table_py, GO2gene['kegg_pathways'], rank_copy)
    _, df = plotGenesInTerm(results_table_py, GO2gene['kegg_pathways'], rank_copy, n_top_terms = 10, n_top_genes = 15, cmap = cmap_all)
Code
if len(results_table_py) > 0:
    intTable(df, folder = folder, fileName = 'genesInTerm_KEGG_all.xlsx', save = True)