Load environment
Code
import warnings
warnings.filterwarnings("ignore" )
import matplotlib.pyplot as plt
import seaborn as sns
import scanpy as sc
import pandas as pd
import numpy as np
import random
import itertools
from tqdm import tqdm
import decoupler as dc
import sys
sys.setrecursionlimit(20000 )
sys.path.append("./../../../../utilities_folder" )
from utilities import load_object, intTable, plotGenesInTerm, getAnnGenes, run_ora_catchErrors
Set R environment with rpy2:
Code
import rpy2.rinterface_lib.callbacks
import anndata2ri
import logging
from rpy2.robjects import pandas2ri
import rpy2.robjects as ro
sc.settings.verbosity = 0
rpy2.rinterface_lib.callbacks.logger.setLevel(logging.ERROR)
pandas2ri.activate()
anndata2ri.activate()
% load_ext rpy2.ipython
Set up of graphical parameters for Python plots:
Code
% matplotlib inline
sc.set_figure_params(dpi = 300 , fontsize = 20 )
plt.rcParams['svg.fonttype' ] = 'none'
cmap_up = sns.light_palette("red" , as_cmap= True )
cmap_down = sns.light_palette("blue" , as_cmap= True )
cmap_all = sns.light_palette("seagreen" , as_cmap= True )
Set up of graphical parameters for R plots:
Code
default_units = 'in'
default_res = 300
default_width = 10
default_height = 9
import rpy2
old_setup_graphics = rpy2.ipython.rmagic.RMagics.setup_graphics
def new_setup_graphics(self , args):
if getattr (args, 'units' ) is not None :
if args.units != default_units:
return old_setup_graphics(self , args)
args.units = default_units
if getattr (args, 'res' ) is None :
args.res = default_res
if getattr (args, 'width' ) is None :
args.width = default_width
if getattr (args, 'height' ) is None :
args.height = default_height
return old_setup_graphics(self , args)
rpy2.ipython.rmagic.RMagics.setup_graphics = new_setup_graphics
Here the cell were we inject the parameters using Quarto renderer:
Code
# Injected Parameters
N = 3
Code
# Injected Parameters
N = 13
Import R libraries:
Code
%% R
source('./../../../../utilities_folder/GO_helper.r' )
loc <- './../../../../R_loc' # pointing to the renv environment
.libPaths(loc)
library('topGO' )
library('org.Hs.eg.db' )
library(dplyr)
library(ggplot2)
Set output folders:
Code
output_folder = './'
folder = './tables/cluster_' + str (N) + '/'
Load data
Here we load the dataframe:
Code
markers = pd.read_excel(folder + 'genes_in_cluster_' + str (N) + '.xlsx' , index_col = 0 )
markers
logFC.celltypes_leveledhEGCLC
logFC.celltypes_leveledhPGCLC
logFC.celltypes_levelediMeLC
logCPM
LR
PValue
FDR
clusters
ACTA1
11.493834
0
13.291811
2.882094
511.356736
1.649850e-110
2.513090e-109
13
SPIB
12.364728
0
11.586142
2.885391
486.441048
4.140128e-105
5.818964e-104
13
SERTAD4
10.616763
0
12.800036
2.279025
413.057395
3.286390e-89
3.487758e-88
13
CACNG4
11.786840
0
11.698635
2.475937
406.995026
6.760063e-88
7.025198e-87
13
EMILIN1
11.896756
0
10.995019
2.425626
404.247221
2.661738e-87
2.735346e-86
13
POPDC3
11.216938
0
11.819366
2.133988
353.461856
2.655021e-76
2.210001e-75
13
MLKL
10.852160
0
12.158127
2.061818
352.706863
3.868575e-76
3.213461e-75
13
AC007906.2
10.481626
0
12.280169
1.950036
350.659466
1.073694e-75
8.863514e-75
13
FBXO48
11.367965
0
11.230980
2.070526
342.220777
7.212057e-74
5.744282e-73
13
CCDC3
10.619262
0
12.101212
1.921517
339.272319
3.136467e-73
2.469474e-72
13
KCNH8
11.338643
0
11.048491
2.009845
335.210888
2.375579e-72
1.834882e-71
13
ANGPT1
10.617072
0
11.965527
1.864253
328.988159
5.284128e-71
3.976514e-70
13
DYRK3
10.986880
0
11.658736
1.942958
328.902937
5.513445e-71
4.146484e-70
13
RASGEF1B
10.876823
0
11.775267
1.920243
328.638295
6.290941e-71
4.719385e-70
13
MET
11.166657
0
11.239120
1.937661
325.275316
3.363236e-70
2.479663e-69
13
SHISAL1
11.176399
0
11.206281
1.938033
325.158890
3.564196e-70
2.626215e-69
13
HIST1H4I
10.859842
0
11.656711
1.871779
321.243636
2.509192e-69
1.822010e-68
13
CCDC160
11.047945
0
11.260604
1.863750
316.592564
2.548906e-68
1.821103e-67
13
C12orf60
11.044474
0
11.265925
1.859689
316.397164
2.809644e-68
2.006196e-67
13
VWA2
10.838110
0
11.545149
1.817438
314.126288
8.713880e-68
6.174303e-67
13
LRRC8C
10.944186
0
11.228256
1.784143
307.639474
2.209546e-66
1.531246e-65
13
PRR5L
10.747022
0
11.484815
1.741472
305.705429
5.793153e-66
3.996277e-65
13
CTRL
10.965316
0
11.088085
1.761878
304.594342
1.007857e-65
6.936529e-65
13
BMP1
10.639870
0
11.475658
1.678547
299.339185
1.382889e-64
9.340920e-64
13
SKIDA1
10.676182
0
11.409880
1.677247
298.038824
2.643736e-64
1.774763e-63
13
RAB30
10.927523
0
10.850198
1.676287
294.937649
1.239863e-63
8.263226e-63
13
GPLD1
10.824435
0
10.932813
1.626964
289.220189
2.141323e-62
1.394590e-61
13
LRRC3
10.756358
0
11.076017
1.619414
288.951581
2.447988e-62
1.590861e-61
13
PCDHA13
10.738773
0
11.094081
1.614907
288.458574
3.129653e-62
2.031651e-61
13
SLC41A2
10.802256
0
10.892097
1.602220
286.396618
8.743739e-62
5.639500e-61
13
HCN3
10.606034
0
11.225528
1.574186
285.417684
1.424071e-61
9.140706e-61
13
KHDC1
10.730836
0
10.936155
1.564449
282.486777
6.134006e-61
3.889407e-60
13
ST8SIA3
10.476971
0
11.023575
1.433392
269.377827
4.207969e-58
2.556085e-57
13
UNC79
10.329794
0
10.878265
1.296046
255.141290
5.055867e-55
2.920384e-54
13
Code
allGenes_series = pd.read_csv('./tables/all_bkg_genes.csv' )
allGenes = allGenes_series['0' ].tolist()
Here we load the dictionary that associates to each GO term its genes:
Code
GO2gene = load_object('./../../../../data/GO2gene_complete.pickle' )
Markers of cluster
We filter genes for the cluster under investigation based on the p-value adjusted that we then convert in -log(p-value adjusted):
Code
markers = markers[markers.FDR < 0.01 ]
markers['-log10(FDR)' ] = - np.log10(markers.FDR)
markers = markers.replace(np.inf, markers[markers['-log10(FDR)' ] != np.inf]['-log10(FDR)' ].max ())
markers
logFC.celltypes_leveledhEGCLC
logFC.celltypes_leveledhPGCLC
logFC.celltypes_levelediMeLC
logCPM
LR
PValue
FDR
clusters
-log10(FDR)
ACTA1
11.493834
0
13.291811
2.882094
511.356736
1.649850e-110
2.513090e-109
13
108.599792
SPIB
12.364728
0
11.586142
2.885391
486.441048
4.140128e-105
5.818964e-104
13
103.235154
SERTAD4
10.616763
0
12.800036
2.279025
413.057395
3.286390e-89
3.487758e-88
13
87.457454
CACNG4
11.786840
0
11.698635
2.475937
406.995026
6.760063e-88
7.025198e-87
13
86.153341
EMILIN1
11.896756
0
10.995019
2.425626
404.247221
2.661738e-87
2.735346e-86
13
85.562988
POPDC3
11.216938
0
11.819366
2.133988
353.461856
2.655021e-76
2.210001e-75
13
74.655607
MLKL
10.852160
0
12.158127
2.061818
352.706863
3.868575e-76
3.213461e-75
13
74.493027
AC007906.2
10.481626
0
12.280169
1.950036
350.659466
1.073694e-75
8.863514e-75
13
74.052394
FBXO48
11.367965
0
11.230980
2.070526
342.220777
7.212057e-74
5.744282e-73
13
72.240764
CCDC3
10.619262
0
12.101212
1.921517
339.272319
3.136467e-73
2.469474e-72
13
71.607396
KCNH8
11.338643
0
11.048491
2.009845
335.210888
2.375579e-72
1.834882e-71
13
70.736392
ANGPT1
10.617072
0
11.965527
1.864253
328.988159
5.284128e-71
3.976514e-70
13
69.400498
DYRK3
10.986880
0
11.658736
1.942958
328.902937
5.513445e-71
4.146484e-70
13
69.382320
RASGEF1B
10.876823
0
11.775267
1.920243
328.638295
6.290941e-71
4.719385e-70
13
69.326115
MET
11.166657
0
11.239120
1.937661
325.275316
3.363236e-70
2.479663e-69
13
68.605607
SHISAL1
11.176399
0
11.206281
1.938033
325.158890
3.564196e-70
2.626215e-69
13
68.580670
HIST1H4I
10.859842
0
11.656711
1.871779
321.243636
2.509192e-69
1.822010e-68
13
67.739449
CCDC160
11.047945
0
11.260604
1.863750
316.592564
2.548906e-68
1.821103e-67
13
66.739666
C12orf60
11.044474
0
11.265925
1.859689
316.397164
2.809644e-68
2.006196e-67
13
66.697627
VWA2
10.838110
0
11.545149
1.817438
314.126288
8.713880e-68
6.174303e-67
13
66.209412
LRRC8C
10.944186
0
11.228256
1.784143
307.639474
2.209546e-66
1.531246e-65
13
64.814955
PRR5L
10.747022
0
11.484815
1.741472
305.705429
5.793153e-66
3.996277e-65
13
64.398344
CTRL
10.965316
0
11.088085
1.761878
304.594342
1.007857e-65
6.936529e-65
13
64.158858
BMP1
10.639870
0
11.475658
1.678547
299.339185
1.382889e-64
9.340920e-64
13
63.029610
SKIDA1
10.676182
0
11.409880
1.677247
298.038824
2.643736e-64
1.774763e-63
13
62.750860
RAB30
10.927523
0
10.850198
1.676287
294.937649
1.239863e-63
8.263226e-63
13
62.082850
GPLD1
10.824435
0
10.932813
1.626964
289.220189
2.141323e-62
1.394590e-61
13
60.855553
LRRC3
10.756358
0
11.076017
1.619414
288.951581
2.447988e-62
1.590861e-61
13
60.798368
PCDHA13
10.738773
0
11.094081
1.614907
288.458574
3.129653e-62
2.031651e-61
13
60.692151
SLC41A2
10.802256
0
10.892097
1.602220
286.396618
8.743739e-62
5.639500e-61
13
60.248759
HCN3
10.606034
0
11.225528
1.574186
285.417684
1.424071e-61
9.140706e-61
13
60.039020
KHDC1
10.730836
0
10.936155
1.564449
282.486777
6.134006e-61
3.889407e-60
13
59.410117
ST8SIA3
10.476971
0
11.023575
1.433392
269.377827
4.207969e-58
2.556085e-57
13
56.592425
UNC79
10.329794
0
10.878265
1.296046
255.141290
5.055867e-55
2.920384e-54
13
53.534560
All regulated
Code
all_sign = markers.index.tolist()
allSelected = allGenes_series['0' ].isin(all_sign).astype('int' ).tolist()
topGO
All significant
Code
%% R - i allSelected - i allGenes
allGenes_v <- c(allSelected)
#print(allGenes_v)
names(allGenes_v) <- allGenes
allGenes_v <- unlist(allGenes_v)
geneNames <- c(allGenes)
ann_org_BP <- topGO::annFUN.org(whichOnto= 'BP' , feasibleGenes= names(allGenes_v),
mapping= 'org.Hs.eg' , ID= 'symbol' )
ann_org_MF <- topGO::annFUN.org(whichOnto= 'MF' , feasibleGenes= names(allGenes_v),
mapping= 'org.Hs.eg' , ID= 'symbol' )
ann_org_CC <- topGO::annFUN.org(whichOnto= 'CC' , feasibleGenes= names(allGenes_v),
mapping= 'org.Hs.eg' , ID= 'symbol' )
selection <- function(allScores){return (as .logical(allScores))}
Code
%% R
#print(lapply(ann_org_BP, count_genes))
GOdata <- new("topGOdata" ,
ontology= "BP" ,
allGenes= allGenes_v,
annot= annFUN.GO2genes,
GO2genes= ann_org_BP,
geneSel = selection,
nodeSize= 10 )
Code
%% R - o results
results <- runTest(GOdata, algorithm= "weight01" ,statistic= "fisher" )
Code
scores = ro.r.score(results)
score_names = ro.r(
'''
names(results@score)
'''
)
go_data = ro.r.GOdata
genesData = ro.r(
'''
geneData(results)
'''
)
genesData
array([10903, 25, 10, 1082], dtype=int32)
Code
#num_summarize = min(100, len(score_names))
results_table = ro.r.GenTable(go_data, weight= results,
orderBy= "weight" , topNodes= len (scores))
Code
results_table_py = ro.conversion.rpy2py(results_table)
Code
scores_py = ro.conversion.rpy2py(scores)
score_names = [i for i in score_names]
Code
scores_df = pd.DataFrame({'Scores' : scores_py, 'GO.ID' : score_names})
results_table_py = results_table_py.merge(scores_df, left_on = 'GO.ID' , right_on = 'GO.ID' )
results_table_py
GO.ID
Term
Annotated
Significant
Expected
weight
Scores
0
GO:0050918
positive chemotaxis
33
2
0.08
0.0026
0.002552
1
GO:0014068
positive regulation of phosphatidylinosi...
45
2
0.10
0.0047
0.004704
2
GO:0030199
collagen fibril organization
45
2
0.10
0.0047
0.004704
3
GO:0090130
tissue migration
235
4
0.54
0.0062
0.006173
4
GO:0051897
positive regulation of protein kinase B ...
71
2
0.16
0.0114
0.011387
...
...
...
...
...
...
...
...
5697
GO:2001251
negative regulation of chromosome organi...
88
0
0.20
1.0000
1.000000
5698
GO:2001252
positive regulation of chromosome organi...
93
0
0.21
1.0000
1.000000
5699
GO:2001256
regulation of store-operated calcium ent...
11
0
0.03
1.0000
1.000000
5700
GO:2001258
negative regulation of cation channel ac...
23
0
0.05
1.0000
1.000000
5701
GO:2001267
regulation of cysteine-type endopeptidas...
13
0
0.03
1.0000
1.000000
5702 rows × 7 columns
Code
results_table_py = results_table_py[results_table_py['Scores' ] < 0.05 ]
results_table_py = results_table_py[results_table_py['Annotated' ] < 200 ]
results_table_py = results_table_py[results_table_py['Annotated' ] > 15 ]
Code
results_table_py['-log10(pvalue)' ] = - np.log10(results_table_py.Scores)
results_table_py['Significant/Annotated' ] = results_table_py['Significant' ] / results_table_py['Annotated' ]
Code
intTable(results_table_py, folder = folder, fileName = 'GO_BP_all.xlsx' , save = True )
Code
%% R - i folder
Res <- GenTable(GOdata, weight= results,
orderBy= "weight" , topNodes= length(score(results)))
#print(Res[0:10,])
colnames(Res) <- c("GO.ID" , "Term" , "Annotated" , "Significant" , "Expected" , "Statistics" )
Res$ER <- Res$Significant / Res$Expected
image = bubbleplot(Res, Ont = 'BP' , fillCol = 'forestgreen' )
ggsave(file = paste0(folder, "TopGO_results_BP.pdf" ), plot= image, width= 12 , height= 4 )
bubbleplot(Res, Ont = 'BP' , fillCol = 'forestgreen' )
Code
%% R - i markers
image = plotGenesInTerm_v1(Res, GOdata, SE = markers, nterms= 15 , ngenes= 12 ,
fillCol= 'forestgreen' , log = TRUE)
ggsave(file = paste0(folder, "Genes_in_Term_results_BP.pdf" ), plot= image, width= 12 , height= 4 )
plotGenesInTerm_v1(Res, GOdata, SE = markers, nterms= 15 , ngenes= 12 ,
fillCol= 'forestgreen' , log = TRUE)
Code
%% R - i markers - i folder
saveGenesInTerm(Res, GOdata, nterms = 20 , path = paste0(folder,'GO_BP_genesInTerm_all.xlsx' ), SE = markers)
Code
%% R
GOdata <- new("topGOdata" ,
ontology= "MF" ,
allGenes= allGenes_v,
annot= annFUN.GO2genes,
GO2genes= ann_org_MF,
geneSel = selection,
nodeSize= 10 )
Code
%% R - o results
results <- runTest(GOdata, algorithm= "weight01" ,statistic= "fisher" )
Code
scores = ro.r.score(results)
score_names = ro.r(
'''
names(results@score)
'''
)
go_data = ro.r.GOdata
genesData = ro.r(
'''
geneData(results)
'''
)
genesData
array([11203, 28, 10, 150], dtype=int32)
Code
#num_summarize = min(100, len(score_names))
results_table = ro.r.GenTable(go_data, weight= results,
orderBy= "weight" , topNodes= len (scores))
Code
results_table_py = ro.conversion.rpy2py(results_table)
Code
scores_py = ro.conversion.rpy2py(scores)
score_names = [i for i in score_names]
Code
scores_df = pd.DataFrame({'Scores' : scores_py, 'GO.ID' : score_names})
results_table_py = results_table_py.merge(scores_df, left_on = 'GO.ID' , right_on = 'GO.ID' )
results_table_py = results_table_py[results_table_py['Scores' ] < 0.05 ]
results_table_py = results_table_py[results_table_py['Annotated' ] < 200 ]
results_table_py = results_table_py[results_table_py['Annotated' ] > 15 ]
intTable(results_table_py, folder = folder, fileName = 'GO_MF_all.xlsx' , save = True )
Code
%% R
Res <- GenTable(GOdata, weight= results,
orderBy= "weight" , topNodes= length(score(results)))
#print(Res[0:10,])
colnames(Res) <- c("GO.ID" , "Term" , "Annotated" , "Significant" , "Expected" , "Statistics" )
Res$ER <- Res$Significant / Res$Expected
image = bubbleplot(Res, Ont = 'MF' , fillCol = 'forestgreen' )
ggsave(file = paste0(folder, "TopGO_results_MF.pdf" ), plot= image, width= 12 , height= 4 )
bubbleplot(Res, Ont = 'MF' , fillCol = 'forestgreen' )
Code
%% R - i markers
image = plotGenesInTerm_v1(Res, GOdata, SE = markers, nterms= 15 , ngenes= 12 ,
fillCol= 'forestgreen' , log = TRUE)
ggsave(file = paste0(folder, "Genes_in_Term_results_MF.pdf" ), plot= image, width= 12 , height= 4 )
plotGenesInTerm_v1(Res, GOdata, SE = markers, nterms= 15 , ngenes= 12 ,
fillCol= 'forestgreen' , log = TRUE)
Code
%% R - i markers - i folder
saveGenesInTerm(Res, GOdata, nterms = 20 , path = paste0(folder,'GO_MF_genesInTerm_all.xlsx' ), SE = markers)
Code
%% R
GOdata <- new("topGOdata" ,
ontology= "CC" ,
allGenes= allGenes_v,
annot= annFUN.GO2genes,
GO2genes= ann_org_CC,
geneSel = selection,
nodeSize= 10 )
Code
%% R - o results
results <- runTest(GOdata, algorithm= "weight01" ,statistic= "fisher" )
Code
scores = ro.r.score(results)
score_names = ro.r(
'''
names(results@score)
'''
)
go_data = ro.r.GOdata
genesData = ro.r(
'''
geneData(results)
'''
)
genesData
array([11323, 29, 10, 170], dtype=int32)
Code
#num_summarize = min(100, len(score_names))
results_table = ro.r.GenTable(go_data, weight= results,
orderBy= "weight" , topNodes= len (scores))
Code
results_table_py = ro.conversion.rpy2py(results_table)
Code
scores_py = ro.conversion.rpy2py(scores)
score_names = [i for i in score_names]
Code
scores_df = pd.DataFrame({'Scores' : scores_py, 'GO.ID' : score_names})
results_table_py = results_table_py.merge(scores_df, left_on = 'GO.ID' , right_on = 'GO.ID' )
Code
results_table_py = results_table_py[results_table_py['Scores' ] < 0.05 ]
results_table_py = results_table_py[results_table_py['Annotated' ] < 200 ]
results_table_py = results_table_py[results_table_py['Annotated' ] > 15 ]
intTable(results_table_py, folder = folder, fileName = 'GO_CC_all.xlsx' , save = True )
Code
%% R
Res <- GenTable(GOdata, weight= results,
orderBy= "weight" , topNodes= length(score(results)))
#print(Res[0:10,])
colnames(Res) <- c("GO.ID" , "Term" , "Annotated" , "Significant" , "Expected" , "Statistics" )
Res$ER <- Res$Significant / Res$Expected
image = bubbleplot(Res, Ont = 'CC' , fillCol = 'forestgreen' )
ggsave(file = paste0(folder, "TopGO_results_CC.pdf" ), plot= image, width= 12 , height= 4 )
bubbleplot(Res, Ont = 'CC' , fillCol = 'forestgreen' )
Code
%% R - i markers
image = plotGenesInTerm_v1(Res, GOdata, SE = markers, nterms= 12 , ngenes= 12 ,
fillCol= 'forestgreen' , log = TRUE)
ggsave(file = paste0(folder, "Genes:_in_Term_results_CC.pdf" ), plot= image, width= 12 , height= 4 )
plotGenesInTerm_v1(Res, GOdata, SE = markers, nterms= 12 , ngenes= 12 ,
fillCol= 'forestgreen' , log = TRUE)
Code
%% R - i markers - i folder
saveGenesInTerm(Res, GOdata, nterms = 20 , path = paste0(folder,'GO_CC_genesInTerm_all.xlsx' ), SE = markers)
Reactome
Code
curated = msigdb[msigdb['collection' ].isin(['reactome_pathways' ])]
curated = curated[~ curated.duplicated(['geneset' , 'genesymbol' ])]
aggregated = curated[["geneset" , "genesymbol" ]].groupby("geneset" ).count().rename(columns= {"genesymbol" : "gene_count" })
curated = curated[~ curated.geneset.isin(aggregated[aggregated.gene_count > 200 ].index.tolist())].copy()
curated = curated[~ curated.geneset.isin(aggregated[aggregated.gene_count < 15 ].index.tolist())].copy()
Code
rank = pd.DataFrame(markers['-log10(FDR)' ])
rank_copy = rank.copy()
rank_copy['pval' ] = markers.loc[rank.index].FDR
Code
-log10(FDR)
pval
ACTA1
108.599792
2.513090e-109
SPIB
103.235154
5.818964e-104
SERTAD4
87.457454
3.487758e-88
CACNG4
86.153341
7.025198e-87
EMILIN1
85.562988
2.735346e-86
POPDC3
74.655607
2.210001e-75
MLKL
74.493027
3.213461e-75
AC007906.2
74.052394
8.863514e-75
FBXO48
72.240764
5.744282e-73
CCDC3
71.607396
2.469474e-72
KCNH8
70.736392
1.834882e-71
ANGPT1
69.400498
3.976514e-70
DYRK3
69.382320
4.146484e-70
RASGEF1B
69.326115
4.719385e-70
MET
68.605607
2.479663e-69
SHISAL1
68.580670
2.626215e-69
HIST1H4I
67.739449
1.822010e-68
CCDC160
66.739666
1.821103e-67
C12orf60
66.697627
2.006196e-67
VWA2
66.209412
6.174303e-67
LRRC8C
64.814955
1.531246e-65
PRR5L
64.398344
3.996277e-65
CTRL
64.158858
6.936529e-65
BMP1
63.029610
9.340920e-64
SKIDA1
62.750860
1.774763e-63
RAB30
62.082850
8.263226e-63
GPLD1
60.855553
1.394590e-61
LRRC3
60.798368
1.590861e-61
PCDHA13
60.692151
2.031651e-61
SLC41A2
60.248759
5.639500e-61
HCN3
60.039020
9.140706e-61
KHDC1
59.410117
3.889407e-60
ST8SIA3
56.592425
2.556085e-57
UNC79
53.534560
2.920384e-54
Code
results_table_py = run_ora_catchErrors(mat= rank.T, net= curated, source= 'geneset' , target= 'genesymbol' , verbose= False , n_up= len (rank), n_bottom= 0 )
len (results_table_py)
No significant term was found
Code
intTable(results_table_py, folder = folder, fileName = 'Reactome_all.xlsx' , save = True )
Code
if len (results_table_py) > 0 :
results_table_py = getAnnGenes(results_table_py, GO2gene['reactome_pathways' ], rank_copy)
_, df = plotGenesInTerm(results = results_table_py, GO2gene = GO2gene['reactome_pathways' ], DEGs = rank_copy, n_top_terms = 10 , cmap = cmap_all)
Code
if len (results_table_py) > 0 :
intTable(df, folder = folder, fileName = 'genesInTerm_Reactome_all.xlsx' , save = True )
KEGG
Code
curated = msigdb[msigdb['collection' ].isin(['kegg_pathways' ])]
curated = curated[~ curated.duplicated(['geneset' , 'genesymbol' ])]
aggregated = curated[["geneset" , "genesymbol" ]].groupby("geneset" ).count().rename(columns= {"genesymbol" : "gene_count" })
curated = curated[~ curated.geneset.isin(aggregated[aggregated.gene_count > 200 ].index.tolist())].copy()
curated = curated[~ curated.geneset.isin(aggregated[aggregated.gene_count < 15 ].index.tolist())].copy()
Code
results_table_py = run_ora_catchErrors(mat= rank.T, net= curated, source= 'geneset' , target= 'genesymbol' , verbose= False , n_up= len (rank), n_bottom= 0 )
No significant term was found
Code
intTable(results_table_py, folder = folder, fileName = 'KEGG_all.xlsx' , save = True )
Code
if len (results_table_py) > 0 :
results_table_py = getAnnGenes(results_table_py, GO2gene['kegg_pathways' ], rank_copy)
_, df = plotGenesInTerm(results_table_py, GO2gene['kegg_pathways' ], rank_copy, n_top_terms = 10 , n_top_genes = 15 , cmap = cmap_all)
Code
if len (results_table_py) > 0 :
intTable(df, folder = folder, fileName = 'genesInTerm_KEGG_all.xlsx' , save = True )