::opts_chunk$set(echo = TRUE, collapse = TRUE, warning = FALSE)
knitr
<- params$Condition_1
Condition_1 <- params$Condition_2 Condition_2
Top GO Analysis for hEGCLCs vs hPGCLCs
For this functional we will focus only on ALL differentially methylated genomic regions of protein coding genes!
library(DT)
library(ggplot2)
library(AnnotationDbi)
library(org.Hs.eg.db)
library(gridExtra)
library(RColorBrewer)
library(viridis)
library(topGO)
library(data.table)
library(tidyr)
library(dplyr)
source("TopGO_helper_meth.R")
<- ifelse(is.null(params$OutputFolder), getwd(), params$OutputFolder)
OutputFolder
if (dir.exists(OutputFolder) == FALSE) {
dir.create(OutputFolder, recursive=TRUE)
}
<- readRDS(params$DMRAnnotated)
DMRAnnotated <- readRDS(params$GeneUniverse) GeneUniverse_df
<- GeneUniverse_df[GeneUniverse_df$gene_biotype %in% "protein_coding",]
GeneUniverse_df_protein_coding <- unique(GeneUniverse_df_protein_coding$hgnc_symbol) GeneUniverse
We have 18940 protein-coding genes in the universe.
Saving of selected DMRs
<- DMRAnnotated[[paste0(Condition_1,"vs",Condition_2)]]
DMReg <- DMReg %>% filter(gene_biotype == "protein_coding") DMReg
Gene ontology enrichment analysis is performed on the set of DMGs (Differentially Methylates Genes) using TopGO with Fisher statistics and weight01 algorithm. DMGs are now splitted in down- and up-methylated.
I generate vectors for the gene universe, all modulated genes, up-meth genes and down-meth genes in the format required by TopGO.
<- topGOGeneVectors_meth(DMRAnnotated, Cond1 = Condition_1, Cond2 = Condition_2, genomic_type = "all", gene_type = "protein_coding", Universe = GeneUniverse)
GeneVectors ## [1] "All the genes associated to DMRs are in the gene universe!"
Therefore:
N.B.: The sum of down- and up- methylated genes is > the number of modulated genes because some genes are shared between up and down lists. This is because for example in a promoter of a gene meth can be higher for a cell type and in the gene body of the same gene meth can be higher for the other cell type!
<- ifelse(length(grep('BP', params$TopGO))!=0, TRUE, FALSE)
BpEval <- ifelse(length(grep('MF', params$TopGO))!=0, TRUE, FALSE)
MfEval <- ifelse(length(grep('CC', params$TopGO))!=0, TRUE, FALSE) CcEval
On the basis of the analysis settings, the enrichment for Biological Process IS performed.
# I generate a list that contains the association between each gene and the GO terms that are associated to it
<- topGO::annFUN.org(whichOnto="BP", feasibleGenes=names(GeneVectors$DMGenes),
BPann mapping="org.Hs.eg.db", ID="symbol") %>% inverseList()
# Wrapper function for topGO analysis
<- topGOResults_new(Genes=GeneVectors$DMGenes, gene2GO=BPann, ontology='BP',
ResBPAll desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher',
EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
saveRes=TRUE, outDir=paste0(OutputFolder), fileName='BPAll', maxAnn = 500)
## Gene vector contains levels: 0,1
##
## Building most specific GOs .....
## ( 12448 GO terms found. )
##
## Build GO DAG topology ..........
## ( 15834 GO terms and 35900 relations. )
##
## Annotating nodes ...............
## ( 16734 genes annotated to the GO terms. )
##
## -- Weight01 Algorithm --
##
## the algorithm is scoring 5210 nontrivial nodes
## parameters:
## test statistic: fisher
##
## Level 18: 3 nodes to be scored (0 eliminated genes)
##
## Level 17: 8 nodes to be scored (0 eliminated genes)
##
## Level 16: 14 nodes to be scored (37 eliminated genes)
##
## Level 15: 27 nodes to be scored (134 eliminated genes)
##
## Level 14: 61 nodes to be scored (300 eliminated genes)
##
## Level 13: 105 nodes to be scored (634 eliminated genes)
##
## Level 12: 187 nodes to be scored (1769 eliminated genes)
##
## Level 11: 370 nodes to be scored (4072 eliminated genes)
##
## Level 10: 576 nodes to be scored (6332 eliminated genes)
##
## Level 9: 754 nodes to be scored (8267 eliminated genes)
##
## Level 8: 801 nodes to be scored (10632 eliminated genes)
##
## Level 7: 835 nodes to be scored (12805 eliminated genes)
##
## Level 6: 695 nodes to be scored (14512 eliminated genes)
##
## Level 5: 426 nodes to be scored (15497 eliminated genes)
##
## Level 4: 235 nodes to be scored (16132 eliminated genes)
##
## Level 3: 94 nodes to be scored (16392 eliminated genes)
##
## Level 2: 18 nodes to be scored (16501 eliminated genes)
##
## Level 1: 1 nodes to be scored (16558 eliminated genes)
# Wrapper function for topGO analysis
<- topGOResults_new(Genes=GeneVectors$DMGenesDown, gene2GO=BPann, ontology='BP',
ResBPDown desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher',
EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
saveRes=TRUE, outDir=paste0(OutputFolder), fileName='BPDown', maxAnn = 500)
## Gene vector contains levels: 0,1
##
## Building most specific GOs .....
## ( 12448 GO terms found. )
##
## Build GO DAG topology ..........
## ( 15834 GO terms and 35900 relations. )
##
## Annotating nodes ...............
## ( 16734 genes annotated to the GO terms. )
##
## -- Weight01 Algorithm --
##
## the algorithm is scoring 4921 nontrivial nodes
## parameters:
## test statistic: fisher
##
## Level 18: 3 nodes to be scored (0 eliminated genes)
##
## Level 17: 8 nodes to be scored (0 eliminated genes)
##
## Level 16: 14 nodes to be scored (37 eliminated genes)
##
## Level 15: 26 nodes to be scored (134 eliminated genes)
##
## Level 14: 54 nodes to be scored (300 eliminated genes)
##
## Level 13: 92 nodes to be scored (619 eliminated genes)
##
## Level 12: 166 nodes to be scored (1685 eliminated genes)
##
## Level 11: 338 nodes to be scored (3934 eliminated genes)
##
## Level 10: 538 nodes to be scored (6181 eliminated genes)
##
## Level 9: 708 nodes to be scored (8110 eliminated genes)
##
## Level 8: 759 nodes to be scored (10537 eliminated genes)
##
## Level 7: 792 nodes to be scored (12724 eliminated genes)
##
## Level 6: 673 nodes to be scored (14115 eliminated genes)
##
## Level 5: 409 nodes to be scored (15475 eliminated genes)
##
## Level 4: 230 nodes to be scored (16129 eliminated genes)
##
## Level 3: 92 nodes to be scored (16389 eliminated genes)
##
## Level 2: 18 nodes to be scored (16500 eliminated genes)
##
## Level 1: 1 nodes to be scored (16558 eliminated genes)
# Selection on enrichment of at least 2 is implemented (also to avoid depleted categories). Then categories are ranked by PVal and all the ones with Pval < th are selected. If the number is < minTerms, othter terms are included to reach the minimum number.
<- topGOResults_new(Genes=GeneVectors$DMGenesUp, gene2GO=BPann, ontology='BP',
ResBPUp desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher',
EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
saveRes=TRUE, outDir=OutputFolder, fileName='BPUp', maxAnn = 500)
## Gene vector contains levels: 0,1
##
## Building most specific GOs .....
## ( 12448 GO terms found. )
##
## Build GO DAG topology ..........
## ( 15834 GO terms and 35900 relations. )
##
## Annotating nodes ...............
## ( 16734 genes annotated to the GO terms. )
##
## -- Weight01 Algorithm --
##
## the algorithm is scoring 3896 nontrivial nodes
## parameters:
## test statistic: fisher
##
## Level 17: 3 nodes to be scored (0 eliminated genes)
##
## Level 16: 6 nodes to be scored (0 eliminated genes)
##
## Level 15: 16 nodes to be scored (70 eliminated genes)
##
## Level 14: 40 nodes to be scored (192 eliminated genes)
##
## Level 13: 73 nodes to be scored (462 eliminated genes)
##
## Level 12: 124 nodes to be scored (1397 eliminated genes)
##
## Level 11: 247 nodes to be scored (3649 eliminated genes)
##
## Level 10: 386 nodes to be scored (5872 eliminated genes)
##
## Level 9: 544 nodes to be scored (7655 eliminated genes)
##
## Level 8: 588 nodes to be scored (9903 eliminated genes)
##
## Level 7: 641 nodes to be scored (12331 eliminated genes)
##
## Level 6: 561 nodes to be scored (14217 eliminated genes)
##
## Level 5: 363 nodes to be scored (15383 eliminated genes)
##
## Level 4: 198 nodes to be scored (16096 eliminated genes)
##
## Level 3: 87 nodes to be scored (16378 eliminated genes)
##
## Level 2: 18 nodes to be scored (16495 eliminated genes)
##
## Level 1: 1 nodes to be scored (16556 eliminated genes)
=15
nterms<- c(All='forestgreen', Down='blue', Up='red')
cols <- lapply(cols, {function(el) grDevices::colorRampPalette(c('gold', el))(nterms)}) pals
if(nrow(ResBPAll$ResSel)>0){
topGOBarplot_new(TopGORes=ResBPAll$ResSel, terms=nterms, pvalTh=0.01, plotTitle=paste0("All DMGenes - ", Condition_2, " vs ", Condition_1),
palette=pals$All, flip_x=FALSE)
}
if(nrow(ResBPDown$ResSel)>0){
topGOBarplot_new(TopGORes=ResBPDown$ResSel, terms=nterms, pvalTh=0.01, plotTitle=paste0("Down-methylated genes (associated to DMRs where methylation is lower in ", Condition_2, ")"),
palette=pals$Down, flip_x=FALSE)
}
if(nrow(ResBPUp$ResSel)>0){
topGOBarplot_new(TopGORes=ResBPUp$ResSel, terms=nterms, pvalTh=0.01, plotTitle=paste0("Up-methylated genes (associated to DMRs where methylation is higher in ", Condition_2, ")"),
palette=pals$Up, flip_x=TRUE)
}
if(nrow(ResBPAll$ResSel)>0 && nrow(ResBPDown$ResSel)>0 && nrow(ResBPUp$ResSel)>0){
topGOBarplotAll_new(TopGOResAll=ResBPAll$ResSel, TopGOResDown=ResBPDown$ResSel, TopGOResUp=ResBPUp$ResSel,
terms=nterms, pvalTh=0.01, plotTitle="TopGO analysis of DMGs - ")
}
if(nrow(ResBPAll$ResSel)>0){
bubbleplot(ResBPAll$ResSel, terms=nterms, Ont = "BP", pvalTh=0.01, plotTitle="TopGO analysis of DMGs - All")
}
if(nrow(ResBPDown$ResSel)>0){
bubbleplot(ResBPDown$ResSel, terms=nterms, Ont = "BP", pvalTh=0.01, plotTitle="TopGO analysis of DMGs - Down", fillCol = "blue")
}
if(nrow(ResBPUp$ResSel)>0){
bubbleplot(ResBPUp$ResSel, terms=nterms, Ont = "BP", pvalTh=0.01, plotTitle="TopGO analysis of DMGs - Up", fillCol = "red")
}
if(nrow(ResBPAll$ResSel)>0){
plotGenesInTerm_v2(ResBPAll$ResSel, ResBPAll$GOdata, DMRs = DMReg, nterms=nterms, ngenes=12, plotTitle="Top genes in top GO terms - All", Interactive=FALSE)
}
if(nrow(ResBPDown$ResSel)>0){
plotGenesInTerm_v2(ResBPDown$ResSel, ResBPAll$GOdata, DMRs = DMReg, nterms=nterms, ngenes=12, plotTitle="Top genes in top GO terms - Down", Interactive=FALSE, fillCol='blue')
}
if(nrow(ResBPUp$ResSel)>0){
plotGenesInTerm_v2(ResBPUp$ResSel, ResBPAll$GOdata, DMRs = DMReg, nterms=nterms, ngenes=12, plotTitle="Top genes in top GO terms - Up", Interactive=FALSE, fillCol='red')
}
On the basis of the analysis settings, the enrichment for Molecular Function IS performed.
# I generate a list that contains the association between each gene and the GO terms that are associated to it
<- topGO::annFUN.org(whichOnto='MF', feasibleGenes=names(GeneVectors$DMGenes),
MFann mapping='org.Hs.eg.db', ID='symbol') %>% inverseList()
# Wrapper function for topGO analysis
<- topGOResults_new(Genes=GeneVectors$DMGenes, gene2GO=MFann, ontology='MF',
ResMFAll desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher',
EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
saveRes=TRUE, outDir=OutputFolder, fileName='MFAll', maxAnn = 500)
## Gene vector contains levels: 0,1
##
## Building most specific GOs .....
## ( 4495 GO terms found. )
##
## Build GO DAG topology ..........
## ( 4939 GO terms and 6405 relations. )
##
## Annotating nodes ...............
## ( 17198 genes annotated to the GO terms. )
##
## -- Weight01 Algorithm --
##
## the algorithm is scoring 903 nontrivial nodes
## parameters:
## test statistic: fisher
##
## Level 12: 1 nodes to be scored (0 eliminated genes)
##
## Level 11: 11 nodes to be scored (0 eliminated genes)
##
## Level 10: 18 nodes to be scored (17 eliminated genes)
##
## Level 9: 37 nodes to be scored (253 eliminated genes)
##
## Level 8: 65 nodes to be scored (1592 eliminated genes)
##
## Level 7: 125 nodes to be scored (3926 eliminated genes)
##
## Level 6: 171 nodes to be scored (4704 eliminated genes)
##
## Level 5: 212 nodes to be scored (6843 eliminated genes)
##
## Level 4: 185 nodes to be scored (10572 eliminated genes)
##
## Level 3: 60 nodes to be scored (13724 eliminated genes)
##
## Level 2: 17 nodes to be scored (14746 eliminated genes)
##
## Level 1: 1 nodes to be scored (17034 eliminated genes)
<- topGOResults_new(Genes=GeneVectors$DMGenesDown, gene2GO=MFann, ontology='MF',
ResMFDown desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher',
EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
saveRes=TRUE, outDir=OutputFolder, fileName='MFDown', maxAnn = 500)
## Gene vector contains levels: 0,1
##
## Building most specific GOs .....
## ( 4495 GO terms found. )
##
## Build GO DAG topology ..........
## ( 4939 GO terms and 6405 relations. )
##
## Annotating nodes ...............
## ( 17198 genes annotated to the GO terms. )
##
## -- Weight01 Algorithm --
##
## the algorithm is scoring 865 nontrivial nodes
## parameters:
## test statistic: fisher
##
## Level 12: 1 nodes to be scored (0 eliminated genes)
##
## Level 11: 9 nodes to be scored (0 eliminated genes)
##
## Level 10: 16 nodes to be scored (17 eliminated genes)
##
## Level 9: 35 nodes to be scored (220 eliminated genes)
##
## Level 8: 62 nodes to be scored (1590 eliminated genes)
##
## Level 7: 118 nodes to be scored (3919 eliminated genes)
##
## Level 6: 164 nodes to be scored (4649 eliminated genes)
##
## Level 5: 205 nodes to be scored (6758 eliminated genes)
##
## Level 4: 179 nodes to be scored (10537 eliminated genes)
##
## Level 3: 58 nodes to be scored (13712 eliminated genes)
##
## Level 2: 17 nodes to be scored (14743 eliminated genes)
##
## Level 1: 1 nodes to be scored (17033 eliminated genes)
<- topGOResults_new(Genes=GeneVectors$DMGenesUp, gene2GO=MFann, ontology='MF',
ResMFUp desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher',
EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
saveRes=TRUE, outDir=OutputFolder, fileName='MFUp', maxAnn = 500)
## Gene vector contains levels: 0,1
##
## Building most specific GOs .....
## ( 4495 GO terms found. )
##
## Build GO DAG topology ..........
## ( 4939 GO terms and 6405 relations. )
##
## Annotating nodes ...............
## ( 17198 genes annotated to the GO terms. )
##
## -- Weight01 Algorithm --
##
## the algorithm is scoring 601 nontrivial nodes
## parameters:
## test statistic: fisher
##
## Level 11: 8 nodes to be scored (0 eliminated genes)
##
## Level 10: 15 nodes to be scored (0 eliminated genes)
##
## Level 9: 28 nodes to be scored (205 eliminated genes)
##
## Level 8: 43 nodes to be scored (1541 eliminated genes)
##
## Level 7: 80 nodes to be scored (3809 eliminated genes)
##
## Level 6: 108 nodes to be scored (4465 eliminated genes)
##
## Level 5: 137 nodes to be scored (6341 eliminated genes)
##
## Level 4: 121 nodes to be scored (10072 eliminated genes)
##
## Level 3: 48 nodes to be scored (13268 eliminated genes)
##
## Level 2: 12 nodes to be scored (14561 eliminated genes)
##
## Level 1: 1 nodes to be scored (17024 eliminated genes)
if(nrow(ResMFAll$ResSel)>0){
topGOBarplot_new(TopGORes=ResMFAll$ResSel, terms=nterms, pvalTh=0.01, plotTitle=paste0("All DMGenes - ", Condition_2, " vs ", Condition_1),
palette=pals$All, flip_x=FALSE)
}
if(nrow(ResMFDown$ResSel)>0){
topGOBarplot_new(TopGORes=ResMFDown$ResSel, terms=nterms, pvalTh=0.01, plotTitle=paste0("Down-methylated genes (associated to DMRs where methylation is lower in ", Condition_2, ")"),
palette=pals$Down, flip_x=FALSE)
}
if(nrow(ResMFUp$ResSel)>0){
topGOBarplot_new(TopGORes=ResMFUp$ResSel, terms=nterms, pvalTh=0.01, plotTitle=paste0("Up-methylated genes (associated to DMRs where methylation is higher in ", Condition_2, ")"),
palette=pals$Up, flip_x=TRUE)
}
if(nrow(ResMFAll$ResSel)>0 && nrow(ResMFDown$ResSel)>0 && nrow(ResMFUp$ResSel)>0){
topGOBarplotAll_new(TopGOResAll=ResMFAll$ResSel, TopGOResDown=ResMFDown$ResSel, TopGOResUp=ResMFUp$ResSel,
terms=nterms, pvalTh=0.01, plotTitle= "TopGO analysis of DMGs - ")
}
if(nrow(ResMFAll$ResSel)>0){
bubbleplot(ResMFAll$ResSel, terms=nterms, Ont = "MF", pvalTh=0.01, plotTitle="TopGO analysis of DMGs - All")
}
if(nrow(ResMFDown$ResSel)>0){
bubbleplot(ResMFDown$ResSel, terms=nterms, Ont = "MF", pvalTh=0.01, plotTitle="TopGO analysis of DMGs - Down", fillCol = "blue")
}
if(nrow(ResMFUp$ResSel)>0){
bubbleplot(ResMFUp$ResSel, terms=nterms, Ont = "MF", pvalTh=0.01, plotTitle="TopGO analysis of DMGs - Up", fillCol = "red")
}
if(nrow(ResMFAll$ResSel)>0){
plotGenesInTerm_v2(ResMFAll$ResSel, ResMFAll$GOdata, DMRs = DMReg, nterms=nterms, ngenes=12, plotTitle="Top genes in top GO terms - All", Interactive=FALSE)
}
if(nrow(ResMFDown$ResSel)>0){
plotGenesInTerm_v2(ResMFDown$ResSel, ResMFAll$GOdata, DMRs = DMReg, nterms=nterms, ngenes=12, plotTitle="Top genes in top GO terms - Down", Interactive=FALSE, fillCol='blue')
}
if(nrow(ResMFUp$ResSel)>0){
plotGenesInTerm_v2(ResMFUp$ResSel, ResMFAll$GOdata, DMRs = DMReg, nterms=nterms, ngenes=12, plotTitle="Top genes in top GO terms - Up", Interactive=FALSE, fillCol='red')
}
On the basis of the analysis settings, the enrichment for Cellular Component IS performed.
# I generate a list that contains the association between each gene and the GO terms that are associated to it
<- topGO::annFUN.org(whichOnto='CC', feasibleGenes=names(GeneVectors$DMGenes),
CCann mapping='org.Hs.eg.db', ID='symbol') %>% inverseList()
# Wrapper function for topGO analysis
<- topGOResults_new(Genes=GeneVectors$DMGenes, gene2GO=CCann, ontology='CC',
ResCCAll desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher',
EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
saveRes=TRUE, outDir=OutputFolder, fileName='CCAll', maxAnn = 500)
## Gene vector contains levels: 0,1
##
## Building most specific GOs .....
## ( 1818 GO terms found. )
##
## Build GO DAG topology ..........
## ( 2014 GO terms and 3390 relations. )
##
## Annotating nodes ...............
## ( 17642 genes annotated to the GO terms. )
##
## -- Weight01 Algorithm --
##
## the algorithm is scoring 613 nontrivial nodes
## parameters:
## test statistic: fisher
##
## Level 13: 1 nodes to be scored (0 eliminated genes)
##
## Level 12: 1 nodes to be scored (0 eliminated genes)
##
## Level 11: 26 nodes to be scored (30 eliminated genes)
##
## Level 10: 59 nodes to be scored (42 eliminated genes)
##
## Level 9: 95 nodes to be scored (1015 eliminated genes)
##
## Level 8: 95 nodes to be scored (3187 eliminated genes)
##
## Level 7: 91 nodes to be scored (6002 eliminated genes)
##
## Level 6: 88 nodes to be scored (9925 eliminated genes)
##
## Level 5: 69 nodes to be scored (12109 eliminated genes)
##
## Level 4: 46 nodes to be scored (14669 eliminated genes)
##
## Level 3: 39 nodes to be scored (16585 eliminated genes)
##
## Level 2: 2 nodes to be scored (17317 eliminated genes)
##
## Level 1: 1 nodes to be scored (17557 eliminated genes)
# Wrapper function for topGO analysis
<- topGOResults_new(Genes=GeneVectors$DMGenesDown, gene2GO=CCann, ontology='CC',
ResCCDown desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher',
EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
saveRes=TRUE, outDir=OutputFolder, fileName='CCDown', maxAnn = 500)
## Gene vector contains levels: 0,1
##
## Building most specific GOs .....
## ( 1818 GO terms found. )
##
## Build GO DAG topology ..........
## ( 2014 GO terms and 3390 relations. )
##
## Annotating nodes ...............
## ( 17642 genes annotated to the GO terms. )
##
## -- Weight01 Algorithm --
##
## the algorithm is scoring 580 nontrivial nodes
## parameters:
## test statistic: fisher
##
## Level 13: 1 nodes to be scored (0 eliminated genes)
##
## Level 12: 1 nodes to be scored (0 eliminated genes)
##
## Level 11: 25 nodes to be scored (30 eliminated genes)
##
## Level 10: 55 nodes to be scored (42 eliminated genes)
##
## Level 9: 88 nodes to be scored (996 eliminated genes)
##
## Level 8: 94 nodes to be scored (3105 eliminated genes)
##
## Level 7: 88 nodes to be scored (5935 eliminated genes)
##
## Level 6: 83 nodes to be scored (9908 eliminated genes)
##
## Level 5: 65 nodes to be scored (12096 eliminated genes)
##
## Level 4: 41 nodes to be scored (14666 eliminated genes)
##
## Level 3: 36 nodes to be scored (16585 eliminated genes)
##
## Level 2: 2 nodes to be scored (17317 eliminated genes)
##
## Level 1: 1 nodes to be scored (17557 eliminated genes)
# Wrapper function for topGO analysis
<- topGOResults_new(Genes=GeneVectors$DMGenesUp, gene2GO=CCann, ontology='CC',
ResCCUp desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher',
EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
saveRes=TRUE, outDir=OutputFolder, fileName='CCUp', maxAnn = 500)
## Gene vector contains levels: 0,1
##
## Building most specific GOs .....
## ( 1818 GO terms found. )
##
## Build GO DAG topology ..........
## ( 2014 GO terms and 3390 relations. )
##
## Annotating nodes ...............
## ( 17642 genes annotated to the GO terms. )
##
## -- Weight01 Algorithm --
##
## the algorithm is scoring 482 nontrivial nodes
## parameters:
## test statistic: fisher
##
## Level 13: 1 nodes to be scored (0 eliminated genes)
##
## Level 12: 1 nodes to be scored (0 eliminated genes)
##
## Level 11: 15 nodes to be scored (30 eliminated genes)
##
## Level 10: 37 nodes to be scored (42 eliminated genes)
##
## Level 9: 71 nodes to be scored (766 eliminated genes)
##
## Level 8: 75 nodes to be scored (2637 eliminated genes)
##
## Level 7: 74 nodes to be scored (5668 eliminated genes)
##
## Level 6: 72 nodes to be scored (9884 eliminated genes)
##
## Level 5: 59 nodes to be scored (12006 eliminated genes)
##
## Level 4: 38 nodes to be scored (14630 eliminated genes)
##
## Level 3: 36 nodes to be scored (16576 eliminated genes)
##
## Level 2: 2 nodes to be scored (17315 eliminated genes)
##
## Level 1: 1 nodes to be scored (17557 eliminated genes)
if(nrow(ResCCAll$ResSel)>0){
topGOBarplot_new(TopGORes=ResCCAll$ResSel, terms=nterms, pvalTh=0.01, plotTitle=paste0("All DMGenes - ", Condition_2, " vs ", Condition_1),
palette=pals$All, flip_x=FALSE)
}
if(nrow(ResCCDown$ResSel)>0){
topGOBarplot_new(TopGORes=ResCCDown$ResSel, terms=nterms, pvalTh=0.01, plotTitle=paste0("Down-methylated genes (associated to DMRs where methylation is lower in ", Condition_2, ")"),
palette=pals$Down, flip_x=FALSE)
}
if(nrow(ResCCUp$ResSel)>0){
topGOBarplot_new(TopGORes=ResCCUp$ResSel, terms=nterms, pvalTh=0.01, plotTitle=paste0("Up-methylated genes (associated to DMRs where methylation is higher in ", Condition_2, ")"), palette=pals$Up, flip_x=TRUE)
}
if(nrow(ResCCAll$ResSel)>0 && nrow(ResCCDown$ResSel)>0 && nrow(ResCCUp$ResSel)>0){
topGOBarplotAll_new(TopGOResAll=ResCCAll$ResSel, TopGOResDown=ResCCDown$ResSel, TopGOResUp=ResCCUp$ResSel, terms=nterms, pvalTh=0.01, plotTitle="TopGO analysis of DMGs - ")
}
if(nrow(ResCCAll$ResSel)>0){
bubbleplot(ResCCAll$ResSel, terms=nterms, Ont = "CC", pvalTh=0.01, plotTitle="TopGO analysis of DMGs - All")
}
if(nrow(ResCCDown$ResSel)>0){
bubbleplot(ResCCDown$ResSel, terms=nterms, Ont = "CC", pvalTh=0.01, plotTitle="TopGO analysis of DMGs - Down", fillCol = "blue")
}
if(nrow(ResCCUp$ResSel)>0){
bubbleplot(ResCCUp$ResSel, terms=nterms, Ont = "CC", pvalTh=0.01, plotTitle="TopGO analysis of DMGs - Up", fillCol = "red")
}
if(nrow(ResCCAll$ResSel)>0){
plotGenesInTerm_v2(ResCCAll$ResSel, ResCCAll$GOdata, DMRs = DMReg, nterms=nterms, ngenes=12, plotTitle="Top genes in top GO terms - All", Interactive=FALSE)
}
if(nrow(ResCCDown$ResSel)>0){
plotGenesInTerm_v2(ResCCDown$ResSel, ResCCAll$GOdata, DMRs = DMReg, nterms=nterms, ngenes=12, plotTitle="Top genes in top GO terms - Down", Interactive=FALSE, fillCol='blue')
}
if(nrow(ResCCUp$ResSel)>0){
plotGenesInTerm_v2(ResCCUp$ResSel, ResCCAll$GOdata, DMRs = DMReg, nterms=nterms, ngenes=12, plotTitle="Top genes in top GO terms - Up", Interactive=FALSE, fillCol='red')
}
Most of the useful information has been saved during the analysis. Here I save the workspace and information about the session.
<- sessionInfo()
SessionInfo <- date() Date
Date## [1] "Fri Jan 10 12:37:47 2025"
SessionInfo## R version 4.2.1 (2022-06-23)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.4 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C LC_ADDRESS=C LC_TELEPHONE=C LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 stats graphics grDevices datasets utils methods base
##
## other attached packages:
## [1] dplyr_1.1.2 tidyr_1.3.0 data.table_1.14.8 topGO_2.50.0 SparseM_1.81 GO.db_3.16.0 graph_1.76.0 viridis_0.6.2
## [9] viridisLite_0.4.2 RColorBrewer_1.1-3 gridExtra_2.3 org.Hs.eg.db_3.16.0 AnnotationDbi_1.60.2 IRanges_2.32.0 S4Vectors_0.36.2 Biobase_2.58.0
## [17] BiocGenerics_0.44.0 ggplot2_3.4.2 DT_0.28
##
## loaded via a namespace (and not attached):
## [1] utf8_1.2.3 R.utils_2.12.2 tidyselect_1.2.0 RSQLite_2.3.1
## [5] htmlwidgets_1.6.2 grid_4.2.1 BiocParallel_1.32.6 methylSig_1.10.0
## [9] scatterpie_0.1.8 munsell_0.5.0 codetools_0.2-19 withr_2.5.0
## [13] colorspace_2.1-0 GOSemSim_2.24.0 filelock_1.0.2 highr_0.10
## [17] knitr_1.43 rstudioapi_0.15.0 DOSE_3.24.2 MatrixGenerics_1.10.0
## [21] labeling_0.4.2 GenomeInfoDbData_1.2.9 polyclip_1.10-4 bit64_4.0.5
## [25] farver_2.1.1 rhdf5_2.42.1 vctrs_0.6.3 treeio_1.23.1
## [29] generics_0.1.3 xfun_0.39 BiocFileCache_2.6.1 R6_2.5.1
## [33] GenomeInfoDb_1.34.9 graphlayouts_0.8.4 locfit_1.5-9.7 bitops_1.0-7
## [37] rhdf5filters_1.10.1 cachem_1.0.8 fgsea_1.24.0 gridGraphics_0.5-1
## [41] DelayedArray_0.24.0 BiocIO_1.8.0 scales_1.2.1 bsseq_1.34.0
## [45] ggraph_2.1.0 enrichplot_1.18.4 gtable_0.3.3 tidygraph_1.2.3
## [49] rlang_1.1.1 splines_4.2.1 rtracklayer_1.58.0 lazyeval_0.2.2
## [53] DSS_2.46.0 BiocManager_1.30.25 yaml_2.3.7 reshape2_1.4.4
## [57] GenomicFeatures_1.50.4 qvalue_2.30.0 tools_4.2.1 ggplotify_0.1.0
## [61] gplots_3.1.3 jquerylib_0.1.4 Rcpp_1.0.11 plyr_1.8.8
## [65] sparseMatrixStats_1.10.0 progress_1.2.2 zlibbioc_1.44.0 purrr_1.0.1
## [69] RCurl_1.98-1.12 prettyunits_1.1.1 cowplot_1.1.1 SummarizedExperiment_1.28.0
## [73] ggrepel_0.9.3 magrittr_2.0.3 matrixStats_1.0.0 hms_1.1.3
## [77] patchwork_1.1.2 evaluate_0.21 HDO.db_0.99.1 XML_3.99-0.14
## [81] compiler_4.2.1 biomaRt_2.54.1 tibble_3.2.1 KernSmooth_2.23-22
## [85] crayon_1.5.2 shadowtext_0.1.2 R.oo_1.25.0 htmltools_0.5.5
## [89] ggfun_0.0.9 aplot_0.1.10 DBI_1.1.3 tweenr_2.0.2
## [93] ChIPseeker_1.34.1 dbplyr_2.3.3 MASS_7.3-60 rappdirs_0.3.3
## [97] boot_1.3-28.1 Matrix_1.6-0 permute_0.9-7 cli_3.6.1
## [101] R.methodsS3_1.8.2 parallel_4.2.1 igraph_1.5.0 GenomicRanges_1.50.2
## [105] pkgconfig_2.0.3 TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2 GenomicAlignments_1.34.1 plotly_4.10.2
## [109] xml2_1.3.5 ggtree_3.6.2 bslib_0.5.0 XVector_0.38.0
## [113] yulab.utils_0.0.6 stringr_1.5.0 digest_0.6.33 Biostrings_2.66.0
## [117] rmarkdown_2.23 fastmatch_1.1-3 tidytree_0.4.2 DelayedMatrixStats_1.20.0
## [121] restfulr_0.0.15 curl_5.0.1 Rsamtools_2.14.0 gtools_3.9.4
## [125] rjson_0.2.21 lifecycle_1.0.3 nlme_3.1-162 jsonlite_1.8.7
## [129] Rhdf5lib_1.20.0 limma_3.54.2 BSgenome_1.66.3 fansi_1.0.4
## [133] pillar_1.9.0 lattice_0.21-8 plotrix_3.8-2 KEGGREST_1.38.0
## [137] fastmap_1.1.1 httr_1.4.6 glue_1.6.2 png_0.1-8
## [141] bit_4.0.5 ggforce_0.4.1 stringi_1.7.12 sass_0.4.7
## [145] HDF5Array_1.26.0 blob_1.2.4 caTools_1.18.2 memoise_2.0.1
## [149] renv_1.0.9 ape_5.7-1