::opts_chunk$set(echo = TRUE, collapse = TRUE, warning = FALSE)
knitr
<- params$Condition_1
Condition_1 <- params$Condition_2 Condition_2
Top GO Analysis for hiPSCs vs hPGCLCs
For this functional we will focus only on ALL differentially methylated genomic regions of protein coding genes!
library(DT)
library(ggplot2)
library(AnnotationDbi)
library(org.Hs.eg.db)
library(gridExtra)
library(RColorBrewer)
library(viridis)
library(topGO)
library(data.table)
library(tidyr)
library(dplyr)
source("TopGO_helper_meth.R")
<- ifelse(is.null(params$OutputFolder), getwd(), params$OutputFolder)
OutputFolder
if (dir.exists(OutputFolder) == FALSE) {
dir.create(OutputFolder, recursive=TRUE)
}
<- readRDS(params$DMRAnnotated)
DMRAnnotated <- readRDS(params$GeneUniverse) GeneUniverse_df
<- GeneUniverse_df[GeneUniverse_df$gene_biotype %in% "protein_coding",]
GeneUniverse_df_protein_coding <- unique(GeneUniverse_df_protein_coding$hgnc_symbol) GeneUniverse
We have 18940 protein-coding genes in the universe.
Saving of selected DMRs
<- DMRAnnotated[[paste0(Condition_1,"vs",Condition_2)]]
DMReg <- DMReg %>% filter(gene_biotype == "protein_coding") DMReg
Gene ontology enrichment analysis is performed on the set of DMGs (Differentially Methylates Genes) using TopGO with Fisher statistics and weight01 algorithm. DMGs are now splitted in down- and up-methylated.
I generate vectors for the gene universe, all modulated genes, up-meth genes and down-meth genes in the format required by TopGO.
<- topGOGeneVectors_meth(DMRAnnotated, Cond1 = Condition_1, Cond2 = Condition_2, genomic_type = "all", gene_type = "protein_coding", Universe = GeneUniverse)
GeneVectors ## [1] "All the genes associated to DMRs are in the gene universe!"
Therefore:
N.B.: The sum of down- and up- methylated genes is > the number of modulated genes because some genes are shared between up and down lists. This is because for example in a promoter of a gene meth can be higher for a cell type and in the gene body of the same gene meth can be higher for the other cell type!
<- ifelse(length(grep('BP', params$TopGO))!=0, TRUE, FALSE)
BpEval <- ifelse(length(grep('MF', params$TopGO))!=0, TRUE, FALSE)
MfEval <- ifelse(length(grep('CC', params$TopGO))!=0, TRUE, FALSE) CcEval
On the basis of the analysis settings, the enrichment for Biological Process IS performed.
# I generate a list that contains the association between each gene and the GO terms that are associated to it
<- topGO::annFUN.org(whichOnto="BP", feasibleGenes=names(GeneVectors$DMGenes),
BPann mapping="org.Hs.eg.db", ID="symbol") %>% inverseList()
# Wrapper function for topGO analysis
<- topGOResults_new(Genes=GeneVectors$DMGenes, gene2GO=BPann, ontology='BP',
ResBPAll desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher',
EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
saveRes=TRUE, outDir=paste0(OutputFolder), fileName='BPAll', maxAnn = 500)
## Gene vector contains levels: 0,1
##
## Building most specific GOs .....
## ( 12448 GO terms found. )
##
## Build GO DAG topology ..........
## ( 15834 GO terms and 35900 relations. )
##
## Annotating nodes ...............
## ( 16734 genes annotated to the GO terms. )
##
## -- Weight01 Algorithm --
##
## the algorithm is scoring 5211 nontrivial nodes
## parameters:
## test statistic: fisher
##
## Level 18: 3 nodes to be scored (0 eliminated genes)
##
## Level 17: 8 nodes to be scored (0 eliminated genes)
##
## Level 16: 14 nodes to be scored (37 eliminated genes)
##
## Level 15: 28 nodes to be scored (134 eliminated genes)
##
## Level 14: 59 nodes to be scored (300 eliminated genes)
##
## Level 13: 102 nodes to be scored (646 eliminated genes)
##
## Level 12: 185 nodes to be scored (1744 eliminated genes)
##
## Level 11: 369 nodes to be scored (4034 eliminated genes)
##
## Level 10: 584 nodes to be scored (6289 eliminated genes)
##
## Level 9: 746 nodes to be scored (8220 eliminated genes)
##
## Level 8: 805 nodes to be scored (10632 eliminated genes)
##
## Level 7: 840 nodes to be scored (12768 eliminated genes)
##
## Level 6: 693 nodes to be scored (14177 eliminated genes)
##
## Level 5: 426 nodes to be scored (15504 eliminated genes)
##
## Level 4: 236 nodes to be scored (16132 eliminated genes)
##
## Level 3: 94 nodes to be scored (16392 eliminated genes)
##
## Level 2: 18 nodes to be scored (16500 eliminated genes)
##
## Level 1: 1 nodes to be scored (16558 eliminated genes)
# Wrapper function for topGO analysis
<- topGOResults_new(Genes=GeneVectors$DMGenesDown, gene2GO=BPann, ontology='BP',
ResBPDown desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher',
EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
saveRes=TRUE, outDir=paste0(OutputFolder), fileName='BPDown', maxAnn = 500)
## Gene vector contains levels: 0,1
##
## Building most specific GOs .....
## ( 12448 GO terms found. )
##
## Build GO DAG topology ..........
## ( 15834 GO terms and 35900 relations. )
##
## Annotating nodes ...............
## ( 16734 genes annotated to the GO terms. )
##
## -- Weight01 Algorithm --
##
## the algorithm is scoring 5169 nontrivial nodes
## parameters:
## test statistic: fisher
##
## Level 18: 3 nodes to be scored (0 eliminated genes)
##
## Level 17: 8 nodes to be scored (0 eliminated genes)
##
## Level 16: 14 nodes to be scored (37 eliminated genes)
##
## Level 15: 28 nodes to be scored (134 eliminated genes)
##
## Level 14: 59 nodes to be scored (300 eliminated genes)
##
## Level 13: 102 nodes to be scored (646 eliminated genes)
##
## Level 12: 185 nodes to be scored (1744 eliminated genes)
##
## Level 11: 362 nodes to be scored (4034 eliminated genes)
##
## Level 10: 575 nodes to be scored (6289 eliminated genes)
##
## Level 9: 736 nodes to be scored (8182 eliminated genes)
##
## Level 8: 796 nodes to be scored (10622 eliminated genes)
##
## Level 7: 837 nodes to be scored (12749 eliminated genes)
##
## Level 6: 692 nodes to be scored (14171 eliminated genes)
##
## Level 5: 424 nodes to be scored (15504 eliminated genes)
##
## Level 4: 236 nodes to be scored (16132 eliminated genes)
##
## Level 3: 93 nodes to be scored (16392 eliminated genes)
##
## Level 2: 18 nodes to be scored (16500 eliminated genes)
##
## Level 1: 1 nodes to be scored (16558 eliminated genes)
# Selection on enrichment of at least 2 is implemented (also to avoid depleted categories). Then categories are ranked by PVal and all the ones with Pval < th are selected. If the number is < minTerms, othter terms are included to reach the minimum number.
<- topGOResults_new(Genes=GeneVectors$DMGenesUp, gene2GO=BPann, ontology='BP',
ResBPUp desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher',
EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
saveRes=TRUE, outDir=OutputFolder, fileName='BPUp', maxAnn = 500)
## Gene vector contains levels: 0,1
##
## Building most specific GOs .....
## ( 12448 GO terms found. )
##
## Build GO DAG topology ..........
## ( 15834 GO terms and 35900 relations. )
##
## Annotating nodes ...............
## ( 16734 genes annotated to the GO terms. )
##
## -- Weight01 Algorithm --
##
## the algorithm is scoring 2291 nontrivial nodes
## parameters:
## test statistic: fisher
##
## Level 17: 3 nodes to be scored (0 eliminated genes)
##
## Level 16: 4 nodes to be scored (0 eliminated genes)
##
## Level 15: 11 nodes to be scored (70 eliminated genes)
##
## Level 14: 18 nodes to be scored (146 eliminated genes)
##
## Level 13: 35 nodes to be scored (323 eliminated genes)
##
## Level 12: 51 nodes to be scored (871 eliminated genes)
##
## Level 11: 115 nodes to be scored (3008 eliminated genes)
##
## Level 10: 191 nodes to be scored (4672 eliminated genes)
##
## Level 9: 297 nodes to be scored (5945 eliminated genes)
##
## Level 8: 342 nodes to be scored (7969 eliminated genes)
##
## Level 7: 379 nodes to be scored (10259 eliminated genes)
##
## Level 6: 361 nodes to be scored (12969 eliminated genes)
##
## Level 5: 257 nodes to be scored (14404 eliminated genes)
##
## Level 4: 140 nodes to be scored (15886 eliminated genes)
##
## Level 3: 71 nodes to be scored (16276 eliminated genes)
##
## Level 2: 15 nodes to be scored (16462 eliminated genes)
##
## Level 1: 1 nodes to be scored (16542 eliminated genes)
=15
nterms<- c(All='forestgreen', Down='blue', Up='red')
cols <- lapply(cols, {function(el) grDevices::colorRampPalette(c('gold', el))(nterms)}) pals
if(nrow(ResBPAll$ResSel)>0){
topGOBarplot_new(TopGORes=ResBPAll$ResSel, terms=nterms, pvalTh=0.01, plotTitle=paste0("All DMGenes - ", Condition_2, " vs ", Condition_1),
palette=pals$All, flip_x=FALSE)
}
if(nrow(ResBPDown$ResSel)>0){
topGOBarplot_new(TopGORes=ResBPDown$ResSel, terms=nterms, pvalTh=0.01, plotTitle=paste0("Down-methylated genes (associated to DMRs where methylation is lower in ", Condition_2, ")"),
palette=pals$Down, flip_x=FALSE)
}
if(nrow(ResBPUp$ResSel)>0){
topGOBarplot_new(TopGORes=ResBPUp$ResSel, terms=nterms, pvalTh=0.01, plotTitle=paste0("Up-methylated genes (associated to DMRs where methylation is higher in ", Condition_2, ")"),
palette=pals$Up, flip_x=TRUE)
}
if(nrow(ResBPAll$ResSel)>0 && nrow(ResBPDown$ResSel)>0 && nrow(ResBPUp$ResSel)>0){
topGOBarplotAll_new(TopGOResAll=ResBPAll$ResSel, TopGOResDown=ResBPDown$ResSel, TopGOResUp=ResBPUp$ResSel,
terms=nterms, pvalTh=0.01, plotTitle="TopGO analysis of DMGs - ")
}
if(nrow(ResBPAll$ResSel)>0){
bubbleplot(ResBPAll$ResSel, terms=nterms, Ont = "BP", pvalTh=0.01, plotTitle="TopGO analysis of DMGs - All")
}
if(nrow(ResBPDown$ResSel)>0){
bubbleplot(ResBPDown$ResSel, terms=nterms, Ont = "BP", pvalTh=0.01, plotTitle="TopGO analysis of DMGs - Down", fillCol = "blue")
}
if(nrow(ResBPUp$ResSel)>0){
bubbleplot(ResBPUp$ResSel, terms=nterms, Ont = "BP", pvalTh=0.01, plotTitle="TopGO analysis of DMGs - Up", fillCol = "red")
}
if(nrow(ResBPAll$ResSel)>0){
plotGenesInTerm_v2(ResBPAll$ResSel, ResBPAll$GOdata, DMRs = DMReg, nterms=nterms, ngenes=12, plotTitle="Top genes in top GO terms - All", Interactive=FALSE)
}
if(nrow(ResBPDown$ResSel)>0){
plotGenesInTerm_v2(ResBPDown$ResSel, ResBPAll$GOdata, DMRs = DMReg, nterms=nterms, ngenes=12, plotTitle="Top genes in top GO terms - Down", Interactive=FALSE, fillCol='blue')
}
if(nrow(ResBPUp$ResSel)>0){
plotGenesInTerm_v2(ResBPUp$ResSel, ResBPAll$GOdata, DMRs = DMReg, nterms=nterms, ngenes=12, plotTitle="Top genes in top GO terms - Up", Interactive=FALSE, fillCol='red')
}
On the basis of the analysis settings, the enrichment for Molecular Function IS performed.
# I generate a list that contains the association between each gene and the GO terms that are associated to it
<- topGO::annFUN.org(whichOnto='MF', feasibleGenes=names(GeneVectors$DMGenes),
MFann mapping='org.Hs.eg.db', ID='symbol') %>% inverseList()
# Wrapper function for topGO analysis
<- topGOResults_new(Genes=GeneVectors$DMGenes, gene2GO=MFann, ontology='MF',
ResMFAll desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher',
EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
saveRes=TRUE, outDir=OutputFolder, fileName='MFAll', maxAnn = 500)
## Gene vector contains levels: 0,1
##
## Building most specific GOs .....
## ( 4495 GO terms found. )
##
## Build GO DAG topology ..........
## ( 4939 GO terms and 6405 relations. )
##
## Annotating nodes ...............
## ( 17198 genes annotated to the GO terms. )
##
## -- Weight01 Algorithm --
##
## the algorithm is scoring 926 nontrivial nodes
## parameters:
## test statistic: fisher
##
## Level 12: 1 nodes to be scored (0 eliminated genes)
##
## Level 11: 10 nodes to be scored (0 eliminated genes)
##
## Level 10: 16 nodes to be scored (17 eliminated genes)
##
## Level 9: 38 nodes to be scored (237 eliminated genes)
##
## Level 8: 70 nodes to be scored (1590 eliminated genes)
##
## Level 7: 132 nodes to be scored (3927 eliminated genes)
##
## Level 6: 177 nodes to be scored (4748 eliminated genes)
##
## Level 5: 217 nodes to be scored (6890 eliminated genes)
##
## Level 4: 188 nodes to be scored (10615 eliminated genes)
##
## Level 3: 59 nodes to be scored (13753 eliminated genes)
##
## Level 2: 17 nodes to be scored (14764 eliminated genes)
##
## Level 1: 1 nodes to be scored (17033 eliminated genes)
<- topGOResults_new(Genes=GeneVectors$DMGenesDown, gene2GO=MFann, ontology='MF',
ResMFDown desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher',
EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
saveRes=TRUE, outDir=OutputFolder, fileName='MFDown', maxAnn = 500)
## Gene vector contains levels: 0,1
##
## Building most specific GOs .....
## ( 4495 GO terms found. )
##
## Build GO DAG topology ..........
## ( 4939 GO terms and 6405 relations. )
##
## Annotating nodes ...............
## ( 17198 genes annotated to the GO terms. )
##
## -- Weight01 Algorithm --
##
## the algorithm is scoring 923 nontrivial nodes
## parameters:
## test statistic: fisher
##
## Level 12: 1 nodes to be scored (0 eliminated genes)
##
## Level 11: 10 nodes to be scored (0 eliminated genes)
##
## Level 10: 16 nodes to be scored (17 eliminated genes)
##
## Level 9: 38 nodes to be scored (237 eliminated genes)
##
## Level 8: 69 nodes to be scored (1590 eliminated genes)
##
## Level 7: 132 nodes to be scored (3927 eliminated genes)
##
## Level 6: 176 nodes to be scored (4733 eliminated genes)
##
## Level 5: 216 nodes to be scored (6890 eliminated genes)
##
## Level 4: 188 nodes to be scored (10615 eliminated genes)
##
## Level 3: 59 nodes to be scored (13753 eliminated genes)
##
## Level 2: 17 nodes to be scored (14764 eliminated genes)
##
## Level 1: 1 nodes to be scored (17033 eliminated genes)
<- topGOResults_new(Genes=GeneVectors$DMGenesUp, gene2GO=MFann, ontology='MF',
ResMFUp desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher',
EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
saveRes=TRUE, outDir=OutputFolder, fileName='MFUp', maxAnn = 500)
## Gene vector contains levels: 0,1
##
## Building most specific GOs .....
## ( 4495 GO terms found. )
##
## Build GO DAG topology ..........
## ( 4939 GO terms and 6405 relations. )
##
## Annotating nodes ...............
## ( 17198 genes annotated to the GO terms. )
##
## -- Weight01 Algorithm --
##
## the algorithm is scoring 229 nontrivial nodes
## parameters:
## test statistic: fisher
##
## Level 11: 2 nodes to be scored (0 eliminated genes)
##
## Level 10: 3 nodes to be scored (0 eliminated genes)
##
## Level 9: 9 nodes to be scored (105 eliminated genes)
##
## Level 8: 13 nodes to be scored (1251 eliminated genes)
##
## Level 7: 22 nodes to be scored (3296 eliminated genes)
##
## Level 6: 40 nodes to be scored (3671 eliminated genes)
##
## Level 5: 53 nodes to be scored (4661 eliminated genes)
##
## Level 4: 49 nodes to be scored (7802 eliminated genes)
##
## Level 3: 28 nodes to be scored (11292 eliminated genes)
##
## Level 2: 9 nodes to be scored (13782 eliminated genes)
##
## Level 1: 1 nodes to be scored (16972 eliminated genes)
if(nrow(ResMFAll$ResSel)>0){
topGOBarplot_new(TopGORes=ResMFAll$ResSel, terms=nterms, pvalTh=0.01, plotTitle=paste0("All DMGenes - ", Condition_2, " vs ", Condition_1),
palette=pals$All, flip_x=FALSE)
}
if(nrow(ResMFDown$ResSel)>0){
topGOBarplot_new(TopGORes=ResMFDown$ResSel, terms=nterms, pvalTh=0.01, plotTitle=paste0("Down-methylated genes (associated to DMRs where methylation is lower in ", Condition_2, ")"),
palette=pals$Down, flip_x=FALSE)
}
if(nrow(ResMFUp$ResSel)>0){
topGOBarplot_new(TopGORes=ResMFUp$ResSel, terms=nterms, pvalTh=0.01, plotTitle=paste0("Up-methylated genes (associated to DMRs where methylation is higher in ", Condition_2, ")"),
palette=pals$Up, flip_x=TRUE)
}
if(nrow(ResMFAll$ResSel)>0 && nrow(ResMFDown$ResSel)>0 && nrow(ResMFUp$ResSel)>0){
topGOBarplotAll_new(TopGOResAll=ResMFAll$ResSel, TopGOResDown=ResMFDown$ResSel, TopGOResUp=ResMFUp$ResSel,
terms=nterms, pvalTh=0.01, plotTitle= "TopGO analysis of DMGs - ")
}
if(nrow(ResMFAll$ResSel)>0){
bubbleplot(ResMFAll$ResSel, terms=nterms, Ont = "MF", pvalTh=0.01, plotTitle="TopGO analysis of DMGs - All")
}
if(nrow(ResMFDown$ResSel)>0){
bubbleplot(ResMFDown$ResSel, terms=nterms, Ont = "MF", pvalTh=0.01, plotTitle="TopGO analysis of DMGs - Down", fillCol = "blue")
}
if(nrow(ResMFUp$ResSel)>0){
bubbleplot(ResMFUp$ResSel, terms=nterms, Ont = "MF", pvalTh=0.01, plotTitle="TopGO analysis of DMGs - Up", fillCol = "red")
}
if(nrow(ResMFAll$ResSel)>0){
plotGenesInTerm_v2(ResMFAll$ResSel, ResMFAll$GOdata, DMRs = DMReg, nterms=nterms, ngenes=12, plotTitle="Top genes in top GO terms - All", Interactive=FALSE)
}
if(nrow(ResMFDown$ResSel)>0){
plotGenesInTerm_v2(ResMFDown$ResSel, ResMFAll$GOdata, DMRs = DMReg, nterms=nterms, ngenes=12, plotTitle="Top genes in top GO terms - Down", Interactive=FALSE, fillCol='blue')
}
if(nrow(ResMFUp$ResSel)>0){
plotGenesInTerm_v2(ResMFUp$ResSel, ResMFAll$GOdata, DMRs = DMReg, nterms=nterms, ngenes=12, plotTitle="Top genes in top GO terms - Up", Interactive=FALSE, fillCol='red')
}
On the basis of the analysis settings, the enrichment for Cellular Component IS performed.
# I generate a list that contains the association between each gene and the GO terms that are associated to it
<- topGO::annFUN.org(whichOnto='CC', feasibleGenes=names(GeneVectors$DMGenes),
CCann mapping='org.Hs.eg.db', ID='symbol') %>% inverseList()
# Wrapper function for topGO analysis
<- topGOResults_new(Genes=GeneVectors$DMGenes, gene2GO=CCann, ontology='CC',
ResCCAll desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher',
EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
saveRes=TRUE, outDir=OutputFolder, fileName='CCAll', maxAnn = 500)
## Gene vector contains levels: 0,1
##
## Building most specific GOs .....
## ( 1818 GO terms found. )
##
## Build GO DAG topology ..........
## ( 2014 GO terms and 3390 relations. )
##
## Annotating nodes ...............
## ( 17642 genes annotated to the GO terms. )
##
## -- Weight01 Algorithm --
##
## the algorithm is scoring 619 nontrivial nodes
## parameters:
## test statistic: fisher
##
## Level 13: 1 nodes to be scored (0 eliminated genes)
##
## Level 12: 3 nodes to be scored (0 eliminated genes)
##
## Level 11: 28 nodes to be scored (30 eliminated genes)
##
## Level 10: 63 nodes to be scored (85 eliminated genes)
##
## Level 9: 95 nodes to be scored (1060 eliminated genes)
##
## Level 8: 96 nodes to be scored (3171 eliminated genes)
##
## Level 7: 89 nodes to be scored (6013 eliminated genes)
##
## Level 6: 86 nodes to be scored (9926 eliminated genes)
##
## Level 5: 71 nodes to be scored (12099 eliminated genes)
##
## Level 4: 46 nodes to be scored (14674 eliminated genes)
##
## Level 3: 38 nodes to be scored (16590 eliminated genes)
##
## Level 2: 2 nodes to be scored (17317 eliminated genes)
##
## Level 1: 1 nodes to be scored (17557 eliminated genes)
# Wrapper function for topGO analysis
<- topGOResults_new(Genes=GeneVectors$DMGenesDown, gene2GO=CCann, ontology='CC',
ResCCDown desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher',
EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
saveRes=TRUE, outDir=OutputFolder, fileName='CCDown', maxAnn = 500)
## Gene vector contains levels: 0,1
##
## Building most specific GOs .....
## ( 1818 GO terms found. )
##
## Build GO DAG topology ..........
## ( 2014 GO terms and 3390 relations. )
##
## Annotating nodes ...............
## ( 17642 genes annotated to the GO terms. )
##
## -- Weight01 Algorithm --
##
## the algorithm is scoring 617 nontrivial nodes
## parameters:
## test statistic: fisher
##
## Level 13: 1 nodes to be scored (0 eliminated genes)
##
## Level 12: 3 nodes to be scored (0 eliminated genes)
##
## Level 11: 28 nodes to be scored (30 eliminated genes)
##
## Level 10: 63 nodes to be scored (85 eliminated genes)
##
## Level 9: 95 nodes to be scored (1060 eliminated genes)
##
## Level 8: 96 nodes to be scored (3171 eliminated genes)
##
## Level 7: 89 nodes to be scored (6013 eliminated genes)
##
## Level 6: 86 nodes to be scored (9926 eliminated genes)
##
## Level 5: 70 nodes to be scored (12099 eliminated genes)
##
## Level 4: 45 nodes to be scored (14674 eliminated genes)
##
## Level 3: 38 nodes to be scored (16590 eliminated genes)
##
## Level 2: 2 nodes to be scored (17317 eliminated genes)
##
## Level 1: 1 nodes to be scored (17557 eliminated genes)
# Wrapper function for topGO analysis
<- topGOResults_new(Genes=GeneVectors$DMGenesUp, gene2GO=CCann, ontology='CC',
ResCCUp desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher',
EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
saveRes=TRUE, outDir=OutputFolder, fileName='CCUp', maxAnn = 500)
## Gene vector contains levels: 0,1
##
## Building most specific GOs .....
## ( 1818 GO terms found. )
##
## Build GO DAG topology ..........
## ( 2014 GO terms and 3390 relations. )
##
## Annotating nodes ...............
## ( 17642 genes annotated to the GO terms. )
##
## -- Weight01 Algorithm --
##
## the algorithm is scoring 197 nontrivial nodes
## parameters:
## test statistic: fisher
##
## Level 11: 5 nodes to be scored (0 eliminated genes)
##
## Level 10: 11 nodes to be scored (0 eliminated genes)
##
## Level 9: 23 nodes to be scored (381 eliminated genes)
##
## Level 8: 25 nodes to be scored (1190 eliminated genes)
##
## Level 7: 31 nodes to be scored (3982 eliminated genes)
##
## Level 6: 28 nodes to be scored (7886 eliminated genes)
##
## Level 5: 29 nodes to be scored (10865 eliminated genes)
##
## Level 4: 21 nodes to be scored (14298 eliminated genes)
##
## Level 3: 21 nodes to be scored (16482 eliminated genes)
##
## Level 2: 2 nodes to be scored (17288 eliminated genes)
##
## Level 1: 1 nodes to be scored (17553 eliminated genes)
if(nrow(ResCCAll$ResSel)>0){
topGOBarplot_new(TopGORes=ResCCAll$ResSel, terms=nterms, pvalTh=0.01, plotTitle=paste0("All DMGenes - ", Condition_2, " vs ", Condition_1),
palette=pals$All, flip_x=FALSE)
}
if(nrow(ResCCDown$ResSel)>0){
topGOBarplot_new(TopGORes=ResCCDown$ResSel, terms=nterms, pvalTh=0.01, plotTitle=paste0("Down-methylated genes (associated to DMRs where methylation is lower in ", Condition_2, ")"),
palette=pals$Down, flip_x=FALSE)
}
if(nrow(ResCCUp$ResSel)>0){
topGOBarplot_new(TopGORes=ResCCUp$ResSel, terms=nterms, pvalTh=0.01, plotTitle=paste0("Up-methylated genes (associated to DMRs where methylation is higher in ", Condition_2, ")"), palette=pals$Up, flip_x=TRUE)
}
if(nrow(ResCCAll$ResSel)>0 && nrow(ResCCDown$ResSel)>0 && nrow(ResCCUp$ResSel)>0){
topGOBarplotAll_new(TopGOResAll=ResCCAll$ResSel, TopGOResDown=ResCCDown$ResSel, TopGOResUp=ResCCUp$ResSel, terms=nterms, pvalTh=0.01, plotTitle="TopGO analysis of DMGs - ")
}
if(nrow(ResCCAll$ResSel)>0){
bubbleplot(ResCCAll$ResSel, terms=nterms, Ont = "CC", pvalTh=0.01, plotTitle="TopGO analysis of DMGs - All")
}
if(nrow(ResCCDown$ResSel)>0){
bubbleplot(ResCCDown$ResSel, terms=nterms, Ont = "CC", pvalTh=0.01, plotTitle="TopGO analysis of DMGs - Down", fillCol = "blue")
}
if(nrow(ResCCUp$ResSel)>0){
bubbleplot(ResCCUp$ResSel, terms=nterms, Ont = "CC", pvalTh=0.01, plotTitle="TopGO analysis of DMGs - Up", fillCol = "red")
}
if(nrow(ResCCAll$ResSel)>0){
plotGenesInTerm_v2(ResCCAll$ResSel, ResCCAll$GOdata, DMRs = DMReg, nterms=nterms, ngenes=12, plotTitle="Top genes in top GO terms - All", Interactive=FALSE)
}
if(nrow(ResCCDown$ResSel)>0){
plotGenesInTerm_v2(ResCCDown$ResSel, ResCCAll$GOdata, DMRs = DMReg, nterms=nterms, ngenes=12, plotTitle="Top genes in top GO terms - Down", Interactive=FALSE, fillCol='blue')
}
if(nrow(ResCCUp$ResSel)>0){
plotGenesInTerm_v2(ResCCUp$ResSel, ResCCAll$GOdata, DMRs = DMReg, nterms=nterms, ngenes=12, plotTitle="Top genes in top GO terms - Up", Interactive=FALSE, fillCol='red')
}
Most of the useful information has been saved during the analysis. Here I save the workspace and information about the session.
<- sessionInfo()
SessionInfo <- date() Date
Date## [1] "Fri Jan 10 12:44:58 2025"
SessionInfo## R version 4.2.1 (2022-06-23)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.4 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C LC_ADDRESS=C LC_TELEPHONE=C LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 stats graphics grDevices datasets utils methods base
##
## other attached packages:
## [1] dplyr_1.1.2 tidyr_1.3.0 data.table_1.14.8 topGO_2.50.0 SparseM_1.81 GO.db_3.16.0 graph_1.76.0 viridis_0.6.2
## [9] viridisLite_0.4.2 RColorBrewer_1.1-3 gridExtra_2.3 org.Hs.eg.db_3.16.0 AnnotationDbi_1.60.2 IRanges_2.32.0 S4Vectors_0.36.2 Biobase_2.58.0
## [17] BiocGenerics_0.44.0 ggplot2_3.4.2 DT_0.28
##
## loaded via a namespace (and not attached):
## [1] utf8_1.2.3 R.utils_2.12.2 tidyselect_1.2.0 RSQLite_2.3.1
## [5] htmlwidgets_1.6.2 grid_4.2.1 BiocParallel_1.32.6 methylSig_1.10.0
## [9] scatterpie_0.1.8 munsell_0.5.0 codetools_0.2-19 withr_2.5.0
## [13] colorspace_2.1-0 GOSemSim_2.24.0 filelock_1.0.2 highr_0.10
## [17] knitr_1.43 rstudioapi_0.15.0 DOSE_3.24.2 MatrixGenerics_1.10.0
## [21] labeling_0.4.2 GenomeInfoDbData_1.2.9 polyclip_1.10-4 bit64_4.0.5
## [25] farver_2.1.1 rhdf5_2.42.1 vctrs_0.6.3 treeio_1.23.1
## [29] generics_0.1.3 xfun_0.39 BiocFileCache_2.6.1 R6_2.5.1
## [33] GenomeInfoDb_1.34.9 graphlayouts_0.8.4 locfit_1.5-9.7 bitops_1.0-7
## [37] rhdf5filters_1.10.1 cachem_1.0.8 fgsea_1.24.0 gridGraphics_0.5-1
## [41] DelayedArray_0.24.0 BiocIO_1.8.0 scales_1.2.1 bsseq_1.34.0
## [45] ggraph_2.1.0 enrichplot_1.18.4 gtable_0.3.3 tidygraph_1.2.3
## [49] rlang_1.1.1 splines_4.2.1 rtracklayer_1.58.0 lazyeval_0.2.2
## [53] DSS_2.46.0 BiocManager_1.30.25 yaml_2.3.7 reshape2_1.4.4
## [57] GenomicFeatures_1.50.4 qvalue_2.30.0 tools_4.2.1 ggplotify_0.1.0
## [61] gplots_3.1.3 jquerylib_0.1.4 Rcpp_1.0.11 plyr_1.8.8
## [65] sparseMatrixStats_1.10.0 progress_1.2.2 zlibbioc_1.44.0 purrr_1.0.1
## [69] RCurl_1.98-1.12 prettyunits_1.1.1 cowplot_1.1.1 SummarizedExperiment_1.28.0
## [73] ggrepel_0.9.3 magrittr_2.0.3 matrixStats_1.0.0 hms_1.1.3
## [77] patchwork_1.1.2 evaluate_0.21 HDO.db_0.99.1 XML_3.99-0.14
## [81] compiler_4.2.1 biomaRt_2.54.1 tibble_3.2.1 KernSmooth_2.23-22
## [85] crayon_1.5.2 shadowtext_0.1.2 R.oo_1.25.0 htmltools_0.5.5
## [89] ggfun_0.0.9 aplot_0.1.10 DBI_1.1.3 tweenr_2.0.2
## [93] ChIPseeker_1.34.1 dbplyr_2.3.3 MASS_7.3-60 rappdirs_0.3.3
## [97] boot_1.3-28.1 Matrix_1.6-0 permute_0.9-7 cli_3.6.1
## [101] R.methodsS3_1.8.2 parallel_4.2.1 igraph_1.5.0 GenomicRanges_1.50.2
## [105] pkgconfig_2.0.3 TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2 GenomicAlignments_1.34.1 plotly_4.10.2
## [109] xml2_1.3.5 ggtree_3.6.2 bslib_0.5.0 XVector_0.38.0
## [113] yulab.utils_0.0.6 stringr_1.5.0 digest_0.6.33 Biostrings_2.66.0
## [117] rmarkdown_2.23 fastmatch_1.1-3 tidytree_0.4.2 DelayedMatrixStats_1.20.0
## [121] restfulr_0.0.15 curl_5.0.1 Rsamtools_2.14.0 gtools_3.9.4
## [125] rjson_0.2.21 lifecycle_1.0.3 nlme_3.1-162 jsonlite_1.8.7
## [129] Rhdf5lib_1.20.0 limma_3.54.2 BSgenome_1.66.3 fansi_1.0.4
## [133] pillar_1.9.0 lattice_0.21-8 plotrix_3.8-2 KEGGREST_1.38.0
## [137] fastmap_1.1.1 httr_1.4.6 glue_1.6.2 png_0.1-8
## [141] bit_4.0.5 ggforce_0.4.1 stringi_1.7.12 sass_0.4.7
## [145] HDF5Array_1.26.0 blob_1.2.4 caTools_1.18.2 memoise_2.0.1
## [149] renv_1.0.9 ape_5.7-1