knitr::opts_chunk$set(echo = TRUE, collapse = TRUE, warning = FALSE)

Condition_1 <- params$Condition_1
Condition_2 <- params$Condition_2

Top GO Analysis for iMeLCs vs hPGCLCs

For this functional we will focus only on ALL differentially methylated genomic regions of protein coding genes!

1. Environment Set Up

library(DT)
library(ggplot2)
library(AnnotationDbi)
library(org.Hs.eg.db)
library(gridExtra)
library(RColorBrewer)
library(viridis)
library(topGO)
library(data.table)
library(tidyr)
library(dplyr)

source("TopGO_helper_meth.R")
OutputFolder <- ifelse(is.null(params$OutputFolder), getwd(), params$OutputFolder) 


if (dir.exists(OutputFolder) == FALSE) {
  dir.create(OutputFolder, recursive=TRUE)
}

2. Data Load

DMRAnnotated <- readRDS(params$DMRAnnotated)
GeneUniverse_df <- readRDS(params$GeneUniverse)
GeneUniverse_df_protein_coding <- GeneUniverse_df[GeneUniverse_df$gene_biotype %in% "protein_coding",]
GeneUniverse <- unique(GeneUniverse_df_protein_coding$hgnc_symbol)

We have 18940 protein-coding genes in the universe.

Saving of selected DMRs

DMReg <- DMRAnnotated[[paste0(Condition_1,"vs",Condition_2)]]
DMReg <- DMReg %>% filter(gene_biotype == "protein_coding")

3. TOPGO for Gene Ontology Enrichment analysis

Gene ontology enrichment analysis is performed on the set of DMGs (Differentially Methylates Genes) using TopGO with Fisher statistics and weight01 algorithm. DMGs are now splitted in down- and up-methylated.

3.1 Selection of modulated genes and generation of gene vectors

I generate vectors for the gene universe, all modulated genes, up-meth genes and down-meth genes in the format required by TopGO.

GeneVectors <- topGOGeneVectors_meth(DMRAnnotated, Cond1 = Condition_1, Cond2 = Condition_2, genomic_type = "all", gene_type = "protein_coding", Universe = GeneUniverse)
## [1] "All the genes associated to DMRs are in the gene universe!"

Therefore:

  • universe genes: 18940 genes
  • modulated genes: 780 genes
  • down-methylated genes: 734 genes of interest
  • up-methylated genes: 53 genes of interest

N.B.: The sum of down- and up- methylated genes is > the number of modulated genes because some genes are shared between up and down lists. This is because for example in a promoter of a gene meth can be higher for a cell type and in the gene body of the same gene meth can be higher for the other cell type!

BpEval <- ifelse(length(grep('BP', params$TopGO))!=0, TRUE, FALSE)
MfEval <- ifelse(length(grep('MF', params$TopGO))!=0, TRUE, FALSE)
CcEval <- ifelse(length(grep('CC', params$TopGO))!=0, TRUE, FALSE)

3.2 TopGO analysis: Biological Process

On the basis of the analysis settings, the enrichment for Biological Process IS performed.

Biological Process Analysis for ALL modulated genes: 780 genes

# I generate a list that contains the association between each gene and the GO terms that are associated to it
BPann <- topGO::annFUN.org(whichOnto="BP", feasibleGenes=names(GeneVectors$DMGenes), 
                           mapping="org.Hs.eg.db", ID="symbol") %>% inverseList()

# Wrapper function for topGO analysis 
ResBPAll <- topGOResults_new(Genes=GeneVectors$DMGenes, gene2GO=BPann, ontology='BP', 
                         desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                         EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                         saveRes=TRUE, outDir=paste0(OutputFolder), fileName='BPAll', maxAnn = 500)
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 12448 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 15834 GO terms and 35900 relations. )
## 
## Annotating nodes ...............
##  ( 16734 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 4705 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 18:  2 nodes to be scored    (0 eliminated genes)
## 
##   Level 17:  5 nodes to be scored    (0 eliminated genes)
## 
##   Level 16:  12 nodes to be scored   (35 eliminated genes)
## 
##   Level 15:  23 nodes to be scored   (97 eliminated genes)
## 
##   Level 14:  53 nodes to be scored   (285 eliminated genes)
## 
##   Level 13:  90 nodes to be scored   (613 eliminated genes)
## 
##   Level 12:  160 nodes to be scored  (1687 eliminated genes)
## 
##   Level 11:  331 nodes to be scored  (3966 eliminated genes)
## 
##   Level 10:  513 nodes to be scored  (6172 eliminated genes)
## 
##   Level 9:   672 nodes to be scored  (8054 eliminated genes)
## 
##   Level 8:   733 nodes to be scored  (10367 eliminated genes)
## 
##   Level 7:   748 nodes to be scored  (12590 eliminated genes)
## 
##   Level 6:   635 nodes to be scored  (14071 eliminated genes)
## 
##   Level 5:   395 nodes to be scored  (15454 eliminated genes)
## 
##   Level 4:   225 nodes to be scored  (16118 eliminated genes)
## 
##   Level 3:   89 nodes to be scored   (16385 eliminated genes)
## 
##   Level 2:   18 nodes to be scored   (16500 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (16557 eliminated genes)

Biological Process Analysis for DOWN-METHYLATED genes: 734 genes

# Wrapper function for topGO analysis 
ResBPDown <- topGOResults_new(Genes=GeneVectors$DMGenesDown, gene2GO=BPann, ontology='BP', 
                          desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                          EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                          saveRes=TRUE, outDir=paste0(OutputFolder), fileName='BPDown', maxAnn = 500)
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 12448 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 15834 GO terms and 35900 relations. )
## 
## Annotating nodes ...............
##  ( 16734 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 4564 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 18:  2 nodes to be scored    (0 eliminated genes)
## 
##   Level 17:  5 nodes to be scored    (0 eliminated genes)
## 
##   Level 16:  12 nodes to be scored   (35 eliminated genes)
## 
##   Level 15:  23 nodes to be scored   (97 eliminated genes)
## 
##   Level 14:  52 nodes to be scored   (285 eliminated genes)
## 
##   Level 13:  87 nodes to be scored   (613 eliminated genes)
## 
##   Level 12:  157 nodes to be scored  (1668 eliminated genes)
## 
##   Level 11:  318 nodes to be scored  (3953 eliminated genes)
## 
##   Level 10:  494 nodes to be scored  (6162 eliminated genes)
## 
##   Level 9:   641 nodes to be scored  (7978 eliminated genes)
## 
##   Level 8:   704 nodes to be scored  (10356 eliminated genes)
## 
##   Level 7:   729 nodes to be scored  (12551 eliminated genes)
## 
##   Level 6:   621 nodes to be scored  (14052 eliminated genes)
## 
##   Level 5:   390 nodes to be scored  (15448 eliminated genes)
## 
##   Level 4:   222 nodes to be scored  (16116 eliminated genes)
## 
##   Level 3:   88 nodes to be scored   (16385 eliminated genes)
## 
##   Level 2:   18 nodes to be scored   (16500 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (16557 eliminated genes)

# Selection on enrichment of at least 2 is implemented (also to avoid depleted categories). Then categories are ranked by PVal and all the ones with Pval < th are selected. If the number is < minTerms, othter terms are included to reach the minimum number. 

Biological Process Analysis for UP-METHYLATED genes: 53 genes

ResBPUp <- topGOResults_new(Genes=GeneVectors$DMGenesUp, gene2GO=BPann, ontology='BP', 
                        desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                        EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                        saveRes=TRUE, outDir=OutputFolder, fileName='BPUp', maxAnn = 500)
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 12448 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 15834 GO terms and 35900 relations. )
## 
## Annotating nodes ...............
##  ( 16734 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 2251 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 17:  3 nodes to be scored    (0 eliminated genes)
## 
##   Level 16:  3 nodes to be scored    (0 eliminated genes)
## 
##   Level 15:  11 nodes to be scored   (70 eliminated genes)
## 
##   Level 14:  18 nodes to be scored   (117 eliminated genes)
## 
##   Level 13:  35 nodes to be scored   (347 eliminated genes)
## 
##   Level 12:  46 nodes to be scored   (882 eliminated genes)
## 
##   Level 11:  103 nodes to be scored  (2992 eliminated genes)
## 
##   Level 10:  176 nodes to be scored  (4601 eliminated genes)
## 
##   Level 9:   288 nodes to be scored  (5895 eliminated genes)
## 
##   Level 8:   341 nodes to be scored  (7768 eliminated genes)
## 
##   Level 7:   379 nodes to be scored  (10072 eliminated genes)
## 
##   Level 6:   364 nodes to be scored  (12498 eliminated genes)
## 
##   Level 5:   259 nodes to be scored  (14557 eliminated genes)
## 
##   Level 4:   142 nodes to be scored  (15905 eliminated genes)
## 
##   Level 3:   67 nodes to be scored   (16274 eliminated genes)
## 
##   Level 2:   15 nodes to be scored   (16473 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (16544 eliminated genes)

Result visualization: Barplot

nterms=15
cols <- c(All='forestgreen', Down='blue', Up='red')
pals <- lapply(cols, {function(el) grDevices::colorRampPalette(c('gold', el))(nterms)})
if(nrow(ResBPAll$ResSel)>0){
  topGOBarplot_new(TopGORes=ResBPAll$ResSel, terms=nterms, pvalTh=0.01, plotTitle=paste0("All DMGenes - ", Condition_2, " vs ", Condition_1),
                             palette=pals$All, flip_x=FALSE)
}  

if(nrow(ResBPDown$ResSel)>0){
  topGOBarplot_new(TopGORes=ResBPDown$ResSel, terms=nterms, pvalTh=0.01, plotTitle=paste0("Down-methylated genes (associated to DMRs where methylation is lower in ", Condition_2, ")"),
                             palette=pals$Down, flip_x=FALSE)
}

if(nrow(ResBPUp$ResSel)>0){
  topGOBarplot_new(TopGORes=ResBPUp$ResSel, terms=nterms, pvalTh=0.01, plotTitle=paste0("Up-methylated genes (associated to DMRs where methylation is higher in ", Condition_2, ")"),
                             palette=pals$Up, flip_x=TRUE)
}

if(nrow(ResBPAll$ResSel)>0 && nrow(ResBPDown$ResSel)>0 && nrow(ResBPUp$ResSel)>0){
  topGOBarplotAll_new(TopGOResAll=ResBPAll$ResSel, TopGOResDown=ResBPDown$ResSel, TopGOResUp=ResBPUp$ResSel, 
                terms=nterms, pvalTh=0.01, plotTitle="TopGO analysis of DMGs - ")
}

Result visualization: Bubbleplot

if(nrow(ResBPAll$ResSel)>0){
  bubbleplot(ResBPAll$ResSel, terms=nterms, Ont = "BP", pvalTh=0.01, plotTitle="TopGO analysis of DMGs - All")
  }

if(nrow(ResBPDown$ResSel)>0){
  bubbleplot(ResBPDown$ResSel, terms=nterms, Ont = "BP", pvalTh=0.01, plotTitle="TopGO analysis of DMGs - Down", fillCol = "blue")
  }

if(nrow(ResBPUp$ResSel)>0){
  bubbleplot(ResBPUp$ResSel, terms=nterms, Ont = "BP", pvalTh=0.01, plotTitle="TopGO analysis of DMGs - Up", fillCol = "red")
  }

Result visualization: Top genes in top GO terms

if(nrow(ResBPAll$ResSel)>0){
  plotGenesInTerm_v2(ResBPAll$ResSel, ResBPAll$GOdata, DMRs = DMReg, nterms=nterms, ngenes=12, plotTitle="Top genes in top GO terms - All", Interactive=FALSE)
}

if(nrow(ResBPDown$ResSel)>0){
  plotGenesInTerm_v2(ResBPDown$ResSel, ResBPAll$GOdata, DMRs = DMReg, nterms=nterms, ngenes=12, plotTitle="Top genes in top GO terms - Down", Interactive=FALSE, fillCol='blue')
}  

if(nrow(ResBPUp$ResSel)>0){
  plotGenesInTerm_v2(ResBPUp$ResSel, ResBPAll$GOdata, DMRs = DMReg, nterms=nterms, ngenes=12, plotTitle="Top genes in top GO terms - Up", Interactive=FALSE, fillCol='red')
}

3.3 TopGO analysis: Molecular Function

On the basis of the analysis settings, the enrichment for Molecular Function IS performed.

Molecular Function Enrichment for ALL modulated genes: 780 genes

# I generate a list that contains the association between each gene and the GO terms that are associated to it
MFann <- topGO::annFUN.org(whichOnto='MF', feasibleGenes=names(GeneVectors$DMGenes), 
                           mapping='org.Hs.eg.db', ID='symbol') %>% inverseList()

# Wrapper function for topGO analysis 
ResMFAll <- topGOResults_new(Genes=GeneVectors$DMGenes, gene2GO=MFann, ontology='MF', 
                         desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                         EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                         saveRes=TRUE, outDir=OutputFolder, fileName='MFAll', maxAnn = 500)
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 4495 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 4939 GO terms and 6405 relations. )
## 
## Annotating nodes ...............
##  ( 17198 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 769 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 12:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 11:  8 nodes to be scored    (0 eliminated genes)
## 
##   Level 10:  15 nodes to be scored   (17 eliminated genes)
## 
##   Level 9:   30 nodes to be scored   (206 eliminated genes)
## 
##   Level 8:   53 nodes to be scored   (1569 eliminated genes)
## 
##   Level 7:   102 nodes to be scored  (3845 eliminated genes)
## 
##   Level 6:   139 nodes to be scored  (4496 eliminated genes)
## 
##   Level 5:   188 nodes to be scored  (6497 eliminated genes)
## 
##   Level 4:   164 nodes to be scored  (10331 eliminated genes)
## 
##   Level 3:   52 nodes to be scored   (13672 eliminated genes)
## 
##   Level 2:   16 nodes to be scored   (14698 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (17031 eliminated genes)

Molecular Function Enrichment for DOWN-METHYLATED genes: 734 genes

ResMFDown <- topGOResults_new(Genes=GeneVectors$DMGenesDown, gene2GO=MFann, ontology='MF', 
                          desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                          EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                          saveRes=TRUE, outDir=OutputFolder, fileName='MFDown', maxAnn = 500)
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 4495 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 4939 GO terms and 6405 relations. )
## 
## Annotating nodes ...............
##  ( 17198 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 757 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 12:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 11:  8 nodes to be scored    (0 eliminated genes)
## 
##   Level 10:  15 nodes to be scored   (17 eliminated genes)
## 
##   Level 9:   27 nodes to be scored   (206 eliminated genes)
## 
##   Level 8:   50 nodes to be scored   (1569 eliminated genes)
## 
##   Level 7:   101 nodes to be scored  (3810 eliminated genes)
## 
##   Level 6:   138 nodes to be scored  (4483 eliminated genes)
## 
##   Level 5:   185 nodes to be scored  (6483 eliminated genes)
## 
##   Level 4:   164 nodes to be scored  (10300 eliminated genes)
## 
##   Level 3:   52 nodes to be scored   (13658 eliminated genes)
## 
##   Level 2:   15 nodes to be scored   (14698 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (17031 eliminated genes)

Molecular Function Analysis for UP-METHYLATED genes: 53 genes

ResMFUp <- topGOResults_new(Genes=GeneVectors$DMGenesUp, gene2GO=MFann, ontology='MF', 
                        desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                        EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                        saveRes=TRUE, outDir=OutputFolder, fileName='MFUp', maxAnn = 500)
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 4495 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 4939 GO terms and 6405 relations. )
## 
## Annotating nodes ...............
##  ( 17198 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 227 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 11:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 10:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 9:   7 nodes to be scored    (47 eliminated genes)
## 
##   Level 8:   9 nodes to be scored    (1177 eliminated genes)
## 
##   Level 7:   19 nodes to be scored   (3196 eliminated genes)
## 
##   Level 6:   41 nodes to be scored   (3457 eliminated genes)
## 
##   Level 5:   58 nodes to be scored   (4575 eliminated genes)
## 
##   Level 4:   54 nodes to be scored   (7692 eliminated genes)
## 
##   Level 3:   27 nodes to be scored   (12087 eliminated genes)
## 
##   Level 2:   9 nodes to be scored    (13696 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (16965 eliminated genes)

Result visualization: Barplot

if(nrow(ResMFAll$ResSel)>0){
  topGOBarplot_new(TopGORes=ResMFAll$ResSel, terms=nterms, pvalTh=0.01, plotTitle=paste0("All DMGenes - ", Condition_2, " vs ", Condition_1),
                             palette=pals$All, flip_x=FALSE)
} 

if(nrow(ResMFDown$ResSel)>0){
  topGOBarplot_new(TopGORes=ResMFDown$ResSel, terms=nterms, pvalTh=0.01, plotTitle=paste0("Down-methylated genes (associated to DMRs where methylation is lower in ", Condition_2, ")"),
                             palette=pals$Down, flip_x=FALSE)
}  

if(nrow(ResMFUp$ResSel)>0){
  topGOBarplot_new(TopGORes=ResMFUp$ResSel, terms=nterms, pvalTh=0.01, plotTitle=paste0("Up-methylated genes (associated to DMRs where methylation is higher in ", Condition_2, ")"),
                             palette=pals$Up, flip_x=TRUE)
}

if(nrow(ResMFAll$ResSel)>0 && nrow(ResMFDown$ResSel)>0 && nrow(ResMFUp$ResSel)>0){
  topGOBarplotAll_new(TopGOResAll=ResMFAll$ResSel, TopGOResDown=ResMFDown$ResSel, TopGOResUp=ResMFUp$ResSel, 
                terms=nterms, pvalTh=0.01, plotTitle= "TopGO analysis of DMGs - ")
}  

Result visualization: Bubbleplot

if(nrow(ResMFAll$ResSel)>0){
  bubbleplot(ResMFAll$ResSel, terms=nterms, Ont = "MF", pvalTh=0.01, plotTitle="TopGO analysis of DMGs - All")
  }

if(nrow(ResMFDown$ResSel)>0){
  bubbleplot(ResMFDown$ResSel, terms=nterms, Ont = "MF", pvalTh=0.01, plotTitle="TopGO analysis of DMGs - Down", fillCol = "blue")
  }

if(nrow(ResMFUp$ResSel)>0){
  bubbleplot(ResMFUp$ResSel, terms=nterms, Ont = "MF", pvalTh=0.01, plotTitle="TopGO analysis of DMGs - Up", fillCol = "red")
  }

Result visualization: Top genes in top GO terms

if(nrow(ResMFAll$ResSel)>0){
  plotGenesInTerm_v2(ResMFAll$ResSel, ResMFAll$GOdata, DMRs = DMReg, nterms=nterms, ngenes=12, plotTitle="Top genes in top GO terms - All", Interactive=FALSE)
}

if(nrow(ResMFDown$ResSel)>0){
  plotGenesInTerm_v2(ResMFDown$ResSel, ResMFAll$GOdata, DMRs = DMReg, nterms=nterms, ngenes=12, plotTitle="Top genes in top GO terms - Down", Interactive=FALSE, fillCol='blue')
}  

if(nrow(ResMFUp$ResSel)>0){
  plotGenesInTerm_v2(ResMFUp$ResSel, ResMFAll$GOdata, DMRs = DMReg, nterms=nterms, ngenes=12, plotTitle="Top genes in top GO terms - Up", Interactive=FALSE, fillCol='red')
}

3.4 TopGO analysis: Cellular Component

On the basis of the analysis settings, the enrichment for Cellular Component IS performed.

Cellular Component Enrichment for ALL modulated genes: 780 genes

# I generate a list that contains the association between each gene and the GO terms that are associated to it
CCann <- topGO::annFUN.org(whichOnto='CC', feasibleGenes=names(GeneVectors$DMGenes), 
                           mapping='org.Hs.eg.db', ID='symbol') %>% inverseList()

# Wrapper function for topGO analysis 
ResCCAll <- topGOResults_new(Genes=GeneVectors$DMGenes, gene2GO=CCann, ontology='CC', 
                         desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                         EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                         saveRes=TRUE, outDir=OutputFolder, fileName='CCAll', maxAnn = 500)
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 1818 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 2014 GO terms and 3390 relations. )
## 
## Annotating nodes ...............
##  ( 17642 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 552 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 13:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 12:  2 nodes to be scored    (0 eliminated genes)
## 
##   Level 11:  24 nodes to be scored   (30 eliminated genes)
## 
##   Level 10:  55 nodes to be scored   (60 eliminated genes)
## 
##   Level 9:   80 nodes to be scored   (988 eliminated genes)
## 
##   Level 8:   85 nodes to be scored   (3112 eliminated genes)
## 
##   Level 7:   85 nodes to be scored   (5810 eliminated genes)
## 
##   Level 6:   82 nodes to be scored   (9821 eliminated genes)
## 
##   Level 5:   61 nodes to be scored   (12089 eliminated genes)
## 
##   Level 4:   39 nodes to be scored   (14666 eliminated genes)
## 
##   Level 3:   35 nodes to be scored   (16582 eliminated genes)
## 
##   Level 2:   2 nodes to be scored    (17317 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (17557 eliminated genes)

Cellular Component Enrichment for DOWN-METHYLATED genes: 734 genes

# Wrapper function for topGO analysis 
ResCCDown <- topGOResults_new(Genes=GeneVectors$DMGenesDown, gene2GO=CCann, ontology='CC', 
                          desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                          EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                          saveRes=TRUE, outDir=OutputFolder, fileName='CCDown', maxAnn = 500)
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 1818 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 2014 GO terms and 3390 relations. )
## 
## Annotating nodes ...............
##  ( 17642 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 548 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 13:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 12:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 11:  23 nodes to be scored   (30 eliminated genes)
## 
##   Level 10:  54 nodes to be scored   (42 eliminated genes)
## 
##   Level 9:   79 nodes to be scored   (986 eliminated genes)
## 
##   Level 8:   85 nodes to be scored   (3107 eliminated genes)
## 
##   Level 7:   85 nodes to be scored   (5803 eliminated genes)
## 
##   Level 6:   82 nodes to be scored   (9821 eliminated genes)
## 
##   Level 5:   61 nodes to be scored   (12089 eliminated genes)
## 
##   Level 4:   39 nodes to be scored   (14666 eliminated genes)
## 
##   Level 3:   35 nodes to be scored   (16582 eliminated genes)
## 
##   Level 2:   2 nodes to be scored    (17317 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (17557 eliminated genes)

Cellular Component Analysis for UP-METHYLATED genes: 53 genes

# Wrapper function for topGO analysis 
ResCCUp <- topGOResults_new(Genes=GeneVectors$DMGenesUp, gene2GO=CCann, ontology='CC', 
                        desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                        EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                        saveRes=TRUE, outDir=OutputFolder, fileName='CCUp', maxAnn = 500)
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 1818 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 2014 GO terms and 3390 relations. )
## 
## Annotating nodes ...............
##  ( 17642 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 223 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 12:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 11:  6 nodes to be scored    (0 eliminated genes)
## 
##   Level 10:  12 nodes to be scored   (18 eliminated genes)
## 
##   Level 9:   19 nodes to be scored   (327 eliminated genes)
## 
##   Level 8:   30 nodes to be scored   (1321 eliminated genes)
## 
##   Level 7:   35 nodes to be scored   (3518 eliminated genes)
## 
##   Level 6:   35 nodes to be scored   (8782 eliminated genes)
## 
##   Level 5:   34 nodes to be scored   (11398 eliminated genes)
## 
##   Level 4:   24 nodes to be scored   (14355 eliminated genes)
## 
##   Level 3:   24 nodes to be scored   (16504 eliminated genes)
## 
##   Level 2:   2 nodes to be scored    (17288 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (17549 eliminated genes)

Result visualization: Barplot

if(nrow(ResCCAll$ResSel)>0){
  topGOBarplot_new(TopGORes=ResCCAll$ResSel, terms=nterms, pvalTh=0.01, plotTitle=paste0("All DMGenes - ", Condition_2, " vs ", Condition_1),
                             palette=pals$All, flip_x=FALSE)
}  

if(nrow(ResCCDown$ResSel)>0){
  topGOBarplot_new(TopGORes=ResCCDown$ResSel, terms=nterms, pvalTh=0.01, plotTitle=paste0("Down-methylated genes (associated to DMRs where methylation is lower in ", Condition_2, ")"),
                             palette=pals$Down, flip_x=FALSE)
}

if(nrow(ResCCUp$ResSel)>0){
  topGOBarplot_new(TopGORes=ResCCUp$ResSel, terms=nterms, pvalTh=0.01, plotTitle=paste0("Up-methylated genes (associated to DMRs where methylation is higher in ", Condition_2, ")"), palette=pals$Up, flip_x=TRUE)
}

if(nrow(ResCCAll$ResSel)>0 && nrow(ResCCDown$ResSel)>0 && nrow(ResCCUp$ResSel)>0){
  topGOBarplotAll_new(TopGOResAll=ResCCAll$ResSel, TopGOResDown=ResCCDown$ResSel, TopGOResUp=ResCCUp$ResSel, terms=nterms, pvalTh=0.01, plotTitle="TopGO analysis of DMGs - ")
}

Result visualization: Bubbleplot

if(nrow(ResCCAll$ResSel)>0){
  bubbleplot(ResCCAll$ResSel, terms=nterms, Ont = "CC", pvalTh=0.01, plotTitle="TopGO analysis of DMGs - All")
  }

if(nrow(ResCCDown$ResSel)>0){
  bubbleplot(ResCCDown$ResSel, terms=nterms, Ont = "CC", pvalTh=0.01, plotTitle="TopGO analysis of DMGs - Down", fillCol = "blue")
  }

if(nrow(ResCCUp$ResSel)>0){
  bubbleplot(ResCCUp$ResSel, terms=nterms, Ont = "CC", pvalTh=0.01, plotTitle="TopGO analysis of DMGs - Up", fillCol = "red")
  }

Result visualization: Top genes in top GO terms

if(nrow(ResCCAll$ResSel)>0){
  plotGenesInTerm_v2(ResCCAll$ResSel, ResCCAll$GOdata, DMRs = DMReg, nterms=nterms, ngenes=12, plotTitle="Top genes in top GO terms - All", Interactive=FALSE)
}

if(nrow(ResCCDown$ResSel)>0){
  plotGenesInTerm_v2(ResCCDown$ResSel, ResCCAll$GOdata, DMRs = DMReg, nterms=nterms, ngenes=12, plotTitle="Top genes in top GO terms - Down", Interactive=FALSE, fillCol='blue')
}  

if(nrow(ResCCUp$ResSel)>0){
  plotGenesInTerm_v2(ResCCUp$ResSel, ResCCAll$GOdata, DMRs = DMReg, nterms=nterms, ngenes=12, plotTitle="Top genes in top GO terms - Up", Interactive=FALSE, fillCol='red')
}


4. Savings

Most of the useful information has been saved during the analysis. Here I save the workspace and information about the session.

SessionInfo <- sessionInfo()
Date <- date()
Date
## [1] "Fri Jan 10 12:52:10 2025"
SessionInfo
## R version 4.2.1 (2022-06-23)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C               LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8     LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                  LC_ADDRESS=C               LC_TELEPHONE=C             LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices datasets  utils     methods   base     
## 
## other attached packages:
##  [1] dplyr_1.1.2          tidyr_1.3.0          data.table_1.14.8    topGO_2.50.0         SparseM_1.81         GO.db_3.16.0         graph_1.76.0         viridis_0.6.2       
##  [9] viridisLite_0.4.2    RColorBrewer_1.1-3   gridExtra_2.3        org.Hs.eg.db_3.16.0  AnnotationDbi_1.60.2 IRanges_2.32.0       S4Vectors_0.36.2     Biobase_2.58.0      
## [17] BiocGenerics_0.44.0  ggplot2_3.4.2        DT_0.28             
## 
## loaded via a namespace (and not attached):
##   [1] utf8_1.2.3                              R.utils_2.12.2                          tidyselect_1.2.0                        RSQLite_2.3.1                          
##   [5] htmlwidgets_1.6.2                       grid_4.2.1                              BiocParallel_1.32.6                     methylSig_1.10.0                       
##   [9] scatterpie_0.1.8                        munsell_0.5.0                           codetools_0.2-19                        withr_2.5.0                            
##  [13] colorspace_2.1-0                        GOSemSim_2.24.0                         filelock_1.0.2                          highr_0.10                             
##  [17] knitr_1.43                              rstudioapi_0.15.0                       DOSE_3.24.2                             MatrixGenerics_1.10.0                  
##  [21] labeling_0.4.2                          GenomeInfoDbData_1.2.9                  polyclip_1.10-4                         bit64_4.0.5                            
##  [25] farver_2.1.1                            rhdf5_2.42.1                            vctrs_0.6.3                             treeio_1.23.1                          
##  [29] generics_0.1.3                          xfun_0.39                               BiocFileCache_2.6.1                     R6_2.5.1                               
##  [33] GenomeInfoDb_1.34.9                     graphlayouts_0.8.4                      locfit_1.5-9.7                          bitops_1.0-7                           
##  [37] rhdf5filters_1.10.1                     cachem_1.0.8                            fgsea_1.24.0                            gridGraphics_0.5-1                     
##  [41] DelayedArray_0.24.0                     BiocIO_1.8.0                            scales_1.2.1                            bsseq_1.34.0                           
##  [45] ggraph_2.1.0                            enrichplot_1.18.4                       gtable_0.3.3                            tidygraph_1.2.3                        
##  [49] rlang_1.1.1                             splines_4.2.1                           rtracklayer_1.58.0                      lazyeval_0.2.2                         
##  [53] DSS_2.46.0                              BiocManager_1.30.25                     yaml_2.3.7                              reshape2_1.4.4                         
##  [57] GenomicFeatures_1.50.4                  qvalue_2.30.0                           tools_4.2.1                             ggplotify_0.1.0                        
##  [61] gplots_3.1.3                            jquerylib_0.1.4                         Rcpp_1.0.11                             plyr_1.8.8                             
##  [65] sparseMatrixStats_1.10.0                progress_1.2.2                          zlibbioc_1.44.0                         purrr_1.0.1                            
##  [69] RCurl_1.98-1.12                         prettyunits_1.1.1                       cowplot_1.1.1                           SummarizedExperiment_1.28.0            
##  [73] ggrepel_0.9.3                           magrittr_2.0.3                          matrixStats_1.0.0                       hms_1.1.3                              
##  [77] patchwork_1.1.2                         evaluate_0.21                           HDO.db_0.99.1                           XML_3.99-0.14                          
##  [81] compiler_4.2.1                          biomaRt_2.54.1                          tibble_3.2.1                            KernSmooth_2.23-22                     
##  [85] crayon_1.5.2                            shadowtext_0.1.2                        R.oo_1.25.0                             htmltools_0.5.5                        
##  [89] ggfun_0.0.9                             aplot_0.1.10                            DBI_1.1.3                               tweenr_2.0.2                           
##  [93] ChIPseeker_1.34.1                       dbplyr_2.3.3                            MASS_7.3-60                             rappdirs_0.3.3                         
##  [97] boot_1.3-28.1                           Matrix_1.6-0                            permute_0.9-7                           cli_3.6.1                              
## [101] R.methodsS3_1.8.2                       parallel_4.2.1                          igraph_1.5.0                            GenomicRanges_1.50.2                   
## [105] pkgconfig_2.0.3                         TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2 GenomicAlignments_1.34.1                plotly_4.10.2                          
## [109] xml2_1.3.5                              ggtree_3.6.2                            bslib_0.5.0                             XVector_0.38.0                         
## [113] yulab.utils_0.0.6                       stringr_1.5.0                           digest_0.6.33                           Biostrings_2.66.0                      
## [117] rmarkdown_2.23                          fastmatch_1.1-3                         tidytree_0.4.2                          DelayedMatrixStats_1.20.0              
## [121] restfulr_0.0.15                         curl_5.0.1                              Rsamtools_2.14.0                        gtools_3.9.4                           
## [125] rjson_0.2.21                            lifecycle_1.0.3                         nlme_3.1-162                            jsonlite_1.8.7                         
## [129] Rhdf5lib_1.20.0                         limma_3.54.2                            BSgenome_1.66.3                         fansi_1.0.4                            
## [133] pillar_1.9.0                            lattice_0.21-8                          plotrix_3.8-2                           KEGGREST_1.38.0                        
## [137] fastmap_1.1.1                           httr_1.4.6                              glue_1.6.2                              png_0.1-8                              
## [141] bit_4.0.5                               ggforce_0.4.1                           stringi_1.7.12                          sass_0.4.7                             
## [145] HDF5Array_1.26.0                        blob_1.2.4                              caTools_1.18.2                          memoise_2.0.1                          
## [149] renv_1.0.9                              ape_5.7-1