TopGO - CTL08

Condition_1 <- params$Condition_1
Condition_2 <- params$Condition_2

Top GO Analysis for Ret AgvsInh

1. Environment Set Up

library(RNASeqBulkExploratory)
library(DT)
library(ggplot2)
library(AnnotationDbi)
## Loading required package: stats4
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: IRanges
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
library(org.Hs.eg.db)
## 
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:Biobase':
## 
##     combine
## The following object is masked from 'package:BiocGenerics':
## 
##     combine
library(RColorBrewer)
library(viridis)
## Loading required package: viridisLite
library(topGO)
## Loading required package: graph
## Loading required package: GO.db
## 
## Loading required package: SparseM
## 
## Attaching package: 'SparseM'
## The following object is masked from 'package:base':
## 
##     backsolve
## 
## groupGOTerms:    GOBPTerm, GOMFTerm, GOCCTerm environments built.
## 
## Attaching package: 'topGO'
## The following object is masked from 'package:IRanges':
## 
##     members
library(data.table)
## 
## Attaching package: 'data.table'
## The following object is masked from 'package:IRanges':
## 
##     shift
## The following objects are masked from 'package:S4Vectors':
## 
##     first, second
library(tidyr)
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:S4Vectors':
## 
##     expand
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
## 
##     between, first, last
## The following object is masked from 'package:graph':
## 
##     union
## The following object is masked from 'package:gridExtra':
## 
##     combine
## The following object is masked from 'package:AnnotationDbi':
## 
##     select
## The following objects are masked from 'package:IRanges':
## 
##     collapse, desc, intersect, setdiff, slice, union
## The following objects are masked from 'package:S4Vectors':
## 
##     first, intersect, rename, setdiff, setequal, union
## The following object is masked from 'package:Biobase':
## 
##     combine
## The following objects are masked from 'package:BiocGenerics':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(SummarizedExperiment)
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## 
## Attaching package: 'matrixStats'
## The following object is masked from 'package:dplyr':
## 
##     count
## The following objects are masked from 'package:Biobase':
## 
##     anyMissing, rowMedians
## 
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## The following object is masked from 'package:Biobase':
## 
##     rowMedians
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
library(sechm)

source("../../plotGenesInTerm_v2.R")

Dataset <- params$Dataset
logFcTh <- params$logFcTh
FdrTh <- params$FdrTh
OutputFolder <- ifelse(is.null(params$OutputFolder), getwd(), params$OutputFolder) 


if (dir.exists(OutputFolder) == FALSE) {
  dir.create(OutputFolder, recursive=TRUE)
}

2. Data Upload

Summarized Experiment object containing expression data used for DEA and gene and sample metadata
DEA object, containing results of the differential expression

2.1 Load Data from DEA

# List with differential expression results 
DEA <- readRDS(params$DEAFile)

#SE object coming from DEA, but not containing specific contrast results
SE_DEA <- readRDS(params$SEFile)

2.2 Add DEA results to SE

if(! identical(rownames(SE_DEA), row.names(DEA[[Condition_1]][[Condition_2]]$Res))){
  stop('Expression data in SE and results from differential espression analysis are inconsistent.')
}
## Loading required package: DESeq2


rowData(SE_DEA) <- cbind(rowData(SE_DEA)[,1:6], DEA[[Condition_1]][[Condition_2]]$Res)
  
# Column names must be set to be compliant with the required format to be recognized by ORA
names(rowData(SE_DEA))[which(names(rowData(SE_DEA))=='log2FoldChange')] <- 'logFC'
names(rowData(SE_DEA))[which(names(rowData(SE_DEA))=='padj')] <- 'FDR'

#metadata(SE_DEA_Prel)$annotation <- 'hsa'

14850 genes in 20 samples have been testes for differential expression.

Imposing a threshold of 1 on the Log2FC and 0.01 on the FDR (as specified in parameters), 2425 genes are selected: 1463 up-regulated genes and 962 down-regulated genes.

3. RESULTS NAVIGATION: Interactive Table

An interactive table show the results for the top 500 DEGs (ranked according to FDR).

DEGsTable(SE_DEA, FdrTh=0.01, logFcTh=1, maxGenes=500, saveDEGs=TRUE, outDir=OutputFolder)

## Top 500 genes will be included in the table

4. RESULTS VISUALIZATION

4.1 Volcano plot

plotVolcanoSE(SE=SE_DEA, FdrTh=FdrTh, logFcTh=logFcTh, FdrCeil=1e-10, logFcCeil=4)

4.2 Heatmap for significant genes

Heatmaps for DEGs, showing scaled vst values.

DEGs <- dplyr::filter(data.frame(rowData(SE_DEA)), FDR < FdrTh & abs(logFC) > logFcTh)   


ScaledCols <- c('darkblue', "purple","white","lightgoldenrod1", 'goldenrod1')

colData(SE_DEA)$Condition <- factor(colData(SE_DEA)$Condition, levels=c("CTL", "DMSO", "AhHyd_Ag", "AhHyd_Inh", "Andr_Ag", "Andr_Inh", "Estr_Ag", "Estr_Inh", "GC_Ag", "GC_Inh", "LivX_Ag", "LivX_Inh", "Ret_Ag", "Ret_Inh", "Thyr_Ag", "Thyr_Inh" ))

metadata(SE_DEA)$anno_colors <- list(Condition = c('DMSO' = 'grey30', 'CTL' = 'azure3', 
                 'AhHyd_Ag'='#F8766D', 'AhHyd_Inh'='#F8766D50',
                 'Andr_Ag'='#fccb17', 'Andr_Inh'='#C49A0050',  
                 "Estr_Ag"= '#53B400', "Estr_Inh"= '#53B40050', 
                 'GC_Ag' = '#00C094', 'GC_Inh' = '#00C09450',
                 'LivX_Ag' = '#00B6EB', 'LivX_Inh' = '#00B6EB50', 
                 'Ret_Ag' = '#A58AFF', 'Ret_Inh' = '#A58AFF50', 
                 'Thyr_Ag' = '#FB61D7', 'Thyr_Inh' = '#FB61D750'
                 ), SeqRun = c("20210310" = "orange", "20210724"  = "green4", "20220422" = 
                                          "#66ACB7"))

sechm(SE_DEA, features=DEGs$GeneName, assayName="vst", gaps_at="Condition", show_rownames=FALSE,
      top_annotation=c('Condition', 'SeqRun'), hmcols=ScaledCols, show_colnames=TRUE,
      do.scale=TRUE, breaks=0.85, column_title = "Scaled Vst Values")

## `use_raster` is automatically set to TRUE for a matrix with more than
## 2000 rows. You can control `use_raster` argument by explicitly setting
## TRUE/FALSE to it.
## 
## Set `ht_opt$message = FALSE` to turn off this message.

## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.

5. TOPGO for Gene Ontology Enrichment analysis

Gene ontology enrichment analysis is performed on the set of 2425 genes using TopGO with Fisher statistics and weight01 algorithm.

For each specified domain of the ontology:

Enrichment analysis on all DEGs or splitted in down- and up-regulated

5.1 Selection of modulated genes and generation of gene vectors

GeneVectors <- topGOGeneVectors(SE_DEA, FdrTh=FdrTh, logFcTh=logFcTh)

## Gene vector contains levels: 0,1
## Gene vector contains levels: 0,1
## Gene vector contains levels: 0,1

Therefore:

universe genes: 14850 genes
modulated genes: 2425 genes
down-regulated genes: 962 genes of interest
up-regulated genes: 1463 genes of interest

BpEval <- ifelse(length(grep('BP', params$TopGO))!=0, TRUE, FALSE)
MfEval <- ifelse(length(grep('MF', params$TopGO))!=0, TRUE, FALSE)
CcEval <- ifelse(length(grep('CC', params$TopGO))!=0, TRUE, FALSE)

5.2 TopGO analysis: Biological Process

On the basis of the analysis settings, the enrichment for Biological Process IS performed.

Biological Process Analysis for ALL modulated genes: 2425 genes

BPann <- topGO::annFUN.org(whichOnto="BP", feasibleGenes=names(GeneVectors$DEGenes), 
                           mapping="org.Hs.eg.db", ID="symbol") %>% inverseList()

# Wrapper function for topGO analysis 
ResBPAll <- topGOResults(Genes=GeneVectors$DEGenes, gene2GO=BPann, ontology='BP', 
                         desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                         EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                         saveRes=TRUE, outDir=paste0(OutputFolder), fileName='BPAll')
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 11591 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 15045 GO terms and 33988 relations. )
## 
## Annotating nodes ...............
##  ( 13128 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 4913 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 18:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 17:  3 nodes to be scored    (0 eliminated genes)
## 
##   Level 16:  13 nodes to be scored   (18 eliminated genes)
## 
##   Level 15:  26 nodes to be scored   (64 eliminated genes)
## 
##   Level 14:  57 nodes to be scored   (254 eliminated genes)
## 
##   Level 13:  100 nodes to be scored  (559 eliminated genes)
## 
##   Level 12:  160 nodes to be scored  (1535 eliminated genes)
## 
##   Level 11:  347 nodes to be scored  (3453 eliminated genes)
## 
##   Level 10:  544 nodes to be scored  (5291 eliminated genes)
## 
##   Level 9:   697 nodes to be scored  (6972 eliminated genes)
## 
##   Level 8:   749 nodes to be scored  (8891 eliminated genes)
## 
##   Level 7:   795 nodes to be scored  (10543 eliminated genes)
## 
##   Level 6:   668 nodes to be scored  (11607 eliminated genes)
## 
##   Level 5:   412 nodes to be scored  (12294 eliminated genes)
## 
##   Level 4:   229 nodes to be scored  (12707 eliminated genes)
## 
##   Level 3:   93 nodes to be scored   (12882 eliminated genes)
## 
##   Level 2:   18 nodes to be scored   (12959 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (13003 eliminated genes)

Biological Process Analysis for DOWN-REGULATED genes: 962 genes

# Wrapper function for topGO analysis 
ResBPDown <- topGOResults(Genes=GeneVectors$DEGenesDown, gene2GO=BPann, ontology='BP', 
                          desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                          EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                          saveRes=TRUE, outDir=paste0(OutputFolder), fileName='BPDown')
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 11591 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 15045 GO terms and 33988 relations. )
## 
## Annotating nodes ...............
##  ( 13128 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 4495 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 18:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 17:  3 nodes to be scored    (0 eliminated genes)
## 
##   Level 16:  10 nodes to be scored   (18 eliminated genes)
## 
##   Level 15:  23 nodes to be scored   (64 eliminated genes)
## 
##   Level 14:  50 nodes to be scored   (227 eliminated genes)
## 
##   Level 13:  84 nodes to be scored   (537 eliminated genes)
## 
##   Level 12:  143 nodes to be scored  (1440 eliminated genes)
## 
##   Level 11:  313 nodes to be scored  (3329 eliminated genes)
## 
##   Level 10:  493 nodes to be scored  (5152 eliminated genes)
## 
##   Level 9:   625 nodes to be scored  (6802 eliminated genes)
## 
##   Level 8:   684 nodes to be scored  (8726 eliminated genes)
## 
##   Level 7:   725 nodes to be scored  (10398 eliminated genes)
## 
##   Level 6:   623 nodes to be scored  (11524 eliminated genes)
## 
##   Level 5:   391 nodes to be scored  (12239 eliminated genes)
## 
##   Level 4:   215 nodes to be scored  (12698 eliminated genes)
## 
##   Level 3:   93 nodes to be scored   (12878 eliminated genes)
## 
##   Level 2:   18 nodes to be scored   (12959 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (13003 eliminated genes)

GOTable(ResBPDown$ResSel, maxGO=20)

Biological Process Analysis for UP-REGULATED genes: 1463 genes

ResBPUp <- topGOResults(Genes=GeneVectors$DEGenesUp, gene2GO=BPann, ontology='BP', 
                        desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                        EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                        saveRes=TRUE, outDir=OutputFolder, fileName='BPUp')
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 11591 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 15045 GO terms and 33988 relations. )
## 
## Annotating nodes ...............
##  ( 13128 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 4569 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 18:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 17:  2 nodes to be scored    (0 eliminated genes)
## 
##   Level 16:  12 nodes to be scored   (18 eliminated genes)
## 
##   Level 15:  25 nodes to be scored   (50 eliminated genes)
## 
##   Level 14:  52 nodes to be scored   (233 eliminated genes)
## 
##   Level 13:  82 nodes to be scored   (538 eliminated genes)
## 
##   Level 12:  135 nodes to be scored  (1419 eliminated genes)
## 
##   Level 11:  300 nodes to be scored  (3315 eliminated genes)
## 
##   Level 10:  487 nodes to be scored  (5106 eliminated genes)
## 
##   Level 9:   643 nodes to be scored  (6688 eliminated genes)
## 
##   Level 8:   702 nodes to be scored  (8706 eliminated genes)
## 
##   Level 7:   760 nodes to be scored  (10371 eliminated genes)
## 
##   Level 6:   640 nodes to be scored  (11548 eliminated genes)
## 
##   Level 5:   399 nodes to be scored  (12279 eliminated genes)
## 
##   Level 4:   220 nodes to be scored  (12704 eliminated genes)
## 
##   Level 3:   90 nodes to be scored   (12882 eliminated genes)
## 
##   Level 2:   18 nodes to be scored   (12959 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (13003 eliminated genes)

GOTable(ResBPUp$ResSel, maxGO=20)

Result visualization: Barplot

topGOBarplotAll(TopGOResAll=ResBPAll$ResSel, TopGOResDown=ResBPDown$ResSel, TopGOResUp=ResBPUp$ResSel, 
                terms=8, pvalTh=0.01, plotTitle=NULL)

Top Terms associated Genes

All

plotGenesInTerm_v2(ResBPAll$ResSel, ResBPAll$GOdata, SE_DEA, nterms=8, ngenes=12, plotTitle=NULL, Interactive=FALSE)

Down

plotGenesInTerm_v2(ResBPDown$ResSel, ResBPDown$GOdata, SE_DEA, nterms=8, ngenes=12, plotTitle='Genes in Term - Down DEGs', Interactive=FALSE, fillCol='blue')

Up

plotGenesInTerm_v2(ResBPUp$ResSel, ResBPUp$GOdata, SE_DEA, nterms=8, ngenes=12, plotTitle='Genes in Term - Up DEGs', Interactive=FALSE, fillCol='red')

5.3 TopGO analysis: Molecular Function

On the basis of the analysis settings, the enrichment for Molecular Function IS performed.

Molecular Function Enrichment for ALL modulated genes: 2425 genes

MFann <- topGO::annFUN.org(whichOnto='MF', feasibleGenes=names(GeneVectors$DEGenes), 
                           mapping='org.Hs.eg.db', ID='symbol') %>% inverseList()

# Wrapper function for topGO analysis 
ResMFAll <- topGOResults(Genes=GeneVectors$DEGenes, gene2GO=MFann, ontology='MF', 
                         desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                         EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                         saveRes=TRUE, outDir=OutputFolder, fileName='MFAll')
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 4114 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 4579 GO terms and 5967 relations. )
## 
## Annotating nodes ...............
##  ( 13476 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 834 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 11:  8 nodes to be scored    (0 eliminated genes)
## 
##   Level 10:  13 nodes to be scored   (0 eliminated genes)
## 
##   Level 9:   30 nodes to be scored   (177 eliminated genes)
## 
##   Level 8:   62 nodes to be scored   (1256 eliminated genes)
## 
##   Level 7:   111 nodes to be scored  (3297 eliminated genes)
## 
##   Level 6:   164 nodes to be scored  (3980 eliminated genes)
## 
##   Level 5:   193 nodes to be scored  (5571 eliminated genes)
## 
##   Level 4:   176 nodes to be scored  (8602 eliminated genes)
## 
##   Level 3:   59 nodes to be scored   (10847 eliminated genes)
## 
##   Level 2:   17 nodes to be scored   (11639 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (13358 eliminated genes)

Molecular Function Enrichment for DOWN-REGULATED genes: 962 genes

ResMFDown <- topGOResults(Genes=GeneVectors$DEGenesDown, gene2GO=MFann, ontology='MF', 
                          desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                          EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                          saveRes=TRUE, outDir=OutputFolder, fileName='MFDown')
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 4114 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 4579 GO terms and 5967 relations. )
## 
## Annotating nodes ...............
##  ( 13476 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 710 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 11:  4 nodes to be scored    (0 eliminated genes)
## 
##   Level 10:  8 nodes to be scored    (0 eliminated genes)
## 
##   Level 9:   20 nodes to be scored   (101 eliminated genes)
## 
##   Level 8:   46 nodes to be scored   (1096 eliminated genes)
## 
##   Level 7:   95 nodes to be scored   (3146 eliminated genes)
## 
##   Level 6:   141 nodes to be scored  (3888 eliminated genes)
## 
##   Level 5:   164 nodes to be scored  (5431 eliminated genes)
## 
##   Level 4:   158 nodes to be scored  (8431 eliminated genes)
## 
##   Level 3:   56 nodes to be scored   (10723 eliminated genes)
## 
##   Level 2:   17 nodes to be scored   (11573 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (13358 eliminated genes)

GOTable(ResMFDown$ResSel, maxGO=20)

Molecular Function Analysis for UP-REGULATED genes: 1463 genes

ResMFUp <- topGOResults(Genes=GeneVectors$DEGenesUp, gene2GO=MFann, ontology='MF', 
                        desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                        EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                        saveRes=TRUE, outDir=OutputFolder, fileName='MFUp')
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 4114 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 4579 GO terms and 5967 relations. )
## 
## Annotating nodes ...............
##  ( 13476 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 743 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 11:  7 nodes to be scored    (0 eliminated genes)
## 
##   Level 10:  10 nodes to be scored   (0 eliminated genes)
## 
##   Level 9:   26 nodes to be scored   (162 eliminated genes)
## 
##   Level 8:   57 nodes to be scored   (1205 eliminated genes)
## 
##   Level 7:   87 nodes to be scored   (3183 eliminated genes)
## 
##   Level 6:   144 nodes to be scored  (3859 eliminated genes)
## 
##   Level 5:   179 nodes to be scored  (5238 eliminated genes)
## 
##   Level 4:   165 nodes to be scored  (8502 eliminated genes)
## 
##   Level 3:   51 nodes to be scored   (10816 eliminated genes)
## 
##   Level 2:   16 nodes to be scored   (11621 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (13341 eliminated genes)

GOTable(ResMFUp$ResSel, maxGO=20)

Result visualization: Barplot

topGOBarplotAll(TopGOResAll=ResMFAll$ResSel, TopGOResDown=ResMFDown$ResSel, TopGOResUp=ResMFUp$ResSel, 
                terms=8, pvalTh=0.01, plotTitle=NULL)

Top Terms associated Genes

All

plotGenesInTerm_v2(ResMFAll$ResSel, ResMFAll$GOdata, SE_DEA, nterms=8, ngenes=12, plotTitle=NULL, Interactive=FALSE)

Down

plotGenesInTerm_v2(ResMFDown$ResSel, ResMFDown$GOdata, SE_DEA, nterms=8, ngenes=12, plotTitle='Genes in Term - Down DEGs', Interactive=FALSE, fillCol='blue')

Up

plotGenesInTerm_v2(ResMFUp$ResSel, ResMFUp$GOdata, SE_DEA, nterms=8, ngenes=12, plotTitle='Genes in Term - Up DEGs', Interactive=FALSE, fillCol='red')

5.4 TopGO analysis: Cellular Component

On the basis of the analysis settings, the enrichment for Cellular Component IS performed.

Cellular Component Enrichment for ALL modulated genes: 2425 genes

CCann <- topGO::annFUN.org(whichOnto='CC', feasibleGenes=names(GeneVectors$DEGenes), 
                           mapping='org.Hs.eg.db', ID='symbol') %>% inverseList()

# Wrapper function for topGO analysis 
ResCCAll <- topGOResults(Genes=GeneVectors$DEGenes, gene2GO=CCann, ontology='CC', 
                         desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                         EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                         saveRes=TRUE, outDir=OutputFolder, fileName='CCAll')
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 1740 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 1936 GO terms and 3272 relations. )
## 
## Annotating nodes ...............
##  ( 13716 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 619 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 13:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 12:  3 nodes to be scored    (0 eliminated genes)
## 
##   Level 11:  26 nodes to be scored   (30 eliminated genes)
## 
##   Level 10:  67 nodes to be scored   (83 eliminated genes)
## 
##   Level 9:   96 nodes to be scored   (889 eliminated genes)
## 
##   Level 8:   91 nodes to be scored   (2810 eliminated genes)
## 
##   Level 7:   85 nodes to be scored   (5188 eliminated genes)
## 
##   Level 6:   89 nodes to be scored   (8520 eliminated genes)
## 
##   Level 5:   73 nodes to be scored   (10127 eliminated genes)
## 
##   Level 4:   47 nodes to be scored   (12063 eliminated genes)
## 
##   Level 3:   38 nodes to be scored   (13176 eliminated genes)
## 
##   Level 2:   2 nodes to be scored    (13557 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (13659 eliminated genes)

Cellular Component Enrichment for DOWN-REGULATED genes: 962 genes

# Wrapper function for topGO analysis 
ResCCDown <- topGOResults(Genes=GeneVectors$DEGenesDown, gene2GO=CCann, ontology='CC', 
                          desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                          EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                          saveRes=TRUE, outDir=OutputFolder, fileName='CCDown')
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 1740 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 1936 GO terms and 3272 relations. )
## 
## Annotating nodes ...............
##  ( 13716 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 558 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 12:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 11:  20 nodes to be scored   (0 eliminated genes)
## 
##   Level 10:  56 nodes to be scored   (24 eliminated genes)
## 
##   Level 9:   79 nodes to be scored   (784 eliminated genes)
## 
##   Level 8:   87 nodes to be scored   (2547 eliminated genes)
## 
##   Level 7:   81 nodes to be scored   (4981 eliminated genes)
## 
##   Level 6:   78 nodes to be scored   (8511 eliminated genes)
## 
##   Level 5:   69 nodes to be scored   (10110 eliminated genes)
## 
##   Level 4:   46 nodes to be scored   (12046 eliminated genes)
## 
##   Level 3:   38 nodes to be scored   (13174 eliminated genes)
## 
##   Level 2:   2 nodes to be scored    (13557 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (13659 eliminated genes)

GOTable(ResCCDown$ResSel, maxGO=20)

Cellular Component Analysis for UP-REGULATED genes: 1463 genes

# Wrapper function for topGO analysis 
ResCCUp <- topGOResults(Genes=GeneVectors$DEGenesUp, gene2GO=CCann, ontology='CC', 
                        desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                        EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                        saveRes=TRUE, outDir=OutputFolder, fileName='CCUp')
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 1740 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 1936 GO terms and 3272 relations. )
## 
## Annotating nodes ...............
##  ( 13716 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 531 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 13:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 12:  2 nodes to be scored    (0 eliminated genes)
## 
##   Level 11:  18 nodes to be scored   (30 eliminated genes)
## 
##   Level 10:  46 nodes to be scored   (59 eliminated genes)
## 
##   Level 9:   76 nodes to be scored   (638 eliminated genes)
## 
##   Level 8:   78 nodes to be scored   (2540 eliminated genes)
## 
##   Level 7:   80 nodes to be scored   (4914 eliminated genes)
## 
##   Level 6:   83 nodes to be scored   (8421 eliminated genes)
## 
##   Level 5:   66 nodes to be scored   (10105 eliminated genes)
## 
##   Level 4:   41 nodes to be scored   (12038 eliminated genes)
## 
##   Level 3:   37 nodes to be scored   (13171 eliminated genes)
## 
##   Level 2:   2 nodes to be scored    (13554 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (13659 eliminated genes)

GOTable(ResCCUp$ResSel, maxGO=20)

Result visualization: Barplot

topGOBarplotAll(TopGOResAll=ResCCAll$ResSel, TopGOResDown=ResCCDown$ResSel, TopGOResUp=ResCCUp$ResSel, 
                terms=8, pvalTh=0.01, plotTitle=NULL)

Top Terms associated Genes

All

plotGenesInTerm_v2(ResCCAll$ResSel, ResCCAll$GOdata, SE_DEA, nterms=8, ngenes=12, plotTitle=NULL, Interactive=FALSE)

Down

plotGenesInTerm_v2(ResCCDown$ResSel, ResCCDown$GOdata, SE_DEA, nterms=8, ngenes=12, plotTitle='Genes in Term - Down DEGs', Interactive=FALSE, fillCol='blue')

Up

plotGenesInTerm_v2(ResCCUp$ResSel, ResCCUp$GOdata, SE_DEA, nterms=8, ngenes=12, plotTitle='Genes in Term - Up DEGs', Interactive=FALSE, fillCol='red')

6. Savings

SessionInfo <- sessionInfo()
Date <- date()
#
save.image(paste0(OutputFolder, Dataset, 'FunctionalAnalysisWorkspace.RData'))

SessionInfo

## R version 4.2.1 (2022-06-23)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] DESeq2_1.38.3               sechm_1.6.0                
##  [3] SummarizedExperiment_1.28.0 GenomicRanges_1.50.2       
##  [5] GenomeInfoDb_1.34.9         MatrixGenerics_1.10.0      
##  [7] matrixStats_0.63.0          dplyr_1.1.0                
##  [9] tidyr_1.3.0                 data.table_1.14.8          
## [11] topGO_2.50.0                SparseM_1.81               
## [13] GO.db_3.16.0                graph_1.76.0               
## [15] viridis_0.6.2               viridisLite_0.4.1          
## [17] RColorBrewer_1.1-3          gridExtra_2.3              
## [19] org.Hs.eg.db_3.16.0         AnnotationDbi_1.60.0       
## [21] IRanges_2.32.0              S4Vectors_0.36.1           
## [23] Biobase_2.58.0              BiocGenerics_0.44.0        
## [25] ggplot2_3.4.1               DT_0.27                    
## [27] RNASeqBulkExploratory_0.2.1
## 
## loaded via a namespace (and not attached):
##  [1] Rtsne_0.16             colorspace_2.1-0       rjson_0.2.21          
##  [4] ellipsis_0.3.2         circlize_0.4.15        XVector_0.38.0        
##  [7] GlobalOptions_0.1.2    clue_0.3-64            rstudioapi_0.14       
## [10] farver_2.1.1           bit64_4.0.5            fansi_1.0.4           
## [13] codetools_0.2-19       doParallel_1.0.17      cachem_1.0.7          
## [16] geneplotter_1.76.0     knitr_1.42             jsonlite_1.8.4        
## [19] annotate_1.76.0        cluster_2.1.4          png_0.1-8             
## [22] compiler_4.2.1         httr_1.4.5             lazyeval_0.2.2        
## [25] Matrix_1.5-3           fastmap_1.1.1          cli_3.6.1             
## [28] htmltools_0.5.4        tools_4.2.1            gtable_0.3.1          
## [31] glue_1.6.2             GenomeInfoDbData_1.2.9 V8_4.2.2              
## [34] Rcpp_1.0.10            jquerylib_0.1.4        vctrs_0.6.2           
## [37] Biostrings_2.66.0      iterators_1.0.14       crosstalk_1.2.0       
## [40] xfun_0.37              stringr_1.5.0          lifecycle_1.0.3       
## [43] XML_3.99-0.13          ca_0.71.1              zlibbioc_1.44.0       
## [46] scales_1.2.1           TSP_1.2-2              parallel_4.2.1        
## [49] ComplexHeatmap_2.14.0  yaml_2.3.7             curl_5.0.0            
## [52] memoise_2.0.1          sass_0.4.5             stringi_1.7.12        
## [55] RSQLite_2.3.0          highr_0.10             randomcoloR_1.1.0.1   
## [58] foreach_1.5.2          seriation_1.4.1        BiocParallel_1.32.5   
## [61] shape_1.4.6            rlang_1.1.1            pkgconfig_2.0.3       
## [64] bitops_1.0-7           evaluate_0.20          lattice_0.20-45       
## [67] purrr_1.0.1            labeling_0.4.2         htmlwidgets_1.6.1     
## [70] bit_4.0.5              tidyselect_1.2.0       magrittr_2.0.3        
## [73] R6_2.5.1               generics_0.1.3         DelayedArray_0.24.0   
## [76] DBI_1.1.3              pillar_1.8.1           withr_2.5.0           
## [79] KEGGREST_1.38.0        RCurl_1.98-1.10        tibble_3.2.1          
## [82] crayon_1.5.2           utf8_1.2.3             plotly_4.10.1         
## [85] rmarkdown_2.20         GetoptLong_1.0.5       locfit_1.5-9.7        
## [88] grid_4.2.1             blob_1.2.3             digest_0.6.31         
## [91] xtable_1.8-4           munsell_0.5.0          registry_0.5-1        
## [94] bslib_0.4.2

Date

## [1] "Fri Jul 18 18:17:06 2025"

TopGO - CTL08

Date: July 18, 2025

1. Environment Set Up

2. Data Upload

2.1 Load Data from DEA

2.2 Add DEA results to SE

3. RESULTS NAVIGATION: Interactive Table

4. RESULTS VISUALIZATION

4.1 Volcano plot

4.2 Heatmap for significant genes

5. TOPGO for Gene Ontology Enrichment analysis

5.1 Selection of modulated genes and generation of gene vectors

5.2 TopGO analysis: Biological Process

Biological Process Analysis for ALL modulated genes: 2425 genes

Biological Process Analysis for DOWN-REGULATED genes: 962 genes

Biological Process Analysis for UP-REGULATED genes: 1463 genes

Result visualization: Barplot

Top Terms associated Genes

All

Down

Up

5.3 TopGO analysis: Molecular Function

Molecular Function Enrichment for ALL modulated genes: 2425 genes

Molecular Function Enrichment for DOWN-REGULATED genes: 962 genes

Molecular Function Analysis for UP-REGULATED genes: 1463 genes

Result visualization: Barplot

Top Terms associated Genes

All

Down

Up

5.4 TopGO analysis: Cellular Component

Cellular Component Enrichment for ALL modulated genes: 2425 genes

Cellular Component Enrichment for DOWN-REGULATED genes: 962 genes

Cellular Component Analysis for UP-REGULATED genes: 1463 genes

Result visualization: Barplot

Top Terms associated Genes

All

Down

Up

6. Savings