TopGO - CTL04

Condition_1 <- params$Condition_1
Condition_2 <- params$Condition_2

Top GO Analysis for Estr Inhibitor

1. Environment Set Up

library(RNASeqBulkExploratory)
library(DT)
library(ggplot2)
library(AnnotationDbi)
## Loading required package: stats4
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: IRanges
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
library(org.Hs.eg.db)
## 
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:Biobase':
## 
##     combine
## The following object is masked from 'package:BiocGenerics':
## 
##     combine
library(RColorBrewer)
library(viridis)
## Loading required package: viridisLite
library(topGO)
## Loading required package: graph
## Loading required package: GO.db
## 
## Loading required package: SparseM
## 
## Attaching package: 'SparseM'
## The following object is masked from 'package:base':
## 
##     backsolve
## 
## groupGOTerms:    GOBPTerm, GOMFTerm, GOCCTerm environments built.
## 
## Attaching package: 'topGO'
## The following object is masked from 'package:IRanges':
## 
##     members
library(data.table)
## 
## Attaching package: 'data.table'
## The following object is masked from 'package:IRanges':
## 
##     shift
## The following objects are masked from 'package:S4Vectors':
## 
##     first, second
library(tidyr)
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:S4Vectors':
## 
##     expand
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
## 
##     between, first, last
## The following object is masked from 'package:graph':
## 
##     union
## The following object is masked from 'package:gridExtra':
## 
##     combine
## The following object is masked from 'package:AnnotationDbi':
## 
##     select
## The following objects are masked from 'package:IRanges':
## 
##     collapse, desc, intersect, setdiff, slice, union
## The following objects are masked from 'package:S4Vectors':
## 
##     first, intersect, rename, setdiff, setequal, union
## The following object is masked from 'package:Biobase':
## 
##     combine
## The following objects are masked from 'package:BiocGenerics':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(SummarizedExperiment)
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## 
## Attaching package: 'matrixStats'
## The following object is masked from 'package:dplyr':
## 
##     count
## The following objects are masked from 'package:Biobase':
## 
##     anyMissing, rowMedians
## 
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## The following object is masked from 'package:Biobase':
## 
##     rowMedians
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
library(sechm)

source("../../plotGenesInTerm_v2.R")

Dataset <- params$Dataset
logFcTh <- params$logFcTh
FdrTh <- params$FdrTh
OutputFolder <- ifelse(is.null(params$OutputFolder), getwd(), params$OutputFolder) 


if (dir.exists(OutputFolder) == FALSE) {
  dir.create(OutputFolder, recursive=TRUE)
}

2. Data Upload

Summarized Experiment object containing expression data used for DEA and gene and sample metadata
DEA object, containing results of the differential expression

2.1 Load Data from DEA

# List with differential expression results 
DEA <- readRDS(params$DEAFile)

#SE object coming from DEA, but not containing specific contrast results
SE_DEA <- readRDS(params$SEFile)

2.2 Add DEA results to SE

if(! identical(rownames(SE_DEA), row.names(DEA[[Condition_1]][[Condition_2]]$Res))){
  stop('Expression data in SE and results from differential espression analysis are inconsistent.')
}
## Loading required package: DESeq2


rowData(SE_DEA) <- cbind(rowData(SE_DEA)[,1:6], DEA[[Condition_1]][[Condition_2]]$Res)
  
# Column names must be set to be compliant with the required format to be recognized by ORA
names(rowData(SE_DEA))[which(names(rowData(SE_DEA))=='log2FoldChange')] <- 'logFC'
names(rowData(SE_DEA))[which(names(rowData(SE_DEA))=='padj')] <- 'FDR'

#metadata(SE_DEA_Prel)$annotation <- 'hsa'

14305 genes in 21 samples have been testes for differential expression.

Imposing a threshold of 1 on the Log2FC and 0.01 on the FDR (as specified in parameters), 82 genes are selected: 26 up-regulated genes and 56 down-regulated genes.

3. RESULTS NAVIGATION: Interactive Table

An interactive table show the results for the top 500 DEGs (ranked according to FDR).

DEGsTable(SE_DEA, FdrTh=0.01, logFcTh=1, maxGenes=500, saveDEGs=TRUE, outDir=OutputFolder)

4. RESULTS VISUALIZATION

4.1 Volcano plot

The results of the differential expression analysis are visualized by Volcano plot. An interactive version is included in the html (only genes with FDR < threshold), while a static version is saved.

plotVolcanoSE(SE=SE_DEA, FdrTh=FdrTh, logFcTh=logFcTh, FdrCeil=1e-10, logFcCeil=4)

4.3 Heatmap for significant genes

Heatmaps for DEGs, showing scaled vst values.

DEGs <- dplyr::filter(data.frame(rowData(SE_DEA)), FDR < FdrTh & abs(logFC) > logFcTh)   


ScaledCols <- c('darkblue', "purple","white","lightgoldenrod1", 'goldenrod1')

colData(SE_DEA)$Condition <- factor(colData(SE_DEA)$Condition, levels=c("CTL", "DMSO", "AhHyd_Ag", "AhHyd_Inh", "Andr_Ag", "Andr_Inh", "Estr_Ag", "Estr_Inh", "GC_Ag", "GC_Inh", "LivX_Ag", "LivX_Inh", "Ret_Ag", "Ret_Inh", "Thyr_Ag", "Thyr_Inh" ))

metadata(SE_DEA)$anno_colors <- list(Condition = c('DMSO' = 'grey30', 'CTL' = 'azure3', 
                 'AhHyd_Ag'='#F8766D', 'AhHyd_Inh'='#F8766D50',
                 'Andr_Ag'='#fccb17', 'Andr_Inh'='#C49A0050',  
                 "Estr_Ag"= '#53B400', "Estr_Inh"= '#53B40050', 
                 'GC_Ag' = '#00C094', 'GC_Inh' = '#00C09450',
                 'LivX_Ag' = '#00B6EB', 'LivX_Inh' = '#00B6EB50', 
                 'Ret_Ag' = '#A58AFF', 'Ret_Inh' = '#A58AFF50', 
                 'Thyr_Ag' = '#FB61D7', 'Thyr_Inh' = '#FB61D750'
                 ))

sechm(SE_DEA, features=DEGs$GeneName, assayName="vst", gaps_at="Condition", show_rownames=FALSE,
      top_annotation=c('Condition'), hmcols=ScaledCols, show_colnames=TRUE,
      do.scale=TRUE, breaks=0.85)

5. TOPGO for Gene Ontology Enrichment analysis

Gene ontology enrichment analysis is performed on the set of 82 genes using TopGO with Fisher statistics and weight01 algorithm.

For each specified domain of the ontology:

Enrichment analysis on all DEGs or splitted in down- and up-regulated

5.1 Selection of modulated genes and generation of gene vectors

I generate vectors for the gene universe, all modulated genes, up-regulated genes and down-regulated genes in the format required by TopGo.

GeneVectors <- topGOGeneVectors(SE_DEA, FdrTh=FdrTh, logFcTh=logFcTh)

## Gene vector contains levels: 0,1
## Gene vector contains levels: 0,1
## Gene vector contains levels: 0,1

Therefore:

universe genes: 14305 genes
modulated genes: 82 genes
down-regulated genes: 56 genes of interest
up-regulated genes: 26 genes of interest

Then I set parameters according to the gene ontology domains to be evaluated. By default, Biological Process and Molecular Function domains are interrogated.

BpEval <- ifelse(length(grep('BP', params$TopGO))!=0, TRUE, FALSE)
MfEval <- ifelse(length(grep('MF', params$TopGO))!=0, TRUE, FALSE)
CcEval <- ifelse(length(grep('CC', params$TopGO))!=0, TRUE, FALSE)

5.2 TopGO analysis: Biological Process

On the basis of the analysis settings, the enrichment for Biological Process IS performed.

Biological Process Analysis for ALL modulated genes: 82 genes

BPann <- topGO::annFUN.org(whichOnto="BP", feasibleGenes=names(GeneVectors$DEGenes), 
                           mapping="org.Hs.eg.db", ID="symbol") %>% inverseList()

# Wrapper function for topGO analysis 
ResBPAll <- topGOResults(Genes=GeneVectors$DEGenes, gene2GO=BPann, ontology='BP', 
                         desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                         EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                         saveRes=TRUE, outDir=paste0(OutputFolder), fileName='BPAll')
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 11416 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 14877 GO terms and 33562 relations. )
## 
## Annotating nodes ...............
##  ( 12665 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 2004 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 18:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 17:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 16:  1 nodes to be scored    (18 eliminated genes)
## 
##   Level 15:  5 nodes to be scored    (23 eliminated genes)
## 
##   Level 14:  6 nodes to be scored    (25 eliminated genes)
## 
##   Level 13:  14 nodes to be scored   (191 eliminated genes)
## 
##   Level 12:  34 nodes to be scored   (530 eliminated genes)
## 
##   Level 11:  80 nodes to be scored   (2197 eliminated genes)
## 
##   Level 10:  156 nodes to be scored  (3751 eliminated genes)
## 
##   Level 9:   225 nodes to be scored  (4858 eliminated genes)
## 
##   Level 8:   286 nodes to be scored  (6309 eliminated genes)
## 
##   Level 7:   352 nodes to be scored  (8234 eliminated genes)
## 
##   Level 6:   361 nodes to be scored  (10156 eliminated genes)
## 
##   Level 5:   252 nodes to be scored  (11320 eliminated genes)
## 
##   Level 4:   149 nodes to be scored  (12044 eliminated genes)
## 
##   Level 3:   63 nodes to be scored   (12365 eliminated genes)
## 
##   Level 2:   17 nodes to be scored   (12476 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (12530 eliminated genes)

Biological Process Analysis for DOWN-REGULATED genes: 56 genes

# Wrapper function for topGO analysis 
ResBPDown <- topGOResults(Genes=GeneVectors$DEGenesDown, gene2GO=BPann, ontology='BP', 
                          desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                          EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                          saveRes=TRUE, outDir=paste0(OutputFolder), fileName='BPDown')
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 11416 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 14877 GO terms and 33562 relations. )
## 
## Annotating nodes ...............
##  ( 12665 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 1641 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 18:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 17:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 16:  1 nodes to be scored    (18 eliminated genes)
## 
##   Level 15:  4 nodes to be scored    (23 eliminated genes)
## 
##   Level 14:  5 nodes to be scored    (25 eliminated genes)
## 
##   Level 13:  14 nodes to be scored   (122 eliminated genes)
## 
##   Level 12:  30 nodes to be scored   (483 eliminated genes)
## 
##   Level 11:  63 nodes to be scored   (2197 eliminated genes)
## 
##   Level 10:  112 nodes to be scored  (3730 eliminated genes)
## 
##   Level 9:   166 nodes to be scored  (4763 eliminated genes)
## 
##   Level 8:   210 nodes to be scored  (5956 eliminated genes)
## 
##   Level 7:   286 nodes to be scored  (7580 eliminated genes)
## 
##   Level 6:   304 nodes to be scored  (9687 eliminated genes)
## 
##   Level 5:   231 nodes to be scored  (11024 eliminated genes)
## 
##   Level 4:   136 nodes to be scored  (11993 eliminated genes)
## 
##   Level 3:   60 nodes to be scored   (12351 eliminated genes)
## 
##   Level 2:   16 nodes to be scored   (12473 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (12516 eliminated genes)

GOTable(ResBPDown$ResSel, maxGO=20)

Biological Process Analysis for UP-REGULATED genes: 26 genes

ResBPUp <- topGOResults(Genes=GeneVectors$DEGenesUp, gene2GO=BPann, ontology='BP', 
                        desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                        EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                        saveRes=TRUE, outDir=OutputFolder, fileName='BPUp')
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 11416 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 14877 GO terms and 33562 relations. )
## 
## Annotating nodes ...............
##  ( 12665 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 1035 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 15:  2 nodes to be scored    (0 eliminated genes)
## 
##   Level 14:  2 nodes to be scored    (0 eliminated genes)
## 
##   Level 13:  5 nodes to be scored    (119 eliminated genes)
## 
##   Level 12:  13 nodes to be scored   (198 eliminated genes)
## 
##   Level 11:  30 nodes to be scored   (1916 eliminated genes)
## 
##   Level 10:  67 nodes to be scored   (3205 eliminated genes)
## 
##   Level 9:   101 nodes to be scored  (3767 eliminated genes)
## 
##   Level 8:   145 nodes to be scored  (4733 eliminated genes)
## 
##   Level 7:   172 nodes to be scored  (6153 eliminated genes)
## 
##   Level 6:   192 nodes to be scored  (8203 eliminated genes)
## 
##   Level 5:   154 nodes to be scored  (9697 eliminated genes)
## 
##   Level 4:   89 nodes to be scored   (11471 eliminated genes)
## 
##   Level 3:   47 nodes to be scored   (12223 eliminated genes)
## 
##   Level 2:   15 nodes to be scored   (12432 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (12527 eliminated genes)

GOTable(ResBPUp$ResSel, maxGO=20)

Result visualization: Barplot

topGOBarplotAll(TopGOResAll=ResBPAll$ResSel, TopGOResDown=ResBPDown$ResSel, TopGOResUp=ResBPUp$ResSel, 
                terms=8, pvalTh=0.01, plotTitle=NULL)

Top Terms associated Genes

All

plotGenesInTerm_v2(ResBPAll$ResSel, ResBPAll$GOdata, SE_DEA, nterms=8, ngenes=12, plotTitle=NULL, Interactive=FALSE)

Down

plotGenesInTerm_v2(ResBPDown$ResSel, ResBPDown$GOdata, SE_DEA, nterms=8, ngenes=12, plotTitle='Genes in Term - Down DEGs', Interactive=FALSE, fillCol='blue')

Up

plotGenesInTerm_v2(ResBPUp$ResSel, ResBPUp$GOdata, SE_DEA, nterms=8, ngenes=12, plotTitle='Genes in Term - Up DEGs', Interactive=FALSE, fillCol='red')

5.3 TopGO analysis: Molecular Function

On the basis of the analysis settings, the enrichment for Molecular Function IS performed.

Molecular Function Enrichment for ALL modulated genes: 82 genes

MFann <- topGO::annFUN.org(whichOnto='MF', feasibleGenes=names(GeneVectors$DEGenes), 
                           mapping='org.Hs.eg.db', ID='symbol') %>% inverseList()

# Wrapper function for topGO analysis 
ResMFAll <- topGOResults(Genes=GeneVectors$DEGenes, gene2GO=MFann, ontology='MF', 
                         desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                         EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                         saveRes=TRUE, outDir=OutputFolder, fileName='MFAll')
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 4060 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 4530 GO terms and 5903 relations. )
## 
## Annotating nodes ...............
##  ( 12999 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 262 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 10:  2 nodes to be scored    (0 eliminated genes)
## 
##   Level 9:   7 nodes to be scored    (0 eliminated genes)
## 
##   Level 8:   10 nodes to be scored   (949 eliminated genes)
## 
##   Level 7:   18 nodes to be scored   (2443 eliminated genes)
## 
##   Level 6:   38 nodes to be scored   (2774 eliminated genes)
## 
##   Level 5:   68 nodes to be scored   (3838 eliminated genes)
## 
##   Level 4:   75 nodes to be scored   (6397 eliminated genes)
## 
##   Level 3:   32 nodes to be scored   (9411 eliminated genes)
## 
##   Level 2:   11 nodes to be scored   (10646 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (12807 eliminated genes)

Molecular Function Enrichment for DOWN-REGULATED genes: 56 genes

ResMFDown <- topGOResults(Genes=GeneVectors$DEGenesDown, gene2GO=MFann, ontology='MF', 
                          desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                          EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                          saveRes=TRUE, outDir=OutputFolder, fileName='MFDown')
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 4060 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 4530 GO terms and 5903 relations. )
## 
## Annotating nodes ...............
##  ( 12999 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 230 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 10:  2 nodes to be scored    (0 eliminated genes)
## 
##   Level 9:   6 nodes to be scored    (0 eliminated genes)
## 
##   Level 8:   10 nodes to be scored   (949 eliminated genes)
## 
##   Level 7:   17 nodes to be scored   (2441 eliminated genes)
## 
##   Level 6:   34 nodes to be scored   (2774 eliminated genes)
## 
##   Level 5:   61 nodes to be scored   (3821 eliminated genes)
## 
##   Level 4:   63 nodes to be scored   (6064 eliminated genes)
## 
##   Level 3:   27 nodes to be scored   (9241 eliminated genes)
## 
##   Level 2:   9 nodes to be scored    (10513 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (12800 eliminated genes)

GOTable(ResMFDown$ResSel, maxGO=20)

Molecular Function Analysis for UP-REGULATED genes: 26 genes

ResMFUp <- topGOResults(Genes=GeneVectors$DEGenesUp, gene2GO=MFann, ontology='MF', 
                        desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                        EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                        saveRes=TRUE, outDir=OutputFolder, fileName='MFUp')
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 4060 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 4530 GO terms and 5903 relations. )
## 
## Annotating nodes ...............
##  ( 12999 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 127 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 10:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 9:   4 nodes to be scored    (0 eliminated genes)
## 
##   Level 8:   3 nodes to be scored    (928 eliminated genes)
## 
##   Level 7:   7 nodes to be scored    (2366 eliminated genes)
## 
##   Level 6:   17 nodes to be scored   (2495 eliminated genes)
## 
##   Level 5:   26 nodes to be scored   (3360 eliminated genes)
## 
##   Level 4:   37 nodes to be scored   (4946 eliminated genes)
## 
##   Level 3:   22 nodes to be scored   (7506 eliminated genes)
## 
##   Level 2:   9 nodes to be scored    (9526 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (12688 eliminated genes)

GOTable(ResMFUp$ResSel, maxGO=20)

Result visualization: Barplot

topGOBarplotAll(TopGOResAll=ResMFAll$ResSel, TopGOResDown=ResMFDown$ResSel, TopGOResUp=ResMFUp$ResSel, 
                terms=8, pvalTh=0.01, plotTitle=NULL)

Top Terms associated Genes

All

plotGenesInTerm_v2(ResMFAll$ResSel, ResMFAll$GOdata, SE_DEA, nterms=8, ngenes=12, plotTitle=NULL, Interactive=FALSE)

Down

plotGenesInTerm_v2(ResMFDown$ResSel, ResMFDown$GOdata, SE_DEA, nterms=8, ngenes=12, plotTitle='Genes in Term - Down DEGs', Interactive=FALSE, fillCol='blue')

Up

plotGenesInTerm_v2(ResMFUp$ResSel, ResMFUp$GOdata, SE_DEA, nterms=8, ngenes=12, plotTitle='Genes in Term - Up DEGs', Interactive=FALSE, fillCol='red')

5.4 TopGO analysis: Cellular Component

On the basis of the analysis settings, the enrichment for Cellular Component IS performed.

Cellular Component Enrichment for ALL modulated genes: 82 genes

CCann <- topGO::annFUN.org(whichOnto='CC', feasibleGenes=names(GeneVectors$DEGenes), 
                           mapping='org.Hs.eg.db', ID='symbol') %>% inverseList()

# Wrapper function for topGO analysis 
ResCCAll <- topGOResults(Genes=GeneVectors$DEGenes, gene2GO=CCann, ontology='CC', 
                         desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                         EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                         saveRes=TRUE, outDir=OutputFolder, fileName='CCAll')
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 1732 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 1926 GO terms and 3253 relations. )
## 
## Annotating nodes ...............
##  ( 13224 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 199 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 11:  3 nodes to be scored    (0 eliminated genes)
## 
##   Level 10:  12 nodes to be scored   (0 eliminated genes)
## 
##   Level 9:   17 nodes to be scored   (231 eliminated genes)
## 
##   Level 8:   23 nodes to be scored   (1054 eliminated genes)
## 
##   Level 7:   30 nodes to be scored   (1961 eliminated genes)
## 
##   Level 6:   33 nodes to be scored   (6865 eliminated genes)
## 
##   Level 5:   32 nodes to be scored   (8811 eliminated genes)
## 
##   Level 4:   22 nodes to be scored   (11461 eliminated genes)
## 
##   Level 3:   24 nodes to be scored   (12669 eliminated genes)
## 
##   Level 2:   2 nodes to be scored    (13068 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (13169 eliminated genes)

#write.table(ResCCAll$ResAll, file=paste0(OutputFolder, 'TopGO/CCAllResults.txt'), sep='\t', row.names=FALSE)

Cellular Component Enrichment for DOWN-REGULATED genes: 56 genes

# Wrapper function for topGO analysis 
ResCCDown <- topGOResults(Genes=GeneVectors$DEGenesDown, gene2GO=CCann, ontology='CC', 
                          desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                          EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                          saveRes=TRUE, outDir=OutputFolder, fileName='CCDown')
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 1732 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 1926 GO terms and 3253 relations. )
## 
## Annotating nodes ...............
##  ( 13224 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 174 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 11:  2 nodes to be scored    (0 eliminated genes)
## 
##   Level 10:  10 nodes to be scored   (0 eliminated genes)
## 
##   Level 9:   15 nodes to be scored   (126 eliminated genes)
## 
##   Level 8:   15 nodes to be scored   (826 eliminated genes)
## 
##   Level 7:   28 nodes to be scored   (1818 eliminated genes)
## 
##   Level 6:   29 nodes to be scored   (6379 eliminated genes)
## 
##   Level 5:   30 nodes to be scored   (8738 eliminated genes)
## 
##   Level 4:   21 nodes to be scored   (10937 eliminated genes)
## 
##   Level 3:   21 nodes to be scored   (12668 eliminated genes)
## 
##   Level 2:   2 nodes to be scored    (13068 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (13164 eliminated genes)

GOTable(ResCCDown$ResSel, maxGO=20)

Cellular Component Analysis for UP-REGULATED genes: 26 genes

# Wrapper function for topGO analysis 
ResCCUp <- topGOResults(Genes=GeneVectors$DEGenesUp, gene2GO=CCann, ontology='CC', 
                        desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                        EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                        saveRes=TRUE, outDir=OutputFolder, fileName='CCUp')
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 1732 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 1926 GO terms and 3253 relations. )
## 
## Annotating nodes ...............
##  ( 13224 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 97 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 11:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 10:  2 nodes to be scored    (0 eliminated genes)
## 
##   Level 9:   5 nodes to be scored    (105 eliminated genes)
## 
##   Level 8:   15 nodes to be scored   (279 eliminated genes)
## 
##   Level 7:   10 nodes to be scored   (1042 eliminated genes)
## 
##   Level 6:   15 nodes to be scored   (6438 eliminated genes)
## 
##   Level 5:   16 nodes to be scored   (7881 eliminated genes)
## 
##   Level 4:   15 nodes to be scored   (11164 eliminated genes)
## 
##   Level 3:   15 nodes to be scored   (12496 eliminated genes)
## 
##   Level 2:   2 nodes to be scored    (13016 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (13144 eliminated genes)

GOTable(ResCCUp$ResSel, maxGO=20)

Result visualization: Barplot

topGOBarplotAll(TopGOResAll=ResCCAll$ResSel, TopGOResDown=ResCCDown$ResSel, TopGOResUp=ResCCUp$ResSel, 
                terms=8, pvalTh=0.01, plotTitle=NULL)

Top Terms associated Genes

All

plotGenesInTerm_v2(ResCCAll$ResSel, ResCCAll$GOdata, SE_DEA, nterms=8, ngenes=12, plotTitle=NULL, Interactive=FALSE)

Down

plotGenesInTerm_v2(ResCCDown$ResSel, ResCCDown$GOdata, SE_DEA, nterms=8, ngenes=12, plotTitle='Genes in Term - Down DEGs', Interactive=FALSE, fillCol='blue')

Up

plotGenesInTerm_v2(ResCCUp$ResSel, ResCCUp$GOdata, SE_DEA, nterms=8, ngenes=12, plotTitle='Genes in Term - Up DEGs', Interactive=FALSE, fillCol='red')

6. Savings

SessionInfo <- sessionInfo()
Date <- date()
#
save.image(paste0(OutputFolder, Dataset, 'FunctionalAnalysisWorkspace.RData'))

SessionInfo

## R version 4.2.1 (2022-06-23)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] DESeq2_1.38.3               sechm_1.6.0                
##  [3] SummarizedExperiment_1.28.0 GenomicRanges_1.50.2       
##  [5] GenomeInfoDb_1.34.9         MatrixGenerics_1.10.0      
##  [7] matrixStats_0.63.0          dplyr_1.1.0                
##  [9] tidyr_1.3.0                 data.table_1.14.8          
## [11] topGO_2.50.0                SparseM_1.81               
## [13] GO.db_3.16.0                graph_1.76.0               
## [15] viridis_0.6.2               viridisLite_0.4.1          
## [17] RColorBrewer_1.1-3          gridExtra_2.3              
## [19] org.Hs.eg.db_3.16.0         AnnotationDbi_1.60.0       
## [21] IRanges_2.32.0              S4Vectors_0.36.1           
## [23] Biobase_2.58.0              BiocGenerics_0.44.0        
## [25] ggplot2_3.4.1               DT_0.27                    
## [27] RNASeqBulkExploratory_0.2.1
## 
## loaded via a namespace (and not attached):
##  [1] Rtsne_0.16             colorspace_2.1-0       rjson_0.2.21          
##  [4] ellipsis_0.3.2         circlize_0.4.15        XVector_0.38.0        
##  [7] GlobalOptions_0.1.2    clue_0.3-64            rstudioapi_0.14       
## [10] farver_2.1.1           bit64_4.0.5            fansi_1.0.4           
## [13] codetools_0.2-19       doParallel_1.0.17      cachem_1.0.7          
## [16] geneplotter_1.76.0     knitr_1.42             jsonlite_1.8.4        
## [19] annotate_1.76.0        cluster_2.1.4          png_0.1-8             
## [22] compiler_4.2.1         httr_1.4.5             lazyeval_0.2.2        
## [25] Matrix_1.5-3           fastmap_1.1.1          cli_3.6.1             
## [28] htmltools_0.5.4        tools_4.2.1            gtable_0.3.1          
## [31] glue_1.6.2             GenomeInfoDbData_1.2.9 V8_4.2.2              
## [34] Rcpp_1.0.10            jquerylib_0.1.4        vctrs_0.6.2           
## [37] Biostrings_2.66.0      iterators_1.0.14       crosstalk_1.2.0       
## [40] xfun_0.37              stringr_1.5.0          lifecycle_1.0.3       
## [43] XML_3.99-0.13          ca_0.71.1              zlibbioc_1.44.0       
## [46] scales_1.2.1           TSP_1.2-2              parallel_4.2.1        
## [49] ComplexHeatmap_2.14.0  yaml_2.3.7             curl_5.0.0            
## [52] memoise_2.0.1          sass_0.4.5             stringi_1.7.12        
## [55] RSQLite_2.3.0          highr_0.10             randomcoloR_1.1.0.1   
## [58] foreach_1.5.2          seriation_1.4.1        BiocParallel_1.32.5   
## [61] shape_1.4.6            rlang_1.1.1            pkgconfig_2.0.3       
## [64] bitops_1.0-7           evaluate_0.20          lattice_0.20-45       
## [67] purrr_1.0.1            labeling_0.4.2         htmlwidgets_1.6.1     
## [70] bit_4.0.5              tidyselect_1.2.0       magrittr_2.0.3        
## [73] R6_2.5.1               generics_0.1.3         DelayedArray_0.24.0   
## [76] DBI_1.1.3              pillar_1.8.1           withr_2.5.0           
## [79] KEGGREST_1.38.0        RCurl_1.98-1.10        tibble_3.2.1          
## [82] crayon_1.5.2           utf8_1.2.3             plotly_4.10.1         
## [85] rmarkdown_2.20         GetoptLong_1.0.5       locfit_1.5-9.7        
## [88] grid_4.2.1             blob_1.2.3             digest_0.6.31         
## [91] xtable_1.8-4           munsell_0.5.0          registry_0.5-1        
## [94] bslib_0.4.2

Date

## [1] "Fri Jul 18 19:27:59 2025"

TopGO - CTL04

Date: July 18, 2025

1. Environment Set Up

2. Data Upload

2.1 Load Data from DEA

2.2 Add DEA results to SE

3. RESULTS NAVIGATION: Interactive Table

4. RESULTS VISUALIZATION

4.1 Volcano plot

4.3 Heatmap for significant genes

5. TOPGO for Gene Ontology Enrichment analysis

5.1 Selection of modulated genes and generation of gene vectors

5.2 TopGO analysis: Biological Process

Biological Process Analysis for ALL modulated genes: 82 genes

Biological Process Analysis for DOWN-REGULATED genes: 56 genes

Biological Process Analysis for UP-REGULATED genes: 26 genes

Result visualization: Barplot

Top Terms associated Genes

All

Down

Up

5.3 TopGO analysis: Molecular Function

Molecular Function Enrichment for ALL modulated genes: 82 genes

Molecular Function Enrichment for DOWN-REGULATED genes: 56 genes

Molecular Function Analysis for UP-REGULATED genes: 26 genes

Result visualization: Barplot

Top Terms associated Genes

All

Down

Up

5.4 TopGO analysis: Cellular Component

Cellular Component Enrichment for ALL modulated genes: 82 genes

Cellular Component Enrichment for DOWN-REGULATED genes: 56 genes

Cellular Component Analysis for UP-REGULATED genes: 26 genes

Result visualization: Barplot

Top Terms associated Genes

All

Down

Up

6. Savings