TopGO - CTL04

Condition_1 <- params$Condition_1
Condition_2 <- params$Condition_2

Top GO Analysis for Andr AgvsInh

1. Environment Set Up

library(RNASeqBulkExploratory)
library(DT)
library(ggplot2)
library(AnnotationDbi)
## Loading required package: stats4
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: IRanges
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
library(org.Hs.eg.db)
## 
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:Biobase':
## 
##     combine
## The following object is masked from 'package:BiocGenerics':
## 
##     combine
library(RColorBrewer)
library(viridis)
## Loading required package: viridisLite
library(topGO)
## Loading required package: graph
## Loading required package: GO.db
## 
## Loading required package: SparseM
## 
## Attaching package: 'SparseM'
## The following object is masked from 'package:base':
## 
##     backsolve
## 
## groupGOTerms:    GOBPTerm, GOMFTerm, GOCCTerm environments built.
## 
## Attaching package: 'topGO'
## The following object is masked from 'package:IRanges':
## 
##     members
library(data.table)
## 
## Attaching package: 'data.table'
## The following object is masked from 'package:IRanges':
## 
##     shift
## The following objects are masked from 'package:S4Vectors':
## 
##     first, second
library(tidyr)
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:S4Vectors':
## 
##     expand
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
## 
##     between, first, last
## The following object is masked from 'package:graph':
## 
##     union
## The following object is masked from 'package:gridExtra':
## 
##     combine
## The following object is masked from 'package:AnnotationDbi':
## 
##     select
## The following objects are masked from 'package:IRanges':
## 
##     collapse, desc, intersect, setdiff, slice, union
## The following objects are masked from 'package:S4Vectors':
## 
##     first, intersect, rename, setdiff, setequal, union
## The following object is masked from 'package:Biobase':
## 
##     combine
## The following objects are masked from 'package:BiocGenerics':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(SummarizedExperiment)
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## 
## Attaching package: 'matrixStats'
## The following object is masked from 'package:dplyr':
## 
##     count
## The following objects are masked from 'package:Biobase':
## 
##     anyMissing, rowMedians
## 
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## The following object is masked from 'package:Biobase':
## 
##     rowMedians
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
library(sechm)

source("../../plotGenesInTerm_v2.R")

Dataset <- params$Dataset
logFcTh <- params$logFcTh
FdrTh <- params$FdrTh
OutputFolder <- ifelse(is.null(params$OutputFolder), getwd(), params$OutputFolder) 


if (dir.exists(OutputFolder) == FALSE) {
  dir.create(OutputFolder, recursive=TRUE)
}

2. Data Upload

Summarized Experiment object containing expression data used for DEA and gene and sample metadata
DEA object, containing results of the differential expression

2.1 Load Data from DEA

# List with differential expression results 
DEA <- readRDS(params$DEAFile)

#SE object coming from DEA, but not containing specific contrast results
SE_DEA <- readRDS(params$SEFile)

2.2 Add DEA results to SE

if(! identical(rownames(SE_DEA), row.names(DEA[[Condition_1]][[Condition_2]]$Res))){
  stop('Expression data in SE and results from differential espression analysis are inconsistent.')
}
## Loading required package: DESeq2


rowData(SE_DEA) <- cbind(rowData(SE_DEA)[,1:6], DEA[[Condition_1]][[Condition_2]]$Res)
  
# Column names must be set to be compliant with the required format to be recognized by ORA
names(rowData(SE_DEA))[which(names(rowData(SE_DEA))=='log2FoldChange')] <- 'logFC'
names(rowData(SE_DEA))[which(names(rowData(SE_DEA))=='padj')] <- 'FDR'

#metadata(SE_DEA_Prel)$annotation <- 'hsa'

14305 genes in 21 samples have been testes for differential expression.

Imposing a threshold of 1 on the Log2FC and 0.01 on the FDR (as specified in parameters), 550 genes are selected: 325 up-regulated genes and 225 down-regulated genes.

3. RESULTS NAVIGATION: Interactive Table

An interactive table show the results for the top 500 DEGs (ranked according to FDR).

DEGsTable(SE_DEA, FdrTh=0.01, logFcTh=1, maxGenes=500, saveDEGs=TRUE, outDir=OutputFolder)

## Top 500 genes will be included in the table

4. RESULTS VISUALIZATION

4.1 Volcano plot

The results of the differential expression analysis are visualized by Volcano plot. An interactive version is included in the html (only genes with FDR < threshold), while a static version is saved.

plotVolcanoSE(SE=SE_DEA, FdrTh=FdrTh, logFcTh=logFcTh, FdrCeil=1e-10, logFcCeil=4)

4.3 Heatmap for significant genes

Heatmaps for DEGs, showing scaled vst values.

DEGs <- dplyr::filter(data.frame(rowData(SE_DEA)), FDR < FdrTh & abs(logFC) > logFcTh)   


ScaledCols <- c('darkblue', "purple","white","lightgoldenrod1", 'goldenrod1')

colData(SE_DEA)$Condition <- factor(colData(SE_DEA)$Condition, levels=c("CTL", "DMSO", "AhHyd_Ag", "AhHyd_Inh", "Andr_Ag", "Andr_Inh", "Estr_Ag", "Estr_Inh", "GC_Ag", "GC_Inh", "LivX_Ag", "LivX_Inh", "Ret_Ag", "Ret_Inh", "Thyr_Ag", "Thyr_Inh" ))

metadata(SE_DEA)$anno_colors <- list(Condition = c('DMSO' = 'grey30', 'CTL' = 'azure3', 
                 'AhHyd_Ag'='#F8766D', 'AhHyd_Inh'='#F8766D50',
                 'Andr_Ag'='#fccb17', 'Andr_Inh'='#C49A0050',  
                 "Estr_Ag"= '#53B400', "Estr_Inh"= '#53B40050', 
                 'GC_Ag' = '#00C094', 'GC_Inh' = '#00C09450',
                 'LivX_Ag' = '#00B6EB', 'LivX_Inh' = '#00B6EB50', 
                 'Ret_Ag' = '#A58AFF', 'Ret_Inh' = '#A58AFF50', 
                 'Thyr_Ag' = '#FB61D7', 'Thyr_Inh' = '#FB61D750'
                 ))

sechm(SE_DEA, features=DEGs$GeneName, assayName="vst", gaps_at="Condition", show_rownames=FALSE,
      top_annotation=c('Condition'), hmcols=ScaledCols, show_colnames=TRUE,
      do.scale=TRUE, breaks=0.85)

5. TOPGO for Gene Ontology Enrichment analysis

Gene ontology enrichment analysis is performed on the set of 550 genes using TopGO with Fisher statistics and weight01 algorithm.

For each specified domain of the ontology:

Enrichment analysis on all DEGs or splitted in down- and up-regulated

5.1 Selection of modulated genes and generation of gene vectors

I generate vectors for the gene universe, all modulated genes, up-regulated genes and down-regulated genes in the format required by TopGo.

GeneVectors <- topGOGeneVectors(SE_DEA, FdrTh=FdrTh, logFcTh=logFcTh)

## Gene vector contains levels: 0,1
## Gene vector contains levels: 0,1
## Gene vector contains levels: 0,1

Therefore:

universe genes: 14305 genes
modulated genes: 550 genes
down-regulated genes: 225 genes of interest
up-regulated genes: 325 genes of interest

Then I set parameters according to the gene ontology domains to be evaluated. By default, Biological Process and Molecular Function domains are interrogated.

BpEval <- ifelse(length(grep('BP', params$TopGO))!=0, TRUE, FALSE)
MfEval <- ifelse(length(grep('MF', params$TopGO))!=0, TRUE, FALSE)
CcEval <- ifelse(length(grep('CC', params$TopGO))!=0, TRUE, FALSE)

5.2 TopGO analysis: Biological Process

On the basis of the analysis settings, the enrichment for Biological Process IS performed.

Biological Process Analysis for ALL modulated genes: 550 genes

BPann <- topGO::annFUN.org(whichOnto="BP", feasibleGenes=names(GeneVectors$DEGenes), 
                           mapping="org.Hs.eg.db", ID="symbol") %>% inverseList()

# Wrapper function for topGO analysis 
ResBPAll <- topGOResults(Genes=GeneVectors$DEGenes, gene2GO=BPann, ontology='BP', 
                         desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                         EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                         saveRes=TRUE, outDir=paste0(OutputFolder), fileName='BPAll')
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 11416 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 14877 GO terms and 33562 relations. )
## 
## Annotating nodes ...............
##  ( 12665 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 3892 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 17:  2 nodes to be scored    (0 eliminated genes)
## 
##   Level 16:  5 nodes to be scored    (0 eliminated genes)
## 
##   Level 15:  14 nodes to be scored   (25 eliminated genes)
## 
##   Level 14:  36 nodes to be scored   (132 eliminated genes)
## 
##   Level 13:  55 nodes to be scored   (359 eliminated genes)
## 
##   Level 12:  105 nodes to be scored  (1129 eliminated genes)
## 
##   Level 11:  237 nodes to be scored  (2978 eliminated genes)
## 
##   Level 10:  409 nodes to be scored  (4899 eliminated genes)
## 
##   Level 9:   534 nodes to be scored  (6423 eliminated genes)
## 
##   Level 8:   599 nodes to be scored  (8209 eliminated genes)
## 
##   Level 7:   654 nodes to be scored  (9916 eliminated genes)
## 
##   Level 6:   568 nodes to be scored  (11064 eliminated genes)
## 
##   Level 5:   370 nodes to be scored  (11822 eliminated genes)
## 
##   Level 4:   199 nodes to be scored  (12251 eliminated genes)
## 
##   Level 3:   87 nodes to be scored   (12430 eliminated genes)
## 
##   Level 2:   17 nodes to be scored   (12507 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (12546 eliminated genes)

Biological Process Analysis for DOWN-REGULATED genes: 225 genes

# Wrapper function for topGO analysis 
ResBPDown <- topGOResults(Genes=GeneVectors$DEGenesDown, gene2GO=BPann, ontology='BP', 
                          desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                          EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                          saveRes=TRUE, outDir=paste0(OutputFolder), fileName='BPDown')
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 11416 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 14877 GO terms and 33562 relations. )
## 
## Annotating nodes ...............
##  ( 12665 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 2673 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 15:  2 nodes to be scored    (0 eliminated genes)
## 
##   Level 14:  9 nodes to be scored    (0 eliminated genes)
## 
##   Level 13:  23 nodes to be scored   (90 eliminated genes)
## 
##   Level 12:  65 nodes to be scored   (551 eliminated genes)
## 
##   Level 11:  143 nodes to be scored  (2472 eliminated genes)
## 
##   Level 10:  241 nodes to be scored  (4396 eliminated genes)
## 
##   Level 9:   355 nodes to be scored  (5852 eliminated genes)
## 
##   Level 8:   403 nodes to be scored  (7399 eliminated genes)
## 
##   Level 7:   458 nodes to be scored  (9387 eliminated genes)
## 
##   Level 6:   425 nodes to be scored  (10809 eliminated genes)
## 
##   Level 5:   302 nodes to be scored  (11656 eliminated genes)
## 
##   Level 4:   158 nodes to be scored  (12174 eliminated genes)
## 
##   Level 3:   72 nodes to be scored   (12409 eliminated genes)
## 
##   Level 2:   16 nodes to be scored   (12505 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (12545 eliminated genes)

GOTable(ResBPDown$ResSel, maxGO=20)

Biological Process Analysis for UP-REGULATED genes: 325 genes

ResBPUp <- topGOResults(Genes=GeneVectors$DEGenesUp, gene2GO=BPann, ontology='BP', 
                        desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                        EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                        saveRes=TRUE, outDir=OutputFolder, fileName='BPUp')
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 11416 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 14877 GO terms and 33562 relations. )
## 
## Annotating nodes ...............
##  ( 12665 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 3425 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 17:  2 nodes to be scored    (0 eliminated genes)
## 
##   Level 16:  5 nodes to be scored    (0 eliminated genes)
## 
##   Level 15:  14 nodes to be scored   (25 eliminated genes)
## 
##   Level 14:  34 nodes to be scored   (132 eliminated genes)
## 
##   Level 13:  51 nodes to be scored   (359 eliminated genes)
## 
##   Level 12:  82 nodes to be scored   (1100 eliminated genes)
## 
##   Level 11:  186 nodes to be scored  (2908 eliminated genes)
## 
##   Level 10:  329 nodes to be scored  (4627 eliminated genes)
## 
##   Level 9:   453 nodes to be scored  (5984 eliminated genes)
## 
##   Level 8:   527 nodes to be scored  (7735 eliminated genes)
## 
##   Level 7:   578 nodes to be scored  (9648 eliminated genes)
## 
##   Level 6:   520 nodes to be scored  (10940 eliminated genes)
## 
##   Level 5:   350 nodes to be scored  (11725 eliminated genes)
## 
##   Level 4:   192 nodes to be scored  (12224 eliminated genes)
## 
##   Level 3:   84 nodes to be scored   (12424 eliminated genes)
## 
##   Level 2:   17 nodes to be scored   (12504 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (12546 eliminated genes)

GOTable(ResBPUp$ResSel, maxGO=20)

Result visualization: Barplot

topGOBarplotAll(TopGOResAll=ResBPAll$ResSel, TopGOResDown=ResBPDown$ResSel, TopGOResUp=ResBPUp$ResSel, 
                terms=8, pvalTh=0.01, plotTitle=NULL)

Top Terms associated Genes

All

plotGenesInTerm_v2(ResBPAll$ResSel, ResBPAll$GOdata, SE_DEA, nterms=8, ngenes=12, plotTitle=NULL, Interactive=FALSE)

Down

plotGenesInTerm_v2(ResBPDown$ResSel, ResBPDown$GOdata, SE_DEA, nterms=8, ngenes=12, plotTitle='Genes in Term - Down DEGs', Interactive=FALSE, fillCol='blue')

Up

plotGenesInTerm_v2(ResBPUp$ResSel, ResBPUp$GOdata, SE_DEA, nterms=8, ngenes=12, plotTitle='Genes in Term - Up DEGs', Interactive=FALSE, fillCol='red')

5.3 TopGO analysis: Molecular Function

On the basis of the analysis settings, the enrichment for Molecular Function IS performed.

Molecular Function Enrichment for ALL modulated genes: 550 genes

MFann <- topGO::annFUN.org(whichOnto='MF', feasibleGenes=names(GeneVectors$DEGenes), 
                           mapping='org.Hs.eg.db', ID='symbol') %>% inverseList()

# Wrapper function for topGO analysis 
ResMFAll <- topGOResults(Genes=GeneVectors$DEGenes, gene2GO=MFann, ontology='MF', 
                         desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                         EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                         saveRes=TRUE, outDir=OutputFolder, fileName='MFAll')
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 4060 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 4530 GO terms and 5903 relations. )
## 
## Annotating nodes ...............
##  ( 12999 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 611 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 11:  5 nodes to be scored    (0 eliminated genes)
## 
##   Level 10:  11 nodes to be scored   (0 eliminated genes)
## 
##   Level 9:   22 nodes to be scored   (131 eliminated genes)
## 
##   Level 8:   39 nodes to be scored   (1183 eliminated genes)
## 
##   Level 7:   74 nodes to be scored   (3108 eliminated genes)
## 
##   Level 6:   113 nodes to be scored  (3620 eliminated genes)
## 
##   Level 5:   140 nodes to be scored  (5004 eliminated genes)
## 
##   Level 4:   137 nodes to be scored  (7884 eliminated genes)
## 
##   Level 3:   53 nodes to be scored   (10255 eliminated genes)
## 
##   Level 2:   16 nodes to be scored   (11079 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (12881 eliminated genes)

Molecular Function Enrichment for DOWN-REGULATED genes: 225 genes

ResMFDown <- topGOResults(Genes=GeneVectors$DEGenesDown, gene2GO=MFann, ontology='MF', 
                          desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                          EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                          saveRes=TRUE, outDir=OutputFolder, fileName='MFDown')
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 4060 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 4530 GO terms and 5903 relations. )
## 
## Annotating nodes ...............
##  ( 12999 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 443 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 11:  2 nodes to be scored    (0 eliminated genes)
## 
##   Level 10:  6 nodes to be scored    (0 eliminated genes)
## 
##   Level 9:   15 nodes to be scored   (78 eliminated genes)
## 
##   Level 8:   28 nodes to be scored   (1062 eliminated genes)
## 
##   Level 7:   50 nodes to be scored   (3031 eliminated genes)
## 
##   Level 6:   82 nodes to be scored   (3465 eliminated genes)
## 
##   Level 5:   98 nodes to be scored   (4615 eliminated genes)
## 
##   Level 4:   101 nodes to be scored  (7601 eliminated genes)
## 
##   Level 3:   46 nodes to be scored   (9965 eliminated genes)
## 
##   Level 2:   14 nodes to be scored   (10935 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (12874 eliminated genes)

GOTable(ResMFDown$ResSel, maxGO=20)

Molecular Function Analysis for UP-REGULATED genes: 325 genes

ResMFUp <- topGOResults(Genes=GeneVectors$DEGenesUp, gene2GO=MFann, ontology='MF', 
                        desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                        EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                        saveRes=TRUE, outDir=OutputFolder, fileName='MFUp')
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 4060 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 4530 GO terms and 5903 relations. )
## 
## Annotating nodes ...............
##  ( 12999 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 515 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 11:  5 nodes to be scored    (0 eliminated genes)
## 
##   Level 10:  8 nodes to be scored    (0 eliminated genes)
## 
##   Level 9:   17 nodes to be scored   (131 eliminated genes)
## 
##   Level 8:   30 nodes to be scored   (1110 eliminated genes)
## 
##   Level 7:   57 nodes to be scored   (3001 eliminated genes)
## 
##   Level 6:   91 nodes to be scored   (3475 eliminated genes)
## 
##   Level 5:   127 nodes to be scored  (4762 eliminated genes)
## 
##   Level 4:   116 nodes to be scored  (7568 eliminated genes)
## 
##   Level 3:   48 nodes to be scored   (10206 eliminated genes)
## 
##   Level 2:   15 nodes to be scored   (10998 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (12879 eliminated genes)

GOTable(ResMFUp$ResSel, maxGO=20)

Result visualization: Barplot

topGOBarplotAll(TopGOResAll=ResMFAll$ResSel, TopGOResDown=ResMFDown$ResSel, TopGOResUp=ResMFUp$ResSel, 
                terms=8, pvalTh=0.01, plotTitle=NULL)

Top Terms associated Genes

All

plotGenesInTerm_v2(ResMFAll$ResSel, ResMFAll$GOdata, SE_DEA, nterms=8, ngenes=12, plotTitle=NULL, Interactive=FALSE)

Down

plotGenesInTerm_v2(ResMFDown$ResSel, ResMFDown$GOdata, SE_DEA, nterms=8, ngenes=12, plotTitle='Genes in Term - Down DEGs', Interactive=FALSE, fillCol='blue')

Up

plotGenesInTerm_v2(ResMFUp$ResSel, ResMFUp$GOdata, SE_DEA, nterms=8, ngenes=12, plotTitle='Genes in Term - Up DEGs', Interactive=FALSE, fillCol='red')

5.4 TopGO analysis: Cellular Component

On the basis of the analysis settings, the enrichment for Cellular Component IS performed.

Cellular Component Enrichment for ALL modulated genes: 550 genes

CCann <- topGO::annFUN.org(whichOnto='CC', feasibleGenes=names(GeneVectors$DEGenes), 
                           mapping='org.Hs.eg.db', ID='symbol') %>% inverseList()

# Wrapper function for topGO analysis 
ResCCAll <- topGOResults(Genes=GeneVectors$DEGenes, gene2GO=CCann, ontology='CC', 
                         desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                         EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                         saveRes=TRUE, outDir=OutputFolder, fileName='CCAll')
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 1732 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 1926 GO terms and 3253 relations. )
## 
## Annotating nodes ...............
##  ( 13224 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 457 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 13:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 12:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 11:  12 nodes to be scored   (30 eliminated genes)
## 
##   Level 10:  40 nodes to be scored   (42 eliminated genes)
## 
##   Level 9:   61 nodes to be scored   (538 eliminated genes)
## 
##   Level 8:   66 nodes to be scored   (2465 eliminated genes)
## 
##   Level 7:   74 nodes to be scored   (4536 eliminated genes)
## 
##   Level 6:   71 nodes to be scored   (8156 eliminated genes)
## 
##   Level 5:   56 nodes to be scored   (9765 eliminated genes)
## 
##   Level 4:   37 nodes to be scored   (11647 eliminated genes)
## 
##   Level 3:   35 nodes to be scored   (12704 eliminated genes)
## 
##   Level 2:   2 nodes to be scored    (13073 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (13169 eliminated genes)

#write.table(ResCCAll$ResAll, file=paste0(OutputFolder, 'TopGO/CCAllResults.txt'), sep='\t', row.names=FALSE)

Cellular Component Enrichment for DOWN-REGULATED genes: 225 genes

# Wrapper function for topGO analysis 
ResCCDown <- topGOResults(Genes=GeneVectors$DEGenesDown, gene2GO=CCann, ontology='CC', 
                          desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                          EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                          saveRes=TRUE, outDir=OutputFolder, fileName='CCDown')
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 1732 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 1926 GO terms and 3253 relations. )
## 
## Annotating nodes ...............
##  ( 13224 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 320 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 11:  7 nodes to be scored    (0 eliminated genes)
## 
##   Level 10:  20 nodes to be scored   (0 eliminated genes)
## 
##   Level 9:   35 nodes to be scored   (329 eliminated genes)
## 
##   Level 8:   45 nodes to be scored   (1614 eliminated genes)
## 
##   Level 7:   50 nodes to be scored   (3928 eliminated genes)
## 
##   Level 6:   54 nodes to be scored   (7856 eliminated genes)
## 
##   Level 5:   46 nodes to be scored   (9654 eliminated genes)
## 
##   Level 4:   29 nodes to be scored   (11595 eliminated genes)
## 
##   Level 3:   31 nodes to be scored   (12694 eliminated genes)
## 
##   Level 2:   2 nodes to be scored    (13071 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (13169 eliminated genes)

GOTable(ResCCDown$ResSel, maxGO=20)

Cellular Component Analysis for UP-REGULATED genes: 325 genes

# Wrapper function for topGO analysis 
ResCCUp <- topGOResults(Genes=GeneVectors$DEGenesUp, gene2GO=CCann, ontology='CC', 
                        desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                        EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                        saveRes=TRUE, outDir=OutputFolder, fileName='CCUp')
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 1732 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 1926 GO terms and 3253 relations. )
## 
## Annotating nodes ...............
##  ( 13224 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 408 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 13:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 12:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 11:  9 nodes to be scored    (30 eliminated genes)
## 
##   Level 10:  35 nodes to be scored   (42 eliminated genes)
## 
##   Level 9:   53 nodes to be scored   (430 eliminated genes)
## 
##   Level 8:   59 nodes to be scored   (2306 eliminated genes)
## 
##   Level 7:   66 nodes to be scored   (4396 eliminated genes)
## 
##   Level 6:   62 nodes to be scored   (8040 eliminated genes)
## 
##   Level 5:   51 nodes to be scored   (9690 eliminated genes)
## 
##   Level 4:   35 nodes to be scored   (11626 eliminated genes)
## 
##   Level 3:   33 nodes to be scored   (12703 eliminated genes)
## 
##   Level 2:   2 nodes to be scored    (13073 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (13169 eliminated genes)

GOTable(ResCCUp$ResSel, maxGO=20)

Result visualization: Barplot

topGOBarplotAll(TopGOResAll=ResCCAll$ResSel, TopGOResDown=ResCCDown$ResSel, TopGOResUp=ResCCUp$ResSel, 
                terms=8, pvalTh=0.01, plotTitle=NULL)

Top Terms associated Genes

All

plotGenesInTerm_v2(ResCCAll$ResSel, ResCCAll$GOdata, SE_DEA, nterms=8, ngenes=12, plotTitle=NULL, Interactive=FALSE)

Down

plotGenesInTerm_v2(ResCCDown$ResSel, ResCCDown$GOdata, SE_DEA, nterms=8, ngenes=12, plotTitle='Genes in Term - Down DEGs', Interactive=FALSE, fillCol='blue')

Up

plotGenesInTerm_v2(ResCCUp$ResSel, ResCCUp$GOdata, SE_DEA, nterms=8, ngenes=12, plotTitle='Genes in Term - Up DEGs', Interactive=FALSE, fillCol='red')

6. Savings

SessionInfo <- sessionInfo()
Date <- date()
#
save.image(paste0(OutputFolder, Dataset, 'FunctionalAnalysisWorkspace.RData'))

SessionInfo

## R version 4.2.1 (2022-06-23)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] DESeq2_1.38.3               sechm_1.6.0                
##  [3] SummarizedExperiment_1.28.0 GenomicRanges_1.50.2       
##  [5] GenomeInfoDb_1.34.9         MatrixGenerics_1.10.0      
##  [7] matrixStats_0.63.0          dplyr_1.1.0                
##  [9] tidyr_1.3.0                 data.table_1.14.8          
## [11] topGO_2.50.0                SparseM_1.81               
## [13] GO.db_3.16.0                graph_1.76.0               
## [15] viridis_0.6.2               viridisLite_0.4.1          
## [17] RColorBrewer_1.1-3          gridExtra_2.3              
## [19] org.Hs.eg.db_3.16.0         AnnotationDbi_1.60.0       
## [21] IRanges_2.32.0              S4Vectors_0.36.1           
## [23] Biobase_2.58.0              BiocGenerics_0.44.0        
## [25] ggplot2_3.4.1               DT_0.27                    
## [27] RNASeqBulkExploratory_0.2.1
## 
## loaded via a namespace (and not attached):
##  [1] Rtsne_0.16             colorspace_2.1-0       rjson_0.2.21          
##  [4] ellipsis_0.3.2         circlize_0.4.15        XVector_0.38.0        
##  [7] GlobalOptions_0.1.2    clue_0.3-64            rstudioapi_0.14       
## [10] farver_2.1.1           bit64_4.0.5            fansi_1.0.4           
## [13] codetools_0.2-19       doParallel_1.0.17      cachem_1.0.7          
## [16] geneplotter_1.76.0     knitr_1.42             jsonlite_1.8.4        
## [19] annotate_1.76.0        cluster_2.1.4          png_0.1-8             
## [22] compiler_4.2.1         httr_1.4.5             lazyeval_0.2.2        
## [25] Matrix_1.5-3           fastmap_1.1.1          cli_3.6.1             
## [28] htmltools_0.5.4        tools_4.2.1            gtable_0.3.1          
## [31] glue_1.6.2             GenomeInfoDbData_1.2.9 V8_4.2.2              
## [34] Rcpp_1.0.10            jquerylib_0.1.4        vctrs_0.6.2           
## [37] Biostrings_2.66.0      iterators_1.0.14       crosstalk_1.2.0       
## [40] xfun_0.37              stringr_1.5.0          lifecycle_1.0.3       
## [43] XML_3.99-0.13          ca_0.71.1              zlibbioc_1.44.0       
## [46] scales_1.2.1           TSP_1.2-2              parallel_4.2.1        
## [49] ComplexHeatmap_2.14.0  yaml_2.3.7             curl_5.0.0            
## [52] memoise_2.0.1          sass_0.4.5             stringi_1.7.12        
## [55] RSQLite_2.3.0          highr_0.10             randomcoloR_1.1.0.1   
## [58] foreach_1.5.2          seriation_1.4.1        BiocParallel_1.32.5   
## [61] shape_1.4.6            rlang_1.1.1            pkgconfig_2.0.3       
## [64] bitops_1.0-7           evaluate_0.20          lattice_0.20-45       
## [67] purrr_1.0.1            labeling_0.4.2         htmlwidgets_1.6.1     
## [70] bit_4.0.5              tidyselect_1.2.0       magrittr_2.0.3        
## [73] R6_2.5.1               generics_0.1.3         DelayedArray_0.24.0   
## [76] DBI_1.1.3              pillar_1.8.1           withr_2.5.0           
## [79] KEGGREST_1.38.0        RCurl_1.98-1.10        tibble_3.2.1          
## [82] crayon_1.5.2           utf8_1.2.3             plotly_4.10.1         
## [85] rmarkdown_2.20         GetoptLong_1.0.5       locfit_1.5-9.7        
## [88] grid_4.2.1             blob_1.2.3             digest_0.6.31         
## [91] xtable_1.8-4           munsell_0.5.0          registry_0.5-1        
## [94] bslib_0.4.2

Date

## [1] "Fri Jul 18 19:24:04 2025"

TopGO - CTL04

Date: July 18, 2025

1. Environment Set Up

2. Data Upload

2.1 Load Data from DEA

2.2 Add DEA results to SE

3. RESULTS NAVIGATION: Interactive Table

4. RESULTS VISUALIZATION

4.1 Volcano plot

4.3 Heatmap for significant genes

5. TOPGO for Gene Ontology Enrichment analysis

5.1 Selection of modulated genes and generation of gene vectors

5.2 TopGO analysis: Biological Process

Biological Process Analysis for ALL modulated genes: 550 genes

Biological Process Analysis for DOWN-REGULATED genes: 225 genes

Biological Process Analysis for UP-REGULATED genes: 325 genes

Result visualization: Barplot

Top Terms associated Genes

All

Down

Up

5.3 TopGO analysis: Molecular Function

Molecular Function Enrichment for ALL modulated genes: 550 genes

Molecular Function Enrichment for DOWN-REGULATED genes: 225 genes

Molecular Function Analysis for UP-REGULATED genes: 325 genes

Result visualization: Barplot

Top Terms associated Genes

All

Down

Up

5.4 TopGO analysis: Cellular Component

Cellular Component Enrichment for ALL modulated genes: 550 genes

Cellular Component Enrichment for DOWN-REGULATED genes: 225 genes

Cellular Component Analysis for UP-REGULATED genes: 325 genes

Result visualization: Barplot

Top Terms associated Genes

All

Down

Up

6. Savings