TopGO - CTL04

Condition_1 <- params$Condition_1
Condition_2 <- params$Condition_2

Top GO Analysis for AhHyd AgvsInh

1. Environment Set Up

library(RNASeqBulkExploratory)
library(DT)
library(ggplot2)
library(AnnotationDbi)
## Loading required package: stats4
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: IRanges
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
library(org.Hs.eg.db)
## 
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:Biobase':
## 
##     combine
## The following object is masked from 'package:BiocGenerics':
## 
##     combine
library(RColorBrewer)
library(viridis)
## Loading required package: viridisLite
library(topGO)
## Loading required package: graph
## Loading required package: GO.db
## 
## Loading required package: SparseM
## 
## Attaching package: 'SparseM'
## The following object is masked from 'package:base':
## 
##     backsolve
## 
## groupGOTerms:    GOBPTerm, GOMFTerm, GOCCTerm environments built.
## 
## Attaching package: 'topGO'
## The following object is masked from 'package:IRanges':
## 
##     members
library(data.table)
## 
## Attaching package: 'data.table'
## The following object is masked from 'package:IRanges':
## 
##     shift
## The following objects are masked from 'package:S4Vectors':
## 
##     first, second
library(tidyr)
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:S4Vectors':
## 
##     expand
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
## 
##     between, first, last
## The following object is masked from 'package:graph':
## 
##     union
## The following object is masked from 'package:gridExtra':
## 
##     combine
## The following object is masked from 'package:AnnotationDbi':
## 
##     select
## The following objects are masked from 'package:IRanges':
## 
##     collapse, desc, intersect, setdiff, slice, union
## The following objects are masked from 'package:S4Vectors':
## 
##     first, intersect, rename, setdiff, setequal, union
## The following object is masked from 'package:Biobase':
## 
##     combine
## The following objects are masked from 'package:BiocGenerics':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(SummarizedExperiment)
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## 
## Attaching package: 'matrixStats'
## The following object is masked from 'package:dplyr':
## 
##     count
## The following objects are masked from 'package:Biobase':
## 
##     anyMissing, rowMedians
## 
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## The following object is masked from 'package:Biobase':
## 
##     rowMedians
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
library(sechm)

source("../../plotGenesInTerm_v2.R")

Dataset <- params$Dataset
logFcTh <- params$logFcTh
FdrTh <- params$FdrTh
OutputFolder <- ifelse(is.null(params$OutputFolder), getwd(), params$OutputFolder) 


if (dir.exists(OutputFolder) == FALSE) {
  dir.create(OutputFolder, recursive=TRUE)
}

2. Data Upload

Summarized Experiment object containing expression data used for DEA and gene and sample metadata
DEA object, containing results of the differential expression

2.1 Load Data from DEA

# List with differential expression results 
DEA <- readRDS(params$DEAFile)

#SE object coming from DEA, but not containing specific contrast results
SE_DEA <- readRDS(params$SEFile)

2.2 Add DEA results to SE

if(! identical(rownames(SE_DEA), row.names(DEA[[Condition_1]][[Condition_2]]$Res))){
  stop('Expression data in SE and results from differential espression analysis are inconsistent.')
}
## Loading required package: DESeq2


rowData(SE_DEA) <- cbind(rowData(SE_DEA)[,1:6], DEA[[Condition_1]][[Condition_2]]$Res)
  
# Column names must be set to be compliant with the required format to be recognized by ORA
names(rowData(SE_DEA))[which(names(rowData(SE_DEA))=='log2FoldChange')] <- 'logFC'
names(rowData(SE_DEA))[which(names(rowData(SE_DEA))=='padj')] <- 'FDR'

#metadata(SE_DEA_Prel)$annotation <- 'hsa'

14305 genes in 21 samples have been testes for differential expression.

Imposing a threshold of 1 on the Log2FC and 0.01 on the FDR (as specified in parameters), 390 genes are selected: 110 up-regulated genes and 280 down-regulated genes.

3. RESULTS NAVIGATION: Interactive Table

An interactive table show the results for the top 500 DEGs (ranked according to FDR).

DEGsTable(SE_DEA, FdrTh=0.01, logFcTh=1, maxGenes=500, saveDEGs=TRUE, outDir=OutputFolder)

4. RESULTS VISUALIZATION

4.1 Volcano plot

The results of the differential expression analysis are visualized by Volcano plot. An interactive version is included in the html (only genes with FDR < threshold), while a static version is saved.

plotVolcanoSE(SE=SE_DEA, FdrTh=FdrTh, logFcTh=logFcTh, FdrCeil=1e-10, logFcCeil=4)

4.3 Heatmap for significant genes

Heatmaps for DEGs, showing scaled vst values.

DEGs <- dplyr::filter(data.frame(rowData(SE_DEA)), FDR < FdrTh & abs(logFC) > logFcTh)   


ScaledCols <- c('darkblue', "purple","white","lightgoldenrod1", 'goldenrod1')

colData(SE_DEA)$Condition <- factor(colData(SE_DEA)$Condition, levels=c("CTL", "DMSO", "AhHyd_Ag", "AhHyd_Inh", "Andr_Ag", "Andr_Inh", "Estr_Ag", "Estr_Inh", "GC_Ag", "GC_Inh", "LivX_Ag", "LivX_Inh", "Ret_Ag", "Ret_Inh", "Thyr_Ag", "Thyr_Inh" ))

metadata(SE_DEA)$anno_colors <- list(Condition = c('DMSO' = 'grey30', 'CTL' = 'azure3', 
                 'AhHyd_Ag'='#F8766D', 'AhHyd_Inh'='#F8766D50',
                 'Andr_Ag'='#fccb17', 'Andr_Inh'='#C49A0050',  
                 "Estr_Ag"= '#53B400', "Estr_Inh"= '#53B40050', 
                 'GC_Ag' = '#00C094', 'GC_Inh' = '#00C09450',
                 'LivX_Ag' = '#00B6EB', 'LivX_Inh' = '#00B6EB50', 
                 'Ret_Ag' = '#A58AFF', 'Ret_Inh' = '#A58AFF50', 
                 'Thyr_Ag' = '#FB61D7', 'Thyr_Inh' = '#FB61D750'
                 ))

sechm(SE_DEA, features=DEGs$GeneName, assayName="vst", gaps_at="Condition", show_rownames=FALSE,
      top_annotation=c('Condition'), hmcols=ScaledCols, show_colnames=TRUE,
      do.scale=TRUE, breaks=0.85)

5. TOPGO for Gene Ontology Enrichment analysis

Gene ontology enrichment analysis is performed on the set of 390 genes using TopGO with Fisher statistics and weight01 algorithm.

For each specified domain of the ontology:

Enrichment analysis on all DEGs or splitted in down- and up-regulated

5.1 Selection of modulated genes and generation of gene vectors

I generate vectors for the gene universe, all modulated genes, up-regulated genes and down-regulated genes in the format required by TopGo.

GeneVectors <- topGOGeneVectors(SE_DEA, FdrTh=FdrTh, logFcTh=logFcTh)

## Gene vector contains levels: 0,1
## Gene vector contains levels: 0,1
## Gene vector contains levels: 0,1

Therefore:

universe genes: 14305 genes
modulated genes: 390 genes
down-regulated genes: 280 genes of interest
up-regulated genes: 110 genes of interest

Then I set parameters according to the gene ontology domains to be evaluated. By default, Biological Process and Molecular Function domains are interrogated.

BpEval <- ifelse(length(grep('BP', params$TopGO))!=0, TRUE, FALSE)
MfEval <- ifelse(length(grep('MF', params$TopGO))!=0, TRUE, FALSE)
CcEval <- ifelse(length(grep('CC', params$TopGO))!=0, TRUE, FALSE)

5.2 TopGO analysis: Biological Process

On the basis of the analysis settings, the enrichment for Biological Process IS performed.

Biological Process Analysis for ALL modulated genes: 390 genes

BPann <- topGO::annFUN.org(whichOnto="BP", feasibleGenes=names(GeneVectors$DEGenes), 
                           mapping="org.Hs.eg.db", ID="symbol") %>% inverseList()

# Wrapper function for topGO analysis 
ResBPAll <- topGOResults(Genes=GeneVectors$DEGenes, gene2GO=BPann, ontology='BP', 
                         desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                         EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                         saveRes=TRUE, outDir=paste0(OutputFolder), fileName='BPAll')
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 11416 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 14877 GO terms and 33562 relations. )
## 
## Annotating nodes ...............
##  ( 12665 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 3681 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 17:  3 nodes to be scored    (0 eliminated genes)
## 
##   Level 16:  8 nodes to be scored    (0 eliminated genes)
## 
##   Level 15:  17 nodes to be scored   (50 eliminated genes)
## 
##   Level 14:  35 nodes to be scored   (175 eliminated genes)
## 
##   Level 13:  54 nodes to be scored   (426 eliminated genes)
## 
##   Level 12:  99 nodes to be scored   (1144 eliminated genes)
## 
##   Level 11:  211 nodes to be scored  (2894 eliminated genes)
## 
##   Level 10:  353 nodes to be scored  (4695 eliminated genes)
## 
##   Level 9:   487 nodes to be scored  (6113 eliminated genes)
## 
##   Level 8:   577 nodes to be scored  (7813 eliminated genes)
## 
##   Level 7:   629 nodes to be scored  (9667 eliminated genes)
## 
##   Level 6:   545 nodes to be scored  (10972 eliminated genes)
## 
##   Level 5:   358 nodes to be scored  (11749 eliminated genes)
## 
##   Level 4:   203 nodes to be scored  (12208 eliminated genes)
## 
##   Level 3:   83 nodes to be scored   (12426 eliminated genes)
## 
##   Level 2:   18 nodes to be scored   (12508 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (12546 eliminated genes)

Biological Process Analysis for DOWN-REGULATED genes: 280 genes

# Wrapper function for topGO analysis 
ResBPDown <- topGOResults(Genes=GeneVectors$DEGenesDown, gene2GO=BPann, ontology='BP', 
                          desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                          EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                          saveRes=TRUE, outDir=paste0(OutputFolder), fileName='BPDown')
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 11416 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 14877 GO terms and 33562 relations. )
## 
## Annotating nodes ...............
##  ( 12665 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 3344 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 17:  2 nodes to be scored    (0 eliminated genes)
## 
##   Level 16:  8 nodes to be scored    (0 eliminated genes)
## 
##   Level 15:  15 nodes to be scored   (46 eliminated genes)
## 
##   Level 14:  26 nodes to be scored   (175 eliminated genes)
## 
##   Level 13:  46 nodes to be scored   (371 eliminated genes)
## 
##   Level 12:  86 nodes to be scored   (994 eliminated genes)
## 
##   Level 11:  180 nodes to be scored  (2845 eliminated genes)
## 
##   Level 10:  309 nodes to be scored  (4625 eliminated genes)
## 
##   Level 9:   436 nodes to be scored  (5966 eliminated genes)
## 
##   Level 8:   528 nodes to be scored  (7547 eliminated genes)
## 
##   Level 7:   575 nodes to be scored  (9501 eliminated genes)
## 
##   Level 6:   513 nodes to be scored  (10836 eliminated genes)
## 
##   Level 5:   336 nodes to be scored  (11696 eliminated genes)
## 
##   Level 4:   186 nodes to be scored  (12182 eliminated genes)
## 
##   Level 3:   79 nodes to be scored   (12399 eliminated genes)
## 
##   Level 2:   18 nodes to be scored   (12505 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (12537 eliminated genes)

GOTable(ResBPDown$ResSel, maxGO=20)

Biological Process Analysis for UP-REGULATED genes: 110 genes

ResBPUp <- topGOResults(Genes=GeneVectors$DEGenesUp, gene2GO=BPann, ontology='BP', 
                        desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                        EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                        saveRes=TRUE, outDir=OutputFolder, fileName='BPUp')
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 11416 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 14877 GO terms and 33562 relations. )
## 
## Annotating nodes ...............
##  ( 12665 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 2332 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 17:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 16:  2 nodes to be scored    (0 eliminated genes)
## 
##   Level 15:  5 nodes to be scored    (15 eliminated genes)
## 
##   Level 14:  19 nodes to be scored   (53 eliminated genes)
## 
##   Level 13:  31 nodes to be scored   (177 eliminated genes)
## 
##   Level 12:  46 nodes to be scored   (732 eliminated genes)
## 
##   Level 11:  101 nodes to be scored  (2508 eliminated genes)
## 
##   Level 10:  175 nodes to be scored  (4094 eliminated genes)
## 
##   Level 9:   264 nodes to be scored  (5215 eliminated genes)
## 
##   Level 8:   338 nodes to be scored  (6908 eliminated genes)
## 
##   Level 7:   417 nodes to be scored  (8636 eliminated genes)
## 
##   Level 6:   395 nodes to be scored  (10463 eliminated genes)
## 
##   Level 5:   281 nodes to be scored  (11525 eliminated genes)
## 
##   Level 4:   168 nodes to be scored  (12166 eliminated genes)
## 
##   Level 3:   73 nodes to be scored   (12415 eliminated genes)
## 
##   Level 2:   15 nodes to be scored   (12503 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (12545 eliminated genes)

GOTable(ResBPUp$ResSel, maxGO=20)

Result visualization: Barplot

topGOBarplotAll(TopGOResAll=ResBPAll$ResSel, TopGOResDown=ResBPDown$ResSel, TopGOResUp=ResBPUp$ResSel, 
                terms=8, pvalTh=0.01, plotTitle=NULL)

Top Terms associated Genes

All

plotGenesInTerm_v2(ResBPAll$ResSel, ResBPAll$GOdata, SE_DEA, nterms=8, ngenes=12, plotTitle=NULL, Interactive=FALSE)

Down

plotGenesInTerm_v2(ResBPDown$ResSel, ResBPDown$GOdata, SE_DEA, nterms=8, ngenes=12, plotTitle='Genes in Term - Down DEGs', Interactive=FALSE, fillCol='blue')

Up

plotGenesInTerm_v2(ResBPUp$ResSel, ResBPUp$GOdata, SE_DEA, nterms=8, ngenes=12, plotTitle='Genes in Term - Up DEGs', Interactive=FALSE, fillCol='red')

5.3 TopGO analysis: Molecular Function

On the basis of the analysis settings, the enrichment for Molecular Function IS performed.

Molecular Function Enrichment for ALL modulated genes: 390 genes

MFann <- topGO::annFUN.org(whichOnto='MF', feasibleGenes=names(GeneVectors$DEGenes), 
                           mapping='org.Hs.eg.db', ID='symbol') %>% inverseList()

# Wrapper function for topGO analysis 
ResMFAll <- topGOResults(Genes=GeneVectors$DEGenes, gene2GO=MFann, ontology='MF', 
                         desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                         EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                         saveRes=TRUE, outDir=OutputFolder, fileName='MFAll')
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 4060 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 4530 GO terms and 5903 relations. )
## 
## Annotating nodes ...............
##  ( 12999 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 528 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 11:  7 nodes to be scored    (0 eliminated genes)
## 
##   Level 10:  9 nodes to be scored    (0 eliminated genes)
## 
##   Level 9:   18 nodes to be scored   (152 eliminated genes)
## 
##   Level 8:   32 nodes to be scored   (1146 eliminated genes)
## 
##   Level 7:   50 nodes to be scored   (2982 eliminated genes)
## 
##   Level 6:   94 nodes to be scored   (3500 eliminated genes)
## 
##   Level 5:   128 nodes to be scored  (4701 eliminated genes)
## 
##   Level 4:   130 nodes to be scored  (7583 eliminated genes)
## 
##   Level 3:   47 nodes to be scored   (10159 eliminated genes)
## 
##   Level 2:   12 nodes to be scored   (11108 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (12863 eliminated genes)

Molecular Function Enrichment for DOWN-REGULATED genes: 280 genes

ResMFDown <- topGOResults(Genes=GeneVectors$DEGenesDown, gene2GO=MFann, ontology='MF', 
                          desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                          EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                          saveRes=TRUE, outDir=OutputFolder, fileName='MFDown')
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 4060 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 4530 GO terms and 5903 relations. )
## 
## Annotating nodes ...............
##  ( 12999 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 459 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 11:  6 nodes to be scored    (0 eliminated genes)
## 
##   Level 10:  9 nodes to be scored    (0 eliminated genes)
## 
##   Level 9:   16 nodes to be scored   (137 eliminated genes)
## 
##   Level 8:   30 nodes to be scored   (1146 eliminated genes)
## 
##   Level 7:   43 nodes to be scored   (2966 eliminated genes)
## 
##   Level 6:   82 nodes to be scored   (3458 eliminated genes)
## 
##   Level 5:   109 nodes to be scored  (4498 eliminated genes)
## 
##   Level 4:   110 nodes to be scored  (7499 eliminated genes)
## 
##   Level 3:   43 nodes to be scored   (10036 eliminated genes)
## 
##   Level 2:   10 nodes to be scored   (11002 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (12859 eliminated genes)

GOTable(ResMFDown$ResSel, maxGO=20)

Molecular Function Analysis for UP-REGULATED genes: 110 genes

ResMFUp <- topGOResults(Genes=GeneVectors$DEGenesUp, gene2GO=MFann, ontology='MF', 
                        desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                        EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                        saveRes=TRUE, outDir=OutputFolder, fileName='MFUp')
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 4060 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 4530 GO terms and 5903 relations. )
## 
## Annotating nodes ...............
##  ( 12999 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 330 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 11:  2 nodes to be scored    (0 eliminated genes)
## 
##   Level 10:  5 nodes to be scored    (0 eliminated genes)
## 
##   Level 9:   14 nodes to be scored   (49 eliminated genes)
## 
##   Level 8:   16 nodes to be scored   (1053 eliminated genes)
## 
##   Level 7:   32 nodes to be scored   (2895 eliminated genes)
## 
##   Level 6:   52 nodes to be scored   (3275 eliminated genes)
## 
##   Level 5:   77 nodes to be scored   (4328 eliminated genes)
## 
##   Level 4:   84 nodes to be scored   (6587 eliminated genes)
## 
##   Level 3:   36 nodes to be scored   (9523 eliminated genes)
## 
##   Level 2:   11 nodes to be scored   (10601 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (12860 eliminated genes)

GOTable(ResMFUp$ResSel, maxGO=20)

Result visualization: Barplot

topGOBarplotAll(TopGOResAll=ResMFAll$ResSel, TopGOResDown=ResMFDown$ResSel, TopGOResUp=ResMFUp$ResSel, 
                terms=8, pvalTh=0.01, plotTitle=NULL)

Top Terms associated Genes

All

plotGenesInTerm_v2(ResMFAll$ResSel, ResMFAll$GOdata, SE_DEA, nterms=8, ngenes=12, plotTitle=NULL, Interactive=FALSE)

Down

plotGenesInTerm_v2(ResMFDown$ResSel, ResMFDown$GOdata, SE_DEA, nterms=8, ngenes=12, plotTitle='Genes in Term - Down DEGs', Interactive=FALSE, fillCol='blue')

Up

plotGenesInTerm_v2(ResMFUp$ResSel, ResMFUp$GOdata, SE_DEA, nterms=8, ngenes=12, plotTitle='Genes in Term - Up DEGs', Interactive=FALSE, fillCol='red')

5.4 TopGO analysis: Cellular Component

On the basis of the analysis settings, the enrichment for Cellular Component IS performed.

Cellular Component Enrichment for ALL modulated genes: 390 genes

CCann <- topGO::annFUN.org(whichOnto='CC', feasibleGenes=names(GeneVectors$DEGenes), 
                           mapping='org.Hs.eg.db', ID='symbol') %>% inverseList()

# Wrapper function for topGO analysis 
ResCCAll <- topGOResults(Genes=GeneVectors$DEGenes, gene2GO=CCann, ontology='CC', 
                         desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                         EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                         saveRes=TRUE, outDir=OutputFolder, fileName='CCAll')
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 1732 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 1926 GO terms and 3253 relations. )
## 
## Annotating nodes ...............
##  ( 13224 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 438 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 13:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 12:  2 nodes to be scored    (0 eliminated genes)
## 
##   Level 11:  17 nodes to be scored   (30 eliminated genes)
## 
##   Level 10:  37 nodes to be scored   (59 eliminated genes)
## 
##   Level 9:   57 nodes to be scored   (632 eliminated genes)
## 
##   Level 8:   61 nodes to be scored   (2186 eliminated genes)
## 
##   Level 7:   73 nodes to be scored   (4450 eliminated genes)
## 
##   Level 6:   64 nodes to be scored   (8120 eliminated genes)
## 
##   Level 5:   54 nodes to be scored   (9747 eliminated genes)
## 
##   Level 4:   35 nodes to be scored   (11620 eliminated genes)
## 
##   Level 3:   34 nodes to be scored   (12700 eliminated genes)
## 
##   Level 2:   2 nodes to be scored    (13073 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (13169 eliminated genes)

#write.table(ResCCAll$ResAll, file=paste0(OutputFolder, 'TopGO/CCAllResults.txt'), sep='\t', row.names=FALSE)

Cellular Component Enrichment for DOWN-REGULATED genes: 280 genes

# Wrapper function for topGO analysis 
ResCCDown <- topGOResults(Genes=GeneVectors$DEGenesDown, gene2GO=CCann, ontology='CC', 
                          desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                          EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                          saveRes=TRUE, outDir=OutputFolder, fileName='CCDown')
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 1732 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 1926 GO terms and 3253 relations. )
## 
## Annotating nodes ...............
##  ( 13224 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 404 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 13:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 12:  2 nodes to be scored    (0 eliminated genes)
## 
##   Level 11:  15 nodes to be scored   (30 eliminated genes)
## 
##   Level 10:  33 nodes to be scored   (59 eliminated genes)
## 
##   Level 9:   48 nodes to be scored   (596 eliminated genes)
## 
##   Level 8:   56 nodes to be scored   (2025 eliminated genes)
## 
##   Level 7:   69 nodes to be scored   (4207 eliminated genes)
## 
##   Level 6:   60 nodes to be scored   (8078 eliminated genes)
## 
##   Level 5:   52 nodes to be scored   (9742 eliminated genes)
## 
##   Level 4:   32 nodes to be scored   (11612 eliminated genes)
## 
##   Level 3:   33 nodes to be scored   (12693 eliminated genes)
## 
##   Level 2:   2 nodes to be scored    (13071 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (13169 eliminated genes)

GOTable(ResCCDown$ResSel, maxGO=20)

Cellular Component Analysis for UP-REGULATED genes: 110 genes

# Wrapper function for topGO analysis 
ResCCUp <- topGOResults(Genes=GeneVectors$DEGenesUp, gene2GO=CCann, ontology='CC', 
                        desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                        EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                        saveRes=TRUE, outDir=OutputFolder, fileName='CCUp')
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 1732 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 1926 GO terms and 3253 relations. )
## 
## Annotating nodes ...............
##  ( 13224 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 295 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 11:  6 nodes to be scored    (0 eliminated genes)
## 
##   Level 10:  20 nodes to be scored   (0 eliminated genes)
## 
##   Level 9:   36 nodes to be scored   (287 eliminated genes)
## 
##   Level 8:   42 nodes to be scored   (1657 eliminated genes)
## 
##   Level 7:   45 nodes to be scored   (3682 eliminated genes)
## 
##   Level 6:   41 nodes to be scored   (7831 eliminated genes)
## 
##   Level 5:   43 nodes to be scored   (9320 eliminated genes)
## 
##   Level 4:   30 nodes to be scored   (11477 eliminated genes)
## 
##   Level 3:   29 nodes to be scored   (12672 eliminated genes)
## 
##   Level 2:   2 nodes to be scored    (13054 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (13169 eliminated genes)

GOTable(ResCCUp$ResSel, maxGO=20)

Result visualization: Barplot

topGOBarplotAll(TopGOResAll=ResCCAll$ResSel, TopGOResDown=ResCCDown$ResSel, TopGOResUp=ResCCUp$ResSel, 
                terms=8, pvalTh=0.01, plotTitle=NULL)

Top Terms associated Genes

All

plotGenesInTerm_v2(ResCCAll$ResSel, ResCCAll$GOdata, SE_DEA, nterms=8, ngenes=12, plotTitle=NULL, Interactive=FALSE)

Down

plotGenesInTerm_v2(ResCCDown$ResSel, ResCCDown$GOdata, SE_DEA, nterms=8, ngenes=12, plotTitle='Genes in Term - Down DEGs', Interactive=FALSE, fillCol='blue')

Up

plotGenesInTerm_v2(ResCCUp$ResSel, ResCCUp$GOdata, SE_DEA, nterms=8, ngenes=12, plotTitle='Genes in Term - Up DEGs', Interactive=FALSE, fillCol='red')

6. Savings

SessionInfo <- sessionInfo()
Date <- date()
#
save.image(paste0(OutputFolder, Dataset, 'FunctionalAnalysisWorkspace.RData'))

SessionInfo

## R version 4.2.1 (2022-06-23)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] DESeq2_1.38.3               sechm_1.6.0                
##  [3] SummarizedExperiment_1.28.0 GenomicRanges_1.50.2       
##  [5] GenomeInfoDb_1.34.9         MatrixGenerics_1.10.0      
##  [7] matrixStats_0.63.0          dplyr_1.1.0                
##  [9] tidyr_1.3.0                 data.table_1.14.8          
## [11] topGO_2.50.0                SparseM_1.81               
## [13] GO.db_3.16.0                graph_1.76.0               
## [15] viridis_0.6.2               viridisLite_0.4.1          
## [17] RColorBrewer_1.1-3          gridExtra_2.3              
## [19] org.Hs.eg.db_3.16.0         AnnotationDbi_1.60.0       
## [21] IRanges_2.32.0              S4Vectors_0.36.1           
## [23] Biobase_2.58.0              BiocGenerics_0.44.0        
## [25] ggplot2_3.4.1               DT_0.27                    
## [27] RNASeqBulkExploratory_0.2.1
## 
## loaded via a namespace (and not attached):
##  [1] Rtsne_0.16             colorspace_2.1-0       rjson_0.2.21          
##  [4] ellipsis_0.3.2         circlize_0.4.15        XVector_0.38.0        
##  [7] GlobalOptions_0.1.2    clue_0.3-64            rstudioapi_0.14       
## [10] farver_2.1.1           bit64_4.0.5            fansi_1.0.4           
## [13] codetools_0.2-19       doParallel_1.0.17      cachem_1.0.7          
## [16] geneplotter_1.76.0     knitr_1.42             jsonlite_1.8.4        
## [19] annotate_1.76.0        cluster_2.1.4          png_0.1-8             
## [22] compiler_4.2.1         httr_1.4.5             lazyeval_0.2.2        
## [25] Matrix_1.5-3           fastmap_1.1.1          cli_3.6.1             
## [28] htmltools_0.5.4        tools_4.2.1            gtable_0.3.1          
## [31] glue_1.6.2             GenomeInfoDbData_1.2.9 V8_4.2.2              
## [34] Rcpp_1.0.10            jquerylib_0.1.4        vctrs_0.6.2           
## [37] Biostrings_2.66.0      iterators_1.0.14       crosstalk_1.2.0       
## [40] xfun_0.37              stringr_1.5.0          lifecycle_1.0.3       
## [43] XML_3.99-0.13          ca_0.71.1              zlibbioc_1.44.0       
## [46] scales_1.2.1           TSP_1.2-2              parallel_4.2.1        
## [49] ComplexHeatmap_2.14.0  yaml_2.3.7             curl_5.0.0            
## [52] memoise_2.0.1          sass_0.4.5             stringi_1.7.12        
## [55] RSQLite_2.3.0          highr_0.10             randomcoloR_1.1.0.1   
## [58] foreach_1.5.2          seriation_1.4.1        BiocParallel_1.32.5   
## [61] shape_1.4.6            rlang_1.1.1            pkgconfig_2.0.3       
## [64] bitops_1.0-7           evaluate_0.20          lattice_0.20-45       
## [67] purrr_1.0.1            labeling_0.4.2         htmlwidgets_1.6.1     
## [70] bit_4.0.5              tidyselect_1.2.0       magrittr_2.0.3        
## [73] R6_2.5.1               generics_0.1.3         DelayedArray_0.24.0   
## [76] DBI_1.1.3              pillar_1.8.1           withr_2.5.0           
## [79] KEGGREST_1.38.0        RCurl_1.98-1.10        tibble_3.2.1          
## [82] crayon_1.5.2           utf8_1.2.3             plotly_4.10.1         
## [85] rmarkdown_2.20         GetoptLong_1.0.5       locfit_1.5-9.7        
## [88] grid_4.2.1             blob_1.2.3             digest_0.6.31         
## [91] xtable_1.8-4           munsell_0.5.0          registry_0.5-1        
## [94] bslib_0.4.2

Date

## [1] "Fri Jul 18 20:15:17 2025"

TopGO - CTL04

Date: July 18, 2025

1. Environment Set Up

2. Data Upload

2.1 Load Data from DEA

2.2 Add DEA results to SE

3. RESULTS NAVIGATION: Interactive Table

4. RESULTS VISUALIZATION

4.1 Volcano plot

4.3 Heatmap for significant genes

5. TOPGO for Gene Ontology Enrichment analysis

5.1 Selection of modulated genes and generation of gene vectors

5.2 TopGO analysis: Biological Process

Biological Process Analysis for ALL modulated genes: 390 genes

Biological Process Analysis for DOWN-REGULATED genes: 280 genes

Biological Process Analysis for UP-REGULATED genes: 110 genes

Result visualization: Barplot

Top Terms associated Genes

All

Down

Up

5.3 TopGO analysis: Molecular Function

Molecular Function Enrichment for ALL modulated genes: 390 genes

Molecular Function Enrichment for DOWN-REGULATED genes: 280 genes

Molecular Function Analysis for UP-REGULATED genes: 110 genes

Result visualization: Barplot

Top Terms associated Genes

All

Down

Up

5.4 TopGO analysis: Cellular Component

Cellular Component Enrichment for ALL modulated genes: 390 genes

Cellular Component Enrichment for DOWN-REGULATED genes: 280 genes

Cellular Component Analysis for UP-REGULATED genes: 110 genes

Result visualization: Barplot

Top Terms associated Genes

All

Down

Up

6. Savings