TopGO - CTL04

Condition_1 <- params$Condition_1
Condition_2 <- params$Condition_2

Top GO Analysis for Thyr Inhibitor

1. Environment Set Up

library(RNASeqBulkExploratory)
library(DT)
library(ggplot2)
library(AnnotationDbi)
## Loading required package: stats4
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: IRanges
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
library(org.Hs.eg.db)
## 
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:Biobase':
## 
##     combine
## The following object is masked from 'package:BiocGenerics':
## 
##     combine
library(RColorBrewer)
library(viridis)
## Loading required package: viridisLite
library(topGO)
## Loading required package: graph
## Loading required package: GO.db
## 
## Loading required package: SparseM
## 
## Attaching package: 'SparseM'
## The following object is masked from 'package:base':
## 
##     backsolve
## 
## groupGOTerms:    GOBPTerm, GOMFTerm, GOCCTerm environments built.
## 
## Attaching package: 'topGO'
## The following object is masked from 'package:IRanges':
## 
##     members
library(data.table)
## 
## Attaching package: 'data.table'
## The following object is masked from 'package:IRanges':
## 
##     shift
## The following objects are masked from 'package:S4Vectors':
## 
##     first, second
library(tidyr)
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:S4Vectors':
## 
##     expand
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
## 
##     between, first, last
## The following object is masked from 'package:graph':
## 
##     union
## The following object is masked from 'package:gridExtra':
## 
##     combine
## The following object is masked from 'package:AnnotationDbi':
## 
##     select
## The following objects are masked from 'package:IRanges':
## 
##     collapse, desc, intersect, setdiff, slice, union
## The following objects are masked from 'package:S4Vectors':
## 
##     first, intersect, rename, setdiff, setequal, union
## The following object is masked from 'package:Biobase':
## 
##     combine
## The following objects are masked from 'package:BiocGenerics':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(SummarizedExperiment)
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## 
## Attaching package: 'matrixStats'
## The following object is masked from 'package:dplyr':
## 
##     count
## The following objects are masked from 'package:Biobase':
## 
##     anyMissing, rowMedians
## 
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## The following object is masked from 'package:Biobase':
## 
##     rowMedians
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
library(sechm)

source("../../plotGenesInTerm_v2.R")

Dataset <- params$Dataset
logFcTh <- params$logFcTh
FdrTh <- params$FdrTh
OutputFolder <- ifelse(is.null(params$OutputFolder), getwd(), params$OutputFolder) 


if (dir.exists(OutputFolder) == FALSE) {
  dir.create(OutputFolder, recursive=TRUE)
}

2. Data Upload

Summarized Experiment object containing expression data used for DEA and gene and sample metadata
DEA object, containing results of the differential expression

2.1 Load Data from DEA

# List with differential expression results 
DEA <- readRDS(params$DEAFile)

#SE object coming from DEA, but not containing specific contrast results
SE_DEA <- readRDS(params$SEFile)

2.2 Add DEA results to SE

if(! identical(rownames(SE_DEA), row.names(DEA[[Condition_1]][[Condition_2]]$Res))){
  stop('Expression data in SE and results from differential espression analysis are inconsistent.')
}
## Loading required package: DESeq2


rowData(SE_DEA) <- cbind(rowData(SE_DEA)[,1:6], DEA[[Condition_1]][[Condition_2]]$Res)
  
# Column names must be set to be compliant with the required format to be recognized by ORA
names(rowData(SE_DEA))[which(names(rowData(SE_DEA))=='log2FoldChange')] <- 'logFC'
names(rowData(SE_DEA))[which(names(rowData(SE_DEA))=='padj')] <- 'FDR'

#metadata(SE_DEA_Prel)$annotation <- 'hsa'

14305 genes in 21 samples have been testes for differential expression.

Imposing a threshold of 1 on the Log2FC and 0.01 on the FDR (as specified in parameters), 417 genes are selected: 160 up-regulated genes and 257 down-regulated genes.

3. RESULTS NAVIGATION: Interactive Table

An interactive table show the results for the top 500 DEGs (ranked according to FDR).

DEGsTable(SE_DEA, FdrTh=0.01, logFcTh=1, maxGenes=500, saveDEGs=TRUE, outDir=OutputFolder)

4. RESULTS VISUALIZATION

4.1 Volcano plot

The results of the differential expression analysis are visualized by Volcano plot. An interactive version is included in the html (only genes with FDR < threshold), while a static version is saved.

plotVolcanoSE(SE=SE_DEA, FdrTh=FdrTh, logFcTh=logFcTh, FdrCeil=1e-10, logFcCeil=4)

4.3 Heatmap for significant genes

Heatmaps for DEGs, showing scaled vst values.

DEGs <- dplyr::filter(data.frame(rowData(SE_DEA)), FDR < FdrTh & abs(logFC) > logFcTh)   


ScaledCols <- c('darkblue', "purple","white","lightgoldenrod1", 'goldenrod1')

colData(SE_DEA)$Condition <- factor(colData(SE_DEA)$Condition, levels=c("CTL", "DMSO", "AhHyd_Ag", "AhHyd_Inh", "Andr_Ag", "Andr_Inh", "Estr_Ag", "Estr_Inh", "GC_Ag", "GC_Inh", "LivX_Ag", "LivX_Inh", "Ret_Ag", "Ret_Inh", "Thyr_Ag", "Thyr_Inh" ))

metadata(SE_DEA)$anno_colors <- list(Condition = c('DMSO' = 'grey30', 'CTL' = 'azure3', 
                 'AhHyd_Ag'='#F8766D', 'AhHyd_Inh'='#F8766D50',
                 'Andr_Ag'='#fccb17', 'Andr_Inh'='#C49A0050',  
                 "Estr_Ag"= '#53B400', "Estr_Inh"= '#53B40050', 
                 'GC_Ag' = '#00C094', 'GC_Inh' = '#00C09450',
                 'LivX_Ag' = '#00B6EB', 'LivX_Inh' = '#00B6EB50', 
                 'Ret_Ag' = '#A58AFF', 'Ret_Inh' = '#A58AFF50', 
                 'Thyr_Ag' = '#FB61D7', 'Thyr_Inh' = '#FB61D750'
                 ))

sechm(SE_DEA, features=DEGs$GeneName, assayName="vst", gaps_at="Condition", show_rownames=FALSE,
      top_annotation=c('Condition'), hmcols=ScaledCols, show_colnames=TRUE,
      do.scale=TRUE, breaks=0.85)

5. TOPGO for Gene Ontology Enrichment analysis

Gene ontology enrichment analysis is performed on the set of 417 genes using TopGO with Fisher statistics and weight01 algorithm.

For each specified domain of the ontology:

Enrichment analysis on all DEGs or splitted in down- and up-regulated

5.1 Selection of modulated genes and generation of gene vectors

I generate vectors for the gene universe, all modulated genes, up-regulated genes and down-regulated genes in the format required by TopGo.

GeneVectors <- topGOGeneVectors(SE_DEA, FdrTh=FdrTh, logFcTh=logFcTh)

## Gene vector contains levels: 0,1
## Gene vector contains levels: 0,1
## Gene vector contains levels: 0,1

Therefore:

universe genes: 14305 genes
modulated genes: 417 genes
down-regulated genes: 257 genes of interest
up-regulated genes: 160 genes of interest

Then I set parameters according to the gene ontology domains to be evaluated. By default, Biological Process and Molecular Function domains are interrogated.

BpEval <- ifelse(length(grep('BP', params$TopGO))!=0, TRUE, FALSE)
MfEval <- ifelse(length(grep('MF', params$TopGO))!=0, TRUE, FALSE)
CcEval <- ifelse(length(grep('CC', params$TopGO))!=0, TRUE, FALSE)

5.2 TopGO analysis: Biological Process

On the basis of the analysis settings, the enrichment for Biological Process IS performed.

Biological Process Analysis for ALL modulated genes: 417 genes

BPann <- topGO::annFUN.org(whichOnto="BP", feasibleGenes=names(GeneVectors$DEGenes), 
                           mapping="org.Hs.eg.db", ID="symbol") %>% inverseList()

# Wrapper function for topGO analysis 
ResBPAll <- topGOResults(Genes=GeneVectors$DEGenes, gene2GO=BPann, ontology='BP', 
                         desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                         EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                         saveRes=TRUE, outDir=paste0(OutputFolder), fileName='BPAll')
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 11416 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 14877 GO terms and 33562 relations. )
## 
## Annotating nodes ...............
##  ( 12665 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 3643 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 18:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 17:  3 nodes to be scored    (0 eliminated genes)
## 
##   Level 16:  6 nodes to be scored    (18 eliminated genes)
## 
##   Level 15:  12 nodes to be scored   (63 eliminated genes)
## 
##   Level 14:  25 nodes to be scored   (134 eliminated genes)
## 
##   Level 13:  52 nodes to be scored   (323 eliminated genes)
## 
##   Level 12:  98 nodes to be scored   (875 eliminated genes)
## 
##   Level 11:  197 nodes to be scored  (2843 eliminated genes)
## 
##   Level 10:  351 nodes to be scored  (4742 eliminated genes)
## 
##   Level 9:   474 nodes to be scored  (6005 eliminated genes)
## 
##   Level 8:   553 nodes to be scored  (7931 eliminated genes)
## 
##   Level 7:   645 nodes to be scored  (9661 eliminated genes)
## 
##   Level 6:   555 nodes to be scored  (11010 eliminated genes)
## 
##   Level 5:   362 nodes to be scored  (11782 eliminated genes)
## 
##   Level 4:   201 nodes to be scored  (12237 eliminated genes)
## 
##   Level 3:   90 nodes to be scored   (12431 eliminated genes)
## 
##   Level 2:   17 nodes to be scored   (12507 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (12545 eliminated genes)

Biological Process Analysis for DOWN-REGULATED genes: 257 genes

# Wrapper function for topGO analysis 
ResBPDown <- topGOResults(Genes=GeneVectors$DEGenesDown, gene2GO=BPann, ontology='BP', 
                          desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                          EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                          saveRes=TRUE, outDir=paste0(OutputFolder), fileName='BPDown')
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 11416 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 14877 GO terms and 33562 relations. )
## 
## Annotating nodes ...............
##  ( 12665 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 3096 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 18:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 17:  2 nodes to be scored    (0 eliminated genes)
## 
##   Level 16:  3 nodes to be scored    (18 eliminated genes)
## 
##   Level 15:  7 nodes to be scored    (38 eliminated genes)
## 
##   Level 14:  19 nodes to be scored   (80 eliminated genes)
## 
##   Level 13:  40 nodes to be scored   (200 eliminated genes)
## 
##   Level 12:  76 nodes to be scored   (768 eliminated genes)
## 
##   Level 11:  148 nodes to be scored  (2566 eliminated genes)
## 
##   Level 10:  260 nodes to be scored  (4273 eliminated genes)
## 
##   Level 9:   385 nodes to be scored  (5540 eliminated genes)
## 
##   Level 8:   463 nodes to be scored  (7383 eliminated genes)
## 
##   Level 7:   562 nodes to be scored  (9140 eliminated genes)
## 
##   Level 6:   501 nodes to be scored  (10850 eliminated genes)
## 
##   Level 5:   339 nodes to be scored  (11710 eliminated genes)
## 
##   Level 4:   186 nodes to be scored  (12224 eliminated genes)
## 
##   Level 3:   86 nodes to be scored   (12428 eliminated genes)
## 
##   Level 2:   17 nodes to be scored   (12505 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (12545 eliminated genes)

GOTable(ResBPDown$ResSel, maxGO=20)

Biological Process Analysis for UP-REGULATED genes: 160 genes

ResBPUp <- topGOResults(Genes=GeneVectors$DEGenesUp, gene2GO=BPann, ontology='BP', 
                        desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                        EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                        saveRes=TRUE, outDir=OutputFolder, fileName='BPUp')
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 11416 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 14877 GO terms and 33562 relations. )
## 
## Annotating nodes ...............
##  ( 12665 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 2616 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 18:  1 nodes to be scored    (0 eliminated genes)
## 
##   Level 17:  2 nodes to be scored    (0 eliminated genes)
## 
##   Level 16:  4 nodes to be scored    (18 eliminated genes)
## 
##   Level 15:  8 nodes to be scored    (48 eliminated genes)
## 
##   Level 14:  14 nodes to be scored   (81 eliminated genes)
## 
##   Level 13:  29 nodes to be scored   (244 eliminated genes)
## 
##   Level 12:  59 nodes to be scored   (673 eliminated genes)
## 
##   Level 11:  120 nodes to be scored  (2555 eliminated genes)
## 
##   Level 10:  241 nodes to be scored  (4325 eliminated genes)
## 
##   Level 9:   321 nodes to be scored  (5436 eliminated genes)
## 
##   Level 8:   389 nodes to be scored  (7194 eliminated genes)
## 
##   Level 7:   468 nodes to be scored  (9012 eliminated genes)
## 
##   Level 6:   415 nodes to be scored  (10625 eliminated genes)
## 
##   Level 5:   291 nodes to be scored  (11604 eliminated genes)
## 
##   Level 4:   160 nodes to be scored  (12164 eliminated genes)
## 
##   Level 3:   77 nodes to be scored   (12392 eliminated genes)
## 
##   Level 2:   16 nodes to be scored   (12501 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (12543 eliminated genes)

GOTable(ResBPUp$ResSel, maxGO=20)

Result visualization: Barplot

topGOBarplotAll(TopGOResAll=ResBPAll$ResSel, TopGOResDown=ResBPDown$ResSel, TopGOResUp=ResBPUp$ResSel, 
                terms=8, pvalTh=0.01, plotTitle=NULL)

Top Terms associated Genes

All

plotGenesInTerm_v2(ResBPAll$ResSel, ResBPAll$GOdata, SE_DEA, nterms=8, ngenes=12, plotTitle=NULL, Interactive=FALSE)

Down

plotGenesInTerm_v2(ResBPDown$ResSel, ResBPDown$GOdata, SE_DEA, nterms=8, ngenes=12, plotTitle='Genes in Term - Down DEGs', Interactive=FALSE, fillCol='blue')

Up

plotGenesInTerm_v2(ResBPUp$ResSel, ResBPUp$GOdata, SE_DEA, nterms=8, ngenes=12, plotTitle='Genes in Term - Up DEGs', Interactive=FALSE, fillCol='red')

5.3 TopGO analysis: Molecular Function

On the basis of the analysis settings, the enrichment for Molecular Function IS performed.

Molecular Function Enrichment for ALL modulated genes: 417 genes

MFann <- topGO::annFUN.org(whichOnto='MF', feasibleGenes=names(GeneVectors$DEGenes), 
                           mapping='org.Hs.eg.db', ID='symbol') %>% inverseList()

# Wrapper function for topGO analysis 
ResMFAll <- topGOResults(Genes=GeneVectors$DEGenes, gene2GO=MFann, ontology='MF', 
                         desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                         EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                         saveRes=TRUE, outDir=OutputFolder, fileName='MFAll')
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 4060 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 4530 GO terms and 5903 relations. )
## 
## Annotating nodes ...............
##  ( 12999 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 563 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 11:  4 nodes to be scored    (0 eliminated genes)
## 
##   Level 10:  9 nodes to be scored    (0 eliminated genes)
## 
##   Level 9:   22 nodes to be scored   (114 eliminated genes)
## 
##   Level 8:   33 nodes to be scored   (1124 eliminated genes)
## 
##   Level 7:   57 nodes to be scored   (3059 eliminated genes)
## 
##   Level 6:   100 nodes to be scored  (3522 eliminated genes)
## 
##   Level 5:   138 nodes to be scored  (4798 eliminated genes)
## 
##   Level 4:   136 nodes to be scored  (7747 eliminated genes)
## 
##   Level 3:   49 nodes to be scored   (10277 eliminated genes)
## 
##   Level 2:   14 nodes to be scored   (11100 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (12866 eliminated genes)

Molecular Function Enrichment for DOWN-REGULATED genes: 257 genes

ResMFDown <- topGOResults(Genes=GeneVectors$DEGenesDown, gene2GO=MFann, ontology='MF', 
                          desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                          EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                          saveRes=TRUE, outDir=OutputFolder, fileName='MFDown')
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 4060 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 4530 GO terms and 5903 relations. )
## 
## Annotating nodes ...............
##  ( 12999 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 482 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 11:  3 nodes to be scored    (0 eliminated genes)
## 
##   Level 10:  8 nodes to be scored    (0 eliminated genes)
## 
##   Level 9:   17 nodes to be scored   (99 eliminated genes)
## 
##   Level 8:   28 nodes to be scored   (1118 eliminated genes)
## 
##   Level 7:   51 nodes to be scored   (2953 eliminated genes)
## 
##   Level 6:   83 nodes to be scored   (3449 eliminated genes)
## 
##   Level 5:   120 nodes to be scored  (4706 eliminated genes)
## 
##   Level 4:   113 nodes to be scored  (7449 eliminated genes)
## 
##   Level 3:   45 nodes to be scored   (10189 eliminated genes)
## 
##   Level 2:   13 nodes to be scored   (11035 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (12864 eliminated genes)

GOTable(ResMFDown$ResSel, maxGO=20)

Molecular Function Analysis for UP-REGULATED genes: 160 genes

ResMFUp <- topGOResults(Genes=GeneVectors$DEGenesUp, gene2GO=MFann, ontology='MF', 
                        desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                        EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                        saveRes=TRUE, outDir=OutputFolder, fileName='MFUp')
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 4060 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 4530 GO terms and 5903 relations. )
## 
## Annotating nodes ...............
##  ( 12999 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 368 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 11:  2 nodes to be scored    (0 eliminated genes)
## 
##   Level 10:  4 nodes to be scored    (0 eliminated genes)
## 
##   Level 9:   15 nodes to be scored   (49 eliminated genes)
## 
##   Level 8:   18 nodes to be scored   (1020 eliminated genes)
## 
##   Level 7:   28 nodes to be scored   (2959 eliminated genes)
## 
##   Level 6:   65 nodes to be scored   (3311 eliminated genes)
## 
##   Level 5:   86 nodes to be scored   (4299 eliminated genes)
## 
##   Level 4:   100 nodes to be scored  (7231 eliminated genes)
## 
##   Level 3:   37 nodes to be scored   (9831 eliminated genes)
## 
##   Level 2:   12 nodes to be scored   (10830 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (12845 eliminated genes)

GOTable(ResMFUp$ResSel, maxGO=20)

Result visualization: Barplot

topGOBarplotAll(TopGOResAll=ResMFAll$ResSel, TopGOResDown=ResMFDown$ResSel, TopGOResUp=ResMFUp$ResSel, 
                terms=8, pvalTh=0.01, plotTitle=NULL)

Top Terms associated Genes

All

plotGenesInTerm_v2(ResMFAll$ResSel, ResMFAll$GOdata, SE_DEA, nterms=8, ngenes=12, plotTitle=NULL, Interactive=FALSE)

Down

plotGenesInTerm_v2(ResMFDown$ResSel, ResMFDown$GOdata, SE_DEA, nterms=8, ngenes=12, plotTitle='Genes in Term - Down DEGs', Interactive=FALSE, fillCol='blue')

Up

plotGenesInTerm_v2(ResMFUp$ResSel, ResMFUp$GOdata, SE_DEA, nterms=8, ngenes=12, plotTitle='Genes in Term - Up DEGs', Interactive=FALSE, fillCol='red')

5.4 TopGO analysis: Cellular Component

On the basis of the analysis settings, the enrichment for Cellular Component IS performed.

Cellular Component Enrichment for ALL modulated genes: 417 genes

CCann <- topGO::annFUN.org(whichOnto='CC', feasibleGenes=names(GeneVectors$DEGenes), 
                           mapping='org.Hs.eg.db', ID='symbol') %>% inverseList()

# Wrapper function for topGO analysis 
ResCCAll <- topGOResults(Genes=GeneVectors$DEGenes, gene2GO=CCann, ontology='CC', 
                         desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                         EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                         saveRes=TRUE, outDir=OutputFolder, fileName='CCAll')
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 1732 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 1926 GO terms and 3253 relations. )
## 
## Annotating nodes ...............
##  ( 13224 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 403 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 11:  11 nodes to be scored   (0 eliminated genes)
## 
##   Level 10:  32 nodes to be scored   (0 eliminated genes)
## 
##   Level 9:   49 nodes to be scored   (492 eliminated genes)
## 
##   Level 8:   56 nodes to be scored   (2042 eliminated genes)
## 
##   Level 7:   69 nodes to be scored   (4236 eliminated genes)
## 
##   Level 6:   62 nodes to be scored   (8006 eliminated genes)
## 
##   Level 5:   52 nodes to be scored   (9742 eliminated genes)
## 
##   Level 4:   35 nodes to be scored   (11624 eliminated genes)
## 
##   Level 3:   34 nodes to be scored   (12705 eliminated genes)
## 
##   Level 2:   2 nodes to be scored    (13072 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (13169 eliminated genes)

#write.table(ResCCAll$ResAll, file=paste0(OutputFolder, 'TopGO/CCAllResults.txt'), sep='\t', row.names=FALSE)

Cellular Component Enrichment for DOWN-REGULATED genes: 257 genes

# Wrapper function for topGO analysis 
ResCCDown <- topGOResults(Genes=GeneVectors$DEGenesDown, gene2GO=CCann, ontology='CC', 
                          desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                          EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                          saveRes=TRUE, outDir=OutputFolder, fileName='CCDown')
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 1732 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 1926 GO terms and 3253 relations. )
## 
## Annotating nodes ...............
##  ( 13224 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 362 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 11:  10 nodes to be scored   (0 eliminated genes)
## 
##   Level 10:  29 nodes to be scored   (0 eliminated genes)
## 
##   Level 9:   43 nodes to be scored   (469 eliminated genes)
## 
##   Level 8:   47 nodes to be scored   (1921 eliminated genes)
## 
##   Level 7:   62 nodes to be scored   (4148 eliminated genes)
## 
##   Level 6:   55 nodes to be scored   (7894 eliminated genes)
## 
##   Level 5:   48 nodes to be scored   (9682 eliminated genes)
## 
##   Level 4:   32 nodes to be scored   (11568 eliminated genes)
## 
##   Level 3:   33 nodes to be scored   (12682 eliminated genes)
## 
##   Level 2:   2 nodes to be scored    (13070 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (13169 eliminated genes)

GOTable(ResCCDown$ResSel, maxGO=20)

Cellular Component Analysis for UP-REGULATED genes: 160 genes

# Wrapper function for topGO analysis 
ResCCUp <- topGOResults(Genes=GeneVectors$DEGenesUp, gene2GO=CCann, ontology='CC', 
                        desc=NULL, nodeSize=15, algorithm='weight01', statistic='fisher', 
                        EnTh=params$GoEnTh, PvalTh=params$GoPvalTh, minTerms=12, geneTh=4,
                        saveRes=TRUE, outDir=OutputFolder, fileName='CCUp')
## Gene vector contains levels: 0,1
## 
## Building most specific GOs .....
##  ( 1732 GO terms found. )
## 
## Build GO DAG topology ..........
##  ( 1926 GO terms and 3253 relations. )
## 
## Annotating nodes ...............
##  ( 13224 genes annotated to the GO terms. )
## 
##           -- Weight01 Algorithm -- 
## 
##       the algorithm is scoring 292 nontrivial nodes
##       parameters: 
##           test statistic: fisher
## 
##   Level 11:  5 nodes to be scored    (0 eliminated genes)
## 
##   Level 10:  16 nodes to be scored   (0 eliminated genes)
## 
##   Level 9:   31 nodes to be scored   (332 eliminated genes)
## 
##   Level 8:   41 nodes to be scored   (1366 eliminated genes)
## 
##   Level 7:   47 nodes to be scored   (3782 eliminated genes)
## 
##   Level 6:   49 nodes to be scored   (7811 eliminated genes)
## 
##   Level 5:   42 nodes to be scored   (9611 eliminated genes)
## 
##   Level 4:   28 nodes to be scored   (11569 eliminated genes)
## 
##   Level 3:   30 nodes to be scored   (12685 eliminated genes)
## 
##   Level 2:   2 nodes to be scored    (13070 eliminated genes)
## 
##   Level 1:   1 nodes to be scored    (13169 eliminated genes)

GOTable(ResCCUp$ResSel, maxGO=20)

Result visualization: Barplot

topGOBarplotAll(TopGOResAll=ResCCAll$ResSel, TopGOResDown=ResCCDown$ResSel, TopGOResUp=ResCCUp$ResSel, 
                terms=8, pvalTh=0.01, plotTitle=NULL)

Top Terms associated Genes

All

plotGenesInTerm_v2(ResCCAll$ResSel, ResCCAll$GOdata, SE_DEA, nterms=8, ngenes=12, plotTitle=NULL, Interactive=FALSE)

Down

plotGenesInTerm_v2(ResCCDown$ResSel, ResCCDown$GOdata, SE_DEA, nterms=8, ngenes=12, plotTitle='Genes in Term - Down DEGs', Interactive=FALSE, fillCol='blue')

Up

plotGenesInTerm_v2(ResCCUp$ResSel, ResCCUp$GOdata, SE_DEA, nterms=8, ngenes=12, plotTitle='Genes in Term - Up DEGs', Interactive=FALSE, fillCol='red')

6. Savings

SessionInfo <- sessionInfo()
Date <- date()
#
save.image(paste0(OutputFolder, Dataset, 'FunctionalAnalysisWorkspace.RData'))

SessionInfo

## R version 4.2.1 (2022-06-23)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] DESeq2_1.38.3               sechm_1.6.0                
##  [3] SummarizedExperiment_1.28.0 GenomicRanges_1.50.2       
##  [5] GenomeInfoDb_1.34.9         MatrixGenerics_1.10.0      
##  [7] matrixStats_0.63.0          dplyr_1.1.0                
##  [9] tidyr_1.3.0                 data.table_1.14.8          
## [11] topGO_2.50.0                SparseM_1.81               
## [13] GO.db_3.16.0                graph_1.76.0               
## [15] viridis_0.6.2               viridisLite_0.4.1          
## [17] RColorBrewer_1.1-3          gridExtra_2.3              
## [19] org.Hs.eg.db_3.16.0         AnnotationDbi_1.60.0       
## [21] IRanges_2.32.0              S4Vectors_0.36.1           
## [23] Biobase_2.58.0              BiocGenerics_0.44.0        
## [25] ggplot2_3.4.1               DT_0.27                    
## [27] RNASeqBulkExploratory_0.2.1
## 
## loaded via a namespace (and not attached):
##  [1] Rtsne_0.16             colorspace_2.1-0       rjson_0.2.21          
##  [4] ellipsis_0.3.2         circlize_0.4.15        XVector_0.38.0        
##  [7] GlobalOptions_0.1.2    clue_0.3-64            rstudioapi_0.14       
## [10] farver_2.1.1           bit64_4.0.5            fansi_1.0.4           
## [13] codetools_0.2-19       doParallel_1.0.17      cachem_1.0.7          
## [16] geneplotter_1.76.0     knitr_1.42             jsonlite_1.8.4        
## [19] annotate_1.76.0        cluster_2.1.4          png_0.1-8             
## [22] compiler_4.2.1         httr_1.4.5             lazyeval_0.2.2        
## [25] Matrix_1.5-3           fastmap_1.1.1          cli_3.6.1             
## [28] htmltools_0.5.4        tools_4.2.1            gtable_0.3.1          
## [31] glue_1.6.2             GenomeInfoDbData_1.2.9 V8_4.2.2              
## [34] Rcpp_1.0.10            jquerylib_0.1.4        vctrs_0.6.2           
## [37] Biostrings_2.66.0      iterators_1.0.14       crosstalk_1.2.0       
## [40] xfun_0.37              stringr_1.5.0          lifecycle_1.0.3       
## [43] XML_3.99-0.13          ca_0.71.1              zlibbioc_1.44.0       
## [46] scales_1.2.1           TSP_1.2-2              parallel_4.2.1        
## [49] ComplexHeatmap_2.14.0  yaml_2.3.7             curl_5.0.0            
## [52] memoise_2.0.1          sass_0.4.5             stringi_1.7.12        
## [55] RSQLite_2.3.0          highr_0.10             randomcoloR_1.1.0.1   
## [58] foreach_1.5.2          seriation_1.4.1        BiocParallel_1.32.5   
## [61] shape_1.4.6            rlang_1.1.1            pkgconfig_2.0.3       
## [64] bitops_1.0-7           evaluate_0.20          lattice_0.20-45       
## [67] purrr_1.0.1            labeling_0.4.2         htmlwidgets_1.6.1     
## [70] bit_4.0.5              tidyselect_1.2.0       magrittr_2.0.3        
## [73] R6_2.5.1               generics_0.1.3         DelayedArray_0.24.0   
## [76] DBI_1.1.3              pillar_1.8.1           withr_2.5.0           
## [79] KEGGREST_1.38.0        RCurl_1.98-1.10        tibble_3.2.1          
## [82] crayon_1.5.2           utf8_1.2.3             plotly_4.10.1         
## [85] rmarkdown_2.20         GetoptLong_1.0.5       locfit_1.5-9.7        
## [88] grid_4.2.1             blob_1.2.3             digest_0.6.31         
## [91] xtable_1.8-4           munsell_0.5.0          registry_0.5-1        
## [94] bslib_0.4.2

Date

## [1] "Fri Jul 18 19:11:18 2025"

TopGO - CTL04

Date: July 18, 2025

1. Environment Set Up

2. Data Upload

2.1 Load Data from DEA

2.2 Add DEA results to SE

3. RESULTS NAVIGATION: Interactive Table

4. RESULTS VISUALIZATION

4.1 Volcano plot

4.3 Heatmap for significant genes

5. TOPGO for Gene Ontology Enrichment analysis

5.1 Selection of modulated genes and generation of gene vectors

5.2 TopGO analysis: Biological Process

Biological Process Analysis for ALL modulated genes: 417 genes

Biological Process Analysis for DOWN-REGULATED genes: 257 genes

Biological Process Analysis for UP-REGULATED genes: 160 genes

Result visualization: Barplot

Top Terms associated Genes

All

Down

Up

5.3 TopGO analysis: Molecular Function

Molecular Function Enrichment for ALL modulated genes: 417 genes

Molecular Function Enrichment for DOWN-REGULATED genes: 257 genes

Molecular Function Analysis for UP-REGULATED genes: 160 genes

Result visualization: Barplot

Top Terms associated Genes

All

Down

Up

5.4 TopGO analysis: Cellular Component

Cellular Component Enrichment for ALL modulated genes: 417 genes

Cellular Component Enrichment for DOWN-REGULATED genes: 257 genes

Cellular Component Analysis for UP-REGULATED genes: 160 genes

Result visualization: Barplot

Top Terms associated Genes

All

Down

Up

6. Savings