Values of RMarkdown parameters
for (i in 1:length(params))
print(paste('Parameter:', names(params)[i], ' - Value:', params[[i]], '- Class:', class(params[[i]])))
## [1] "Parameter: Dataset - Value: CTL04 - Class: character"
## [1] "Parameter: SE - Value: ~/DataDir/bulkRNASeq/3.DataExploration/CTL04/Output/FemaleLineSE_Bio.rds - Class: character"
## [1] "Parameter: OutputFolder - Value: ~/DataDir/bulkRNASeq/4.GeneSignatures/CTL04/Output/MetabolismSig/ - Class: character"
## [1] "Parameter: CpmFilt - Value: 2 - Class: integer"
## [1] "Parameter: SampleFilt - Value: 4 - Class: integer"
library(RNASeqBulkExploratory) #Our Package
library(DT)
library(gridExtra)
library(SummarizedExperiment)
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following object is masked from 'package:gridExtra':
##
## combine
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
library(SEtools)
## Loading required package: sechm
library(sechm)
library(randomcoloR)
library(ggplot2)
library(tidyHeatmap)
## ========================================
## tidyHeatmap version 1.8.1
## If you use tidyHeatmap in published research, please cite:
## 1) Mangiola et al. tidyHeatmap: an R package for modular heatmap production
## based on tidy principles. JOSS 2020.
## 2) Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional
## genomic data. Bioinformatics 2016.
## This message can be suppressed by:
## suppressPackageStartupMessages(library(tidyHeatmap))
## ========================================
##
## Attaching package: 'tidyHeatmap'
## The following object is masked from 'package:stats':
##
## heatmap
<- params$Dataset
Dataset <- params$OutputFolder
OutputFolder
if (dir.exists(OutputFolder) == FALSE) {
dir.create(OutputFolder, recursive=FALSE)
}
SE object created from exploratory analysis. Steps already performed: gene selection according to biotype.
<- readRDS(params$SE) SE_Bio
SE object containing information about 19900 genes in 66 samples.
if(! identical(names(assays(SE_Bio)$counts), rownames(colData(SE_Bio)))){
stop('Inconsistencies in sample metadata for SE object')
}
if(! identical(names(assays(SE_Bio)$counts), colData(SE_Bio)$HRID)){
stop('Inconsistencies in sample metadata for SE object')
}
<- read.table(file = '~/DataDir/bulkRNASeq/1.GeneLength/Output/GenecodeV35GeneLength.txt',
GeneLength sep='\t', header=TRUE)
<- dplyr::left_join(as.data.frame(rowData(SE_Bio))[,1:3], GeneLength, by=c('Gene'='Gene')) SelLenght
if(identical(SelLenght$Gene, rowData(SE_Bio)$Gene)){
rowData(SE_Bio)$GeneLength <- SelLenght$Length
print('Gene Lengh information added to SE')
else {
} print('Information on gene lenght is not compatible with SE object. Gene Lenght information not added to SE')
}## [1] "Gene Lengh information added to SE"
<- read.table(file = '~/DataDir/bulkRNASeq/4.GeneSignatures/GeneSets/MetabolismGenes.txt',
GS sep='\t', header=TRUE, row.names = NULL)
names(GS)[3] <- 'Population'
%>%
GS data.frame() %>%
::datatable() DT
Not necessary.
<- params$CpmFilt
CpmFilt if(is.numeric(CpmFilt)==FALSE){
stop('ERROR! Cpm threshold for gene filtering has a non-numeric value')
}
<- ifelse(params$SampleFilt=='Default', dim(SE_Bio)[2]/2, params$SampleFilt)
SampleFilt if(is.numeric(SampleFilt)==FALSE){
stop('ERROR! Sample threshold for gene filtering has a non-numeric value')
}
Genes having an expression lower than 2 in at least 4 samples are filtered out.
$lib.size
SE_Bio## MTR_044_S1_CTL MTR_045_S2_CTL MTR_046_S3_CTL
## 35896092 31708728 30084298
## MTR_047_S4_CTL MTR_048_S65_DMSO MTR_049_S5_DMSO
## 32298211 43316736 33355586
## MTR_050_S6_DMSO MTR_051_S66_DMSO MTR_052_S7_AhHyd_Ag
## 30026932 36026174 34905456
## MTR_053_S8_AhHyd_Ag MTR_054_S9_AhHyd_Ag MTR_055_S10_AhHyd_Ag
## 39115824 35254183 36508256
## MTR_056_S11_AhHyd_Inh MTR_057_S12_AhHyd_Inh MTR_058_S13_AhHyd_Inh
## 32446046 39181238 34663693
## MTR_059_S14_AhHyd_Inh MTR_060_S15_Ret_Ag MTR_061_S16_Ret_Ag
## 38124739 45105841 37421078
## MTR_062_S17_Ret_Ag MTR_063_S18_Ret_Ag MTR_064_S19_Ret_Inh
## 32185720 35203919 35563706
## MTR_065_S20_Ret_Inh MTR_066_S21_Ret_Inh MTR_067_S22_Ret_Inh
## 42993502 41486813 41593609
## MTR_068_S23_Andr_Ag MTR_069_S24_Andr_Ag MTR_070_S25_Andr_Ag
## 32815429 40906632 31208496
## MTR_071_S26_Andr_Ag MTR_072_S27_Andr_Inh MTR_073_S28_Andr_Inh
## 40030676 33206709 36578304
## MTR_074_S29_Andr_Inh MTR_075_S30_Andr_Inh MTR_076_S31_LivX_Ag
## 33775034 30169516 40306389
## MTR_077_S32_LivX_Ag MTR_078_S33_LivX_Ag MTR_079_S34_LivX_Ag
## 55537385 38873337 44661327
## MTR_080_S35_LivX_Inh MTR_081_S36_LivX_Inh MTR_082_S37_LivX_Inh
## 36255300 43640070 33774429
## MTR_083_S38_LivX_Inh MTR_084_S39_GC_Ag MTR_085_S40_GC_Ag
## 31448744 49615558 39058430
## MTR_086_S41_GC_Ag MTR_087_S42_GC_Ag MTR_088_S43_GC_Inh
## 40949271 45343061 37632428
## MTR_089_S44_GC_Inh MTR_090_S45_GC_Inh MTR_091_S46_GC_Inh
## 41495325 46713448 45482531
## MTR_092_S47_Estr_Ag MTR_093_S48_Estr_Ag MTR_094_S49_Estr_Ag
## 33976888 36201983 39420602
## MTR_095_S50_Estr_Ag MTR_096_S51_Estr_Inh MTR_097_S52_Estr_Inh
## 34215705 32466391 34977751
## MTR_098_S53_Estr_Inh MTR_099_S54_Estr_Inh MTR_100_S55_Thyr_Ag
## 32309780 42816826 44343983
## MTR_101_S56_Thyr_Ag MTR_102_S57_Thyr_Ag MTR_103_S58_Thyr_Ag
## 44983604 47883046 32958622
## MTR_104_S59_Thyr_Inh MTR_105_S60_Thyr_Inh MTR_106_S61_Thyr_Inh
## 33828268 36810075 35816266
## MTR_107_S62_Thyr_Inh MTR_108_S63_DMSO MTR_109_S64_DMSO
## 39757302 45479386 40315145
<- filterSE(SE_Bio, cpmTh=CpmFilt, sampleTh=SampleFilt)
SE_Filt ## UPDATED library.size slot
<- normTmmSE(SE_Filt, useNormFactors=TRUE, priorCount=0.25)
SE_Filt
assays(SE_Filt)$fpkm <- edgeR::rpkm(SE_Filt, normalized.lib.sizes = TRUE, gene.length=rowData(SE_Filt)$GeneLength)
assays(SE_Filt)$logfpkm <- edgeR::rpkm(SE_Filt, normalized.lib.sizes = TRUE, gene.length=rowData(SE_Filt)$GeneLength,
log=TRUE, prior.count=0.25)
After filtering out lowly-expressed genes, the dataset is structured in 13532 genes measured in 66 samples.
rownames(SE_Filt) <- rowData(SE_Filt)$GeneName
<- SE_Filt[!duplicated(rownames(SE_Filt)), ] SE_Filt
Calculate the fold-change compared to the controls (DMSO). This is done on logcpm.
<- as.vector(which(SE_Filt$Condition=='DMSO'))
DMSO <- SEtools::log2FC(SE_Filt, fromAssay='logcpm', controls=DMSO, isLog=TRUE, toAssay='log2FC') SE_Filt
colData(SE_Filt)$Condition <- factor(colData(SE_Filt)$Condition, levels=c("CTL", "DMSO", "AhHyd_Ag", "AhHyd_Inh", "Andr_Ag", "Andr_Inh", "Estr_Ag", "Estr_Inh", "GC_Ag", "GC_Inh", "LivX_Ag", "LivX_Inh", "Ret_Ag", "Ret_Inh", "Thyr_Ag", "Thyr_Inh" ))
metadata(SE_Filt)$anno_colors <- list(Condition = c('DMSO' = 'grey30', 'CTL' = 'azure3',
'AhHyd_Ag'='#F8766D', 'AhHyd_Inh'='#F8766D50',
'Andr_Ag'='#fccb17', 'Andr_Inh'='#C49A0050',
"Estr_Ag"= '#53B400', "Estr_Inh"= '#53B40050',
'GC_Ag' = '#00C094', 'GC_Inh' = '#00C09450',
'LivX_Ag' = '#00B6EB', 'LivX_Inh' = '#00B6EB50',
'Ret_Ag' = '#A58AFF', 'Ret_Inh' = '#A58AFF50',
'Thyr_Ag' = '#FB61D7', 'Thyr_Inh' = '#FB61D750'
) )
<- GS %>% dplyr::pull(GeneName)
Genes
! Genes %in% row.names(SE_Filt)]
Genes[## [1] "CYP11A1" "CYP17A1" "CYP11B1" "CYP11B2" "CYP21A1" "HSD3B2" "AKR1C4"
## [8] "HSD11B1" "HSD11B2" "HSD17B3" "HSD3B1" "AKR1C4" "SRD5A2" "CYP19A1"
## [15] "HMGCS2" "IDI2"
rowData(SE_Filt)$Signatures <- NA
for (i in which(rowData(SE_Filt)$GeneName %in% Genes)){
<- row.names(SE_Filt)[i]
gene rowData(SE_Filt)[i, 'Signatures'] <-GS[GS$GeneName==gene, "Population"]
}
#color for signatures
metadata(SE_Filt)$anno_colors$Signatures <- c("Steroid biogenesis" = "#e6194b", "Cholesterol synthesis" = "#4363d8")
#color for heatmaps
<- c('darkblue', "purple","white","lightgoldenrod1", 'goldenrod1') ScaledCols
The following genes are not selected as expressed: CYP11A1, CYP17A1, CYP11B1, CYP11B2, CYP21A1, HSD3B2, AKR1C4, HSD11B1, HSD11B2, HSD17B3, HSD3B1, AKR1C4, SRD5A2, CYP19A1, HMGCS2, IDI2.
<- assays(SE_Filt[row.names(SE_Filt) %in% Genes, ])$logfpkm %>% quantile(seq(0,1, by=0.1))
break_coord <- viridisLite::cividis(length(seq(head(break_coord,1), tail(break_coord,1), by=0.2)))
ExpCols
<- sechm::sechm(SE_Filt, features=Genes, assayName="logfpkm", gaps_at = "Condition", gaps_row="Signatures", top_annotation = c("Condition"), show_rownames = TRUE, left_annotation = c("Signatures"), sortRowsOn=NULL, cluster_rows=FALSE, cluster_cols = FALSE, show_colnames=TRUE, hmcols=ExpCols, breaks=seq(head(break_coord,1), tail(break_coord,1), by=0.2), column_title = "Logfpkm")
p1 # breaks FALSE to avoid a symmetrical scale
p1
Saving to PDF file
<- sechm::sechm(SE_Filt, features=Genes, assayName="logfpkm", gaps_at = "Condition", gaps_row="Signatures", top_annotation = c("Condition"), show_rownames = TRUE, left_annotation = c("Signatures", "UpregulatedBy", "DownregulatedBy"), sortRowsOn=NULL, cluster_rows=FALSE, cluster_cols = FALSE, show_colnames=TRUE, hmcols=ScaledCols, do.scale=TRUE, breaks=0.8, column_title = "Scaled Logfpkm", )
p2 p2
Saving to PDF file
<- sechm::sechm(SE_Filt, features=Genes, assayName="log2FC", gaps_at = "Condition", gaps_row="Signatures", top_annotation = c("Condition"), show_rownames = TRUE, left_annotation = c("Signatures", "UpregulatedBy", "DownregulatedBy"), sortRowsOn=NULL, cluster_rows=FALSE, cluster_cols = FALSE, show_colnames=TRUE, hmcols=ScaledCols,
p3 do.scale=FALSE, breaks = 0.85, column_title = "LogFC versus DMSO")
p3
Saving to PDF file
<- sessionInfo()
SessionInfo <- date()
Date save.image(paste0(OutputFolder, '/', Dataset, 'SigMetab_Workspace.RData'))
Date
## [1] "Fri Jul 18 13:19:03 2025"
SessionInfo
## R version 4.2.1 (2022-06-23)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.4 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] tidyHeatmap_1.8.1 ggplot2_3.4.1
## [3] randomcoloR_1.1.0.1 SEtools_1.12.0
## [5] sechm_1.6.0 SummarizedExperiment_1.28.0
## [7] Biobase_2.58.0 GenomicRanges_1.50.2
## [9] GenomeInfoDb_1.34.9 IRanges_2.32.0
## [11] S4Vectors_0.36.1 BiocGenerics_0.44.0
## [13] MatrixGenerics_1.10.0 matrixStats_0.63.0
## [15] gridExtra_2.3 DT_0.27
## [17] RNASeqBulkExploratory_0.2.1
##
## loaded via a namespace (and not attached):
## [1] Rtsne_0.16 colorspace_2.1-0 rjson_0.2.21
## [4] ellipsis_0.3.2 circlize_0.4.15 XVector_0.38.0
## [7] GlobalOptions_0.1.2 clue_0.3-64 rstudioapi_0.14
## [10] bit64_4.0.5 AnnotationDbi_1.60.0 fansi_1.0.4
## [13] splines_4.2.1 codetools_0.2-19 doParallel_1.0.17
## [16] cachem_1.0.7 geneplotter_1.76.0 knitr_1.42
## [19] jsonlite_1.8.4 annotate_1.76.0 cluster_2.1.4
## [22] png_0.1-8 pheatmap_1.0.12 compiler_4.2.1
## [25] httr_1.4.5 Matrix_1.5-3 fastmap_1.1.1
## [28] limma_3.54.1 cli_3.6.1 htmltools_0.5.4
## [31] tools_4.2.1 gtable_0.3.1 glue_1.6.2
## [34] GenomeInfoDbData_1.2.9 dplyr_1.1.0 V8_4.2.2
## [37] Rcpp_1.0.10 jquerylib_0.1.4 vctrs_0.6.2
## [40] Biostrings_2.66.0 nlme_3.1-162 crosstalk_1.2.0
## [43] iterators_1.0.14 xfun_0.37 stringr_1.5.0
## [46] openxlsx_4.2.5.2 lifecycle_1.0.3 dendextend_1.16.0
## [49] XML_3.99-0.13 ca_0.71.1 edgeR_3.40.2
## [52] zlibbioc_1.44.0 scales_1.2.1 TSP_1.2-2
## [55] parallel_4.2.1 RColorBrewer_1.1-3 ComplexHeatmap_2.14.0
## [58] yaml_2.3.7 curl_5.0.0 memoise_2.0.1
## [61] sass_0.4.5 stringi_1.7.12 RSQLite_2.3.0
## [64] highr_0.10 genefilter_1.80.3 foreach_1.5.2
## [67] seriation_1.4.1 zip_2.2.2 BiocParallel_1.32.5
## [70] shape_1.4.6 rlang_1.1.1 pkgconfig_2.0.3
## [73] bitops_1.0-7 evaluate_0.20 lattice_0.20-45
## [76] purrr_1.0.1 patchwork_1.1.2 htmlwidgets_1.6.1
## [79] bit_4.0.5 tidyselect_1.2.0 magrittr_2.0.3
## [82] DESeq2_1.38.3 R6_2.5.1 generics_0.1.3
## [85] DelayedArray_0.24.0 DBI_1.1.3 withr_2.5.0
## [88] mgcv_1.8-41 pillar_1.8.1 survival_3.5-3
## [91] KEGGREST_1.38.0 RCurl_1.98-1.10 tibble_3.2.1
## [94] crayon_1.5.2 utf8_1.2.3 rmarkdown_2.20
## [97] viridis_0.6.2 GetoptLong_1.0.5 locfit_1.5-9.7
## [100] grid_4.2.1 sva_3.46.0 data.table_1.14.8
## [103] blob_1.2.3 digest_0.6.31 xtable_1.8-4
## [106] tidyr_1.3.0 munsell_0.5.0 viridisLite_0.4.1
## [109] registry_0.5-1 bslib_0.4.2