Check the expression levels and behavior through post-conceptional weeks from Brainspan (pre-natal cortex samples).
Library and source upload
library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
Values of RMarkdown parameters
for (i in 1:length(params))
print(paste('Parameter:', names(params)[i], ' - Value:', params[[i]], '- Class:', class(params[[i]])))
## [1] "Parameter: Dataset - Value: BrainSpan - Class: character"
## [1] "Parameter: RpkmListPrel - Value: ~/DataDir/ExternalData/BulkData/BrainSpan/OutputPreparation/PreCortexRpkmList.rds - Class: character"
## [1] "Parameter: OutputFolder - Value: ~/DataDir/ExternalData/BulkData/BrainSpan/Output/ - Class: character"
Source helper file
source("../SignatureHelper.R")
Folder organization
<- params$OutputFolder
OutputFolder
if (dir.exists(OutputFolder) == FALSE) {
dir.create(OutputFolder, showWarnings=TRUE, recursive=TRUE)
}
I upload:
<- readRDS(params$RpkmListPrel)
RpkmListPrel summary(row.names(RpkmListPrel$Log2Rpkm))
## Length Class Mode
## 44507 character character
<- (!(RpkmListPrel$Meta$Stage %in% c(25, 26, 35))) #I eliminate the PCW that were found as outliers
Sel
<- list()
RpkmList $Rpkm <- RpkmListPrel$Rpkm[, Sel]
RpkmList$Log2Rpkm <- RpkmListPrel$Log2Rpkm[, Sel]
RpkmList$Meta <- RpkmListPrel$Meta[Sel, ]
RpkmList
if(!identical(colnames(RpkmList$Log2Rpkm)[1:157], RpkmList$Meta$ID)){
stop('Inconsistency in Expression and Metadata matrices!')
}
### Check for duplicated gene names
table(duplicated(RpkmList$Log2Rpkm$GeneName))
##
## FALSE
## 44507
After removal of week 25, 26 and 35:
<- read.table(file = '~/DataDir/ExternalData/Receptors/ReceptorsComplete.txt',
GS sep='\t', header=TRUE)
colnames(GS)[2] <- "Population"
as.data.frame(GS) %>%
::datatable(class='hover', rownames=FALSE, caption='Hormonal receptors',
DTfilter='top', escape=TRUE, extension='Buttons',
options=list(pageLength=10, dom='Bfrtip',
buttons=list(I('colvis'), c('csv', 'excel')))
)
<- GS[!duplicated(GS$GeneName),]
GSUnique $GeneName[!(GSUnique$GeneName %in% RpkmList$Log2Rpkm$GeneName)] GSUnique
## character(0)
<- GSUnique[(GSUnique$GeneName %in% RpkmList$Log2Rpkm$GeneName), ]
GSUnique
summary(GS)
## GeneName Population
## Length:39 Length:39
## Class :character Class :character
## Mode :character Mode :character
The signature is composed by 39genes classified in 11 categories.
CuratedSig lists is created:
<- list() CuratedSig
# Retrieval of expression values for gene signatures
$LogRpkm <- LogFpkmSignature(MetaData=RpkmList$Meta, LogFpkmData=RpkmList$Log2Rpkm[,1:157],
CuratedSigGeneSig=unique(GSUnique$GeneName))
print(paste('Genes: ', dim(CuratedSig$LogRpkm)[2]-4))
## [1] "Genes: 44"
print(paste('Samples: ', dim(CuratedSig$LogRpkm)[1]))
## [1] "Samples: 157"
# Calculation of mean expression across stages
$LogRpkmMean <- SigLogFpkmMean(CuratedSig$LogRpkm, StartCol=10,
CuratedSigGroupCol='Stage', MetaInfo=GSUnique,
Arrange='Population')
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
## # Was:
## data %>% select(StartCol)
##
## # Now:
## data %>% select(all_of(StartCol))
##
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `summarise()` has grouped output by 'GeneName'. You can override using the
## `.groups` argument.
print(paste('MeanLog2Rpkm Genes', dim(CuratedSig$LogRpkmMean)[1]))
## [1] "MeanLog2Rpkm Genes 390"
print(paste('Min Value', round(min(CuratedSig$LogRpkmMean$ExpMean), 2)))
## [1] "Min Value 0"
print(paste('Max Value', round(max(CuratedSig$LogRpkmMean$ExpMean), 2)))
## [1] "Max Value 6.57"
<- scales::hue_pal()(12)
ColVec
<- CuratedSig$LogRpkmMean %>% dplyr::mutate(Stage=factor(Stage, levels=c('8', '9', '12', '13', '16',
LogRpkmMeanRel '17', '19', '21', '24', '37')))
$Lolli <- LolliPopCols(Data=LogRpkmMeanRel, PointSize=2,
CuratedSigSegSize=1, yLow=0, yHigh=7, CeilDown=0,
CeilUp=7, unit='LogRpkm', cols=ColVec,
Shape=15, GeneSize=4.2, filterZero=FALSE)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
$Lolli CuratedSig
ggsave(plot=CuratedSig$Lolli, filename=paste0(OutputFolder, 'BSpan_LogRpkm.pdf'), width=10, height=10)
ggsave(plot=CuratedSig$Lolli, filename=paste0(OutputFolder, 'BSpan_LogRpkm.png'), width=10, height=10)
$LogRpkmDelta <- DeltaLogFpkmMean(CuratedSig$LogRpkmMean,
CuratedSigGroupCol='Stage', DiscardStage=NULL,
RefStage='S8', Arrange='Population',
MetaCols=2, Th=0, Substitute=NA)
print(paste('DeltaLogFpkm Genes', dim(CuratedSig$LogRpkmDelta)[1]))
## [1] "DeltaLogFpkm Genes 351"
print(paste('Min Value', round(min(CuratedSig$LogRpkmDelta$ExpMean, na.rm=T),2)))
## [1] "Min Value -1.37"
print(paste('Max Value', round(max(CuratedSig$LogRpkmDelta$ExpMean, na.rm=T),2)))
## [1] "Max Value 3.78"
<- CuratedSig$LogRpkmDelta %>% dplyr::mutate(Stage=factor(Stage, levels=c('D2_S9_S8', 'D3_S12_S8', 'D4_S13_S8', 'D5_S16_S8', 'D6_S17_S8',
LogRpkmDeltaRel 'D7_S19_S8', 'D8_S21_S8', 'D9_S24_S8', 'D10_S37_S8')))
$LolliDelta <- LolliPopCols(LogRpkmDeltaRel, PointSize=4, SegSize=1.5,
CuratedSigcols=ColVec, yLow=-2, yHigh=4, CeilUp=4, CeilDown=-2,
unit='DeltaLogRpkm', Shape=18, GeneSize=5, filterZero=FALSE)
$LolliDelta CuratedSig
ggsave(plot=CuratedSig$LolliDelta, filename=paste0(OutputFolder, 'BSpan_DeltaLogRpkm.pdf'), width=10, height=10)
ggsave(plot=CuratedSig$LolliDelta, filename=paste0(OutputFolder, 'BSpan_DeltaLogRpkm.png'), width=10, height=10)
save.image(file=paste0(OutputFolder, 'AnalysisWorkspace.RData'))
<- sessionInfo()
SessionInfo <- date() Date
Date
## [1] "Tue Jul 22 19:02:44 2025"
SessionInfo
## R version 4.2.1 (2022-06-23)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.4 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ggplot2_3.4.1 dplyr_1.1.0 tidyr_1.3.0
##
## loaded via a namespace (and not attached):
## [1] highr_0.10 pillar_1.8.1 bslib_0.4.2 compiler_4.2.1
## [5] jquerylib_0.1.4 tools_4.2.1 digest_0.6.31 jsonlite_1.8.4
## [9] evaluate_0.20 lifecycle_1.0.3 tibble_3.2.1 gtable_0.3.1
## [13] pkgconfig_2.0.3 rlang_1.1.1 cli_3.6.1 rstudioapi_0.14
## [17] crosstalk_1.2.0 yaml_2.3.7 xfun_0.37 fastmap_1.1.1
## [21] withr_2.5.0 knitr_1.42 systemfonts_1.0.4 htmlwidgets_1.6.1
## [25] generics_0.1.3 vctrs_0.6.2 sass_0.4.5 grid_4.2.1
## [29] DT_0.27 tidyselect_1.2.0 glue_1.6.2 R6_2.5.1
## [33] textshaping_0.3.6 fansi_1.0.4 rmarkdown_2.20 farver_2.1.1
## [37] purrr_1.0.1 magrittr_2.0.3 ellipsis_0.3.2 scales_1.2.1
## [41] htmltools_0.5.4 colorspace_2.1-0 ragg_1.2.3 labeling_0.4.2
## [45] utf8_1.2.3 munsell_0.5.0 cachem_1.0.7 crayon_1.5.2