Data preparation

SFARI genes were downloaded from https://gene.sfari.org/database/gene-scoring/

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.3     ✓ purrr   0.3.4
## ✓ tibble  3.1.0     ✓ dplyr   1.0.5
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Load the SFARI Gene export and group its gene symbols by SFARI gene score.
SFARI <- read.csv("SFARI-Gene_genes_06-20-2019release_07-13-2019export.csv")

# One character vector of gene symbols per score (1 to 6), stored as
# SFARIgenes$score1 ... SFARIgenes$score6. which() skips rows whose score is
# NA, so no NA symbols are introduced by the comparison.
SFARIgenes <- lapply(
  setNames(1:6, paste0("score", 1:6)),
  function(score) {
    as.character(SFARI$gene.symbol[which(SFARI$gene.score == score)])
  }
)

# Pool every scored gene into a single vector. Whitespace is normalised
# BEFORE de-duplication so that symbols differing only by stray spaces
# collapse into one entry; missing symbols are dropped last.
SFARIgenesAll <- unique(str_squish(unlist(SFARIgenes, use.names = FALSE)))
SFARIgenesAll <- SFARIgenesAll[!is.na(SFARIgenesAll)]

AutismKB genes were downloaded from http://db.cbi.pku.edu.cn/autismkb_v2/index.php

# Read the tab-separated AutismKB export. It is read without a header row,
# so the column names are assigned explicitly afterwards.
AutismKB <- read.table("AutismKB.txt", header = FALSE, sep = "\t")
colnames(AutismKB) <- c(
  "Gene Symbol",
  "Entrez ID",
  "Genome-Wide Association Studies",
  "CNV/SV Studies",
  "Linkage Analyses",
  "Low-Scale Genetic Association Studies",
  "Expression Profilings",
  "NGS de novo Mutation Studies",
  "NGS Mosaic Mutation Studies",
  "NGS Other Studies",
  "Low-Scale Gene Studies",
  "total score"
)

# Distinct gene symbols: normalise whitespace first, then de-duplicate, so
# that symbols differing only by stray spaces are not kept twice.
AutismKBgenes <- unique(str_squish(as.character(AutismKB$`Gene Symbol`)))
AutismKBgenes <- AutismKBgenes[!is.na(AutismKBgenes)]

Decipher genes were downloaded from https://decipher.sanger.ac.uk/about#downloads/data

# Load the DDG2P export and keep the entries whose disease name mentions one
# of the neurodevelopmental keywords (case-insensitive).
Decipher <- read.csv("DDG2P_16_7_2019.csv")

# Row indices are collected keyword by keyword (autism, then intellectual,
# then neurodevelopm) and de-duplicated, so a disease matching several
# keywords is selected only once while the original row order is preserved.
decipher_neuro_rows <- unique(unlist(lapply(
  c("autism", "intellectual", "neurodevelopm"),
  grep,
  x = Decipher$disease.name,
  ignore.case = TRUE
)))
DecipherNeuro <- Decipher[decipher_neuro_rows, ]

# Distinct gene symbols: normalise whitespace first, then de-duplicate, then
# drop missing values.
DecipherNeurogenes <- unique(str_squish(as.character(DecipherNeuro$gene.symbol)))
DecipherNeurogenes <- DecipherNeurogenes[!is.na(DecipherNeurogenes)]

MSSNG genes were downloaded from https://www.ncbi.nlm.nih.gov/pubmed/28263302 Supplementary Table 5

library(readxl)
# Read the MSSNG supplementary table; readxl's name-repair messages are
# silenced because the parsed column names are replaced just below.
MSSNG <- suppressMessages(read_excel("nn.4524-S7.xlsx"))

# The first parsed row is used as the header: promote it to column names
# (explicitly coerced to a character vector) and remove it from the data.
colnames(MSSNG) <- as.character(MSSNG[1, ])
MSSNG <- MSSNG[-1, ]

# Distinct gene symbols: normalise whitespace first, then de-duplicate, so
# that symbols differing only by stray spaces are not kept twice.
MSSNGgenes <- unique(str_squish(as.character(MSSNG$gene_symbol)))
MSSNGgenes <- MSSNGgenes[!is.na(MSSNGgenes)]

ASDsevere genes were downloaded from https://www.nature.com/articles/s41436-018-0380-2 Supplementary Table S6

# Read the supplementary table, silencing readxl's messages.
ASDsevere <- suppressMessages(read_excel("41436_2018_380_MOESM7_ESM.xlsx"))

# Distinct gene symbols from the `Gene` column: normalise whitespace first,
# then de-duplicate, then drop missing values.
ASDsevereGenes <- unique(str_squish(as.character(ASDsevere$Gene)))
ASDsevereGenes <- ASDsevereGenes[!is.na(ASDsevereGenes)]

ID genes were downloaded from https://www.nature.com/articles/ng.3792 Supplementary Table S2

# Read the intellectual-disability supplementary table; readxl's name-repair
# messages are silenced because the parsed column names are replaced below.
ID <- suppressMessages(read_excel("41588_2017_BFng3792_MOESM20_ESM.xlsx"))

# The first parsed row is used as the header: promote it to column names
# (explicitly coerced to a character vector) and remove it from the data.
colnames(ID) <- as.character(ID[1, ])
ID <- ID[-1, ]

# Distinct gene symbols: normalise whitespace first, then de-duplicate, then
# drop missing values.
IDGenes <- unique(str_squish(as.character(ID$Gene)))
IDGenes <- IDGenes[!is.na(IDGenes)]

OMIM genes were downloaded from https://www.omim.org/downloads/ and then only the genes related to neurodevelopmental phenotypes were selected.

# Read the OMIM morbid map: tab-separated, no header row, lines starting
# with '#' are skipped, quoting is disabled and short rows are padded.
morbidmap <- read.table(
  "morbidmap.txt",
  sep = "\t",
  comment.char = "#",
  fill = TRUE,
  header = FALSE,
  quote = ""
)
colnames(morbidmap) <- c("Phenotype", "Gene Symbols", "MIM Number", "Cyto Location")

# Rows whose phenotype mentions one of the neurodevelopmental keywords
# (case-insensitive). Indices are gathered keyword by keyword and
# de-duplicated, so a phenotype matching several keywords is selected once.
omim_neuro_rows <- unique(unlist(lapply(
  c("autism", "intellectual", "neurodevelopm"),
  grep,
  x = morbidmap$Phenotype,
  ignore.case = TRUE
)))

# Run this line if you want to check the specific phenotypes selected
# morbidmap$Phenotype[omim_neuro_rows]

NeuroOmim <- morbidmap[omim_neuro_rows, ]

# The "Gene Symbols" field holds comma-separated symbols. Splitting on ","
# leaves leading spaces on every symbol after the first, so whitespace must
# be normalised BEFORE de-duplication; otherwise "GENE" and " GENE" are both
# kept and end up as duplicates after squishing.
NeuroOmimGenes <- as.character(NeuroOmim$`Gene Symbols`)
NeuroOmimGenes <- unlist(strsplit(NeuroOmimGenes, split = ","))
NeuroOmimGenes <- unique(str_squish(NeuroOmimGenes))
NeuroOmimGenes <- NeuroOmimGenes[!is.na(NeuroOmimGenes)]

# Same extraction over the whole morbid map (no phenotype filter).
OmimGenes <- as.character(morbidmap$`Gene Symbols`)
OmimGenes <- unlist(strsplit(OmimGenes, split = ","))
OmimGenes <- unique(str_squish(OmimGenes))
OmimGenes <- OmimGenes[!is.na(OmimGenes)]

GDI indexes were downloaded from http://lab.rockefeller.edu/casanova/GDI

# Load the GDI table: tab-separated text file whose first line is the header.
GDI <- read.table(
  file = "GDI_full_10282015.txt",
  sep = "\t",
  header = TRUE
)

SingleCellASD genes were downloaded from Supplementary Data 4 at https://science.sciencemag.org/content/suppl/2019/05/15/364.6441.685.DC1 selecting in particular the clusters of layer 2/3 neurons and protoplasmic astrocytes, to highlight the top cell-type-specific differentially expressed genes, as shown in the paper https://science.sciencemag.org/content/364/6441/685.long

# Read the single-cell differential expression table.
scASDgenes <- read_excel("aav8130_Data-S4.xls")

# Gene names with whitespace normalised up front, so the de-duplication
# below cannot keep two copies of a name that differ only by stray spaces.
sc_gene_names <- str_squish(as.character(scASDgenes$`Gene name`))
sc_cell_types <- scASDgenes$`Cell type`

# All genes reported in the table, regardless of cell type.
SingleCellASDgenes <- unique(sc_gene_names)
SingleCellASDgenes <- SingleCellASDgenes[!is.na(SingleCellASDgenes)]

# Genes from rows labelled "L2/3"; which() ignores rows with a missing
# cell type.
NeuronASDgenes <- unique(sc_gene_names[which(sc_cell_types == "L2/3")])
NeuronASDgenes <- NeuronASDgenes[!is.na(NeuronASDgenes)]

# Genes from rows labelled "AST-PP".
AstrocyteASDgenes <- unique(sc_gene_names[which(sc_cell_types == "AST-PP")])
AstrocyteASDgenes <- AstrocyteASDgenes[!is.na(AstrocyteASDgenes)]

The iPSYCH consortium is publishing the largest exome sequencing study of autism spectrum disorder (ASD) to date (n=35,584 total samples, 11,986 with ASD). I downloaded the identified risk genes from Table S4 of the bioRxiv preprint at https://www.biorxiv.org/content/10.1101/484113v3.supplementary-material

# Read the third sheet of the supplementary workbook.
iPSYCH <- read_excel("media-5.xlsx", sheet = 3)

# Distinct gene symbols from the `gene` column: normalise whitespace first,
# then de-duplicate, then drop missing values.
iPSYCHgenes <- unique(str_squish(as.character(iPSYCH$gene)))
iPSYCHgenes <- iPSYCHgenes[!is.na(iPSYCHgenes)]

Recently reported causative genes for neuropsychiatric diseases were downloaded from Extended Data Table 5 of this paper: https://www.nature.com/articles/s41593-021-00802-y

# Read the fifth sheet, skipping its first two rows. Repeated headers are
# renamed by readxl with positional suffixes (e.g. hgnc_symbol...2).
# NOTE: the object name `table` is the same as base R's table() function.
table <- read_excel(
  path = "41593_2021_802_MOESM2_ESM.xlsx",
  sheet = 5,
  skip = 2
)
## New names:
## * disease -> disease...1
## * hgnc_symbol -> hgnc_symbol...2
## * ensembl_gene_id -> ensembl_gene_id...3
## * `` -> ...4
## * disease -> disease...5
## * ...
# Build one gene-symbol vector per disease from the repeated hgnc_symbol
# columns of `table`. The vector names map each list element to its source
# column. The element name "Eplilepsy" is misspelt but kept as is, because
# the list is saved to disk and may be accessed by that name elsewhere.
NeuropsychiatricDiseases <- lapply(
  c(
    Autism = "hgnc_symbol...2",
    IntellectualDisability = "hgnc_symbol...6",
    Schizophrenia = "hgnc_symbol...10",
    Alzheimer = "hgnc_symbol...14",
    Parkinson = "hgnc_symbol...18",
    PSP_FTD = "hgnc_symbol...22",
    Eplilepsy = "hgnc_symbol...26"
  ),
  function(column) {
    # Normalise whitespace, de-duplicate, then drop missing values. The
    # per-disease blocks sit side by side in one sheet, so shorter lists are
    # presumably padded with NA (TODO confirm against the workbook); without
    # this filter an NA would be carried into every per-disease vector.
    symbols <- unique(str_squish(as.character(table[[column]])))
    symbols[!is.na(symbols)]
  }
)

# Union of all per-disease gene lists, computed before `All` itself is added.
NeuropsychiatricDiseases$All <- unique(
  as.character(unlist(NeuropsychiatricDiseases, use.names = FALSE))
)

PsychencodeNDD genes were downloaded from this paper: https://science.sciencemag.org/content/362/6420/eaat8127

# Load the PsychENCODE differential expression gene lists.
PsychencodeNDD <- readRDS(file = "../../Data/psychencode.DEGs.rds")

# ASD genes: union of the down- and up-regulated lists, coerced to
# character, de-duplicated, with missing values removed.
PsychencodeASD <- union(
  PsychencodeNDD$`psychencode:ASD down`,
  PsychencodeNDD$`psychencode:ASD up`
) %>%
  as.character() %>%
  unique() %>%
  na.omit()

SparkASD genes were downloaded from: https://sparkforautism.org/portal/page/genetic-analysis-faq/

# SPARK ASD gene list, de-duplicated.
# The original list contained the element "(F232V)" right after "EIF3F".
# It is not a gene symbol -- presumably a variant annotation for EIF3F that
# was split into its own entry when the list was copied -- so it is removed.
SparkASD <- unique(c(
  "ACTB", "ADNP", "ADSL", "AFF2", "AHDC1", "ALDH5A1", "ANK2", "ANK3",
  "ANKRD11", "ARHGEF9", "ARID1B", "ARX", "ASH1L", "ASXL3", "ATRX", "AUTS2",
  "BAZ2B", "BCKDK", "BCL11A", "BRAF", "BRSK2",
  "CACNA1C", "CAPRIN1", "CASK", "CASZ1", "CDKL5", "CHAMP1", "CHD2", "CHD3",
  "CHD7", "CHD8", "CIC", "CNOT3", "CREBBP", "CSDE1", "CTCF", "CTNNB1", "CUL3",
  "DDX3X", "DEAF1", "DHCR7", "DLG4", "DMPK", "DNMT3A", "DSCAM", "DYRK1A",
  "EBF3", "EHMT1", "EIF3F", "EP300",
  "FMR1", "FOXG1", "FOXP1",
  "GIGYF1", "GIGYF2", "GRIN2B",
  "HIVEP2", "HNRNPH2", "HNRNPU", "HRAS",
  "IQSEC2", "IRF2BPL",
  "KANSL1", "KCNB1", "KDM3B", "KDM6B", "KIAA2022", "KMT2A", "KMT2C", "KMT5B",
  "KRAS",
  "LZTR1",
  "MAGEL2", "MAP2K1", "MAP2K2", "MBD5", "MBOAT7", "MECP2", "MED13", "MED13L",
  "MEIS2", "MYT1L",
  "NAA15", "NBEA", "NCKAP1", "NF1", "NIPBL", "NLGN2", "NLGN3", "NR4A2",
  "NRAS", "NRXN1", "NRXN2", "NRXN3", "NSD1",
  "PACS1", "PCDH19", "PHF21A", "PHF3", "PHIP", "POGZ", "POMGNT1", "PPP1CB",
  "PPP2R5D", "PSMD12", "PTCHD1", "PTEN", "PTPN11",
  "RAF1", "RAI1", "RELN", "RERE", "RFX3", "RIMS1", "RIT1", "RORB",
  "SCN1A", "SCN2A", "SCN8A", "SETBP1", "SETD2", "SETD5", "SHANK2", "SHANK3",
  "SHOC2", "SIN3A", "SLC6A1", "SLC9A6", "SMARCC2", "SON", "SOS1", "SOS2",
  "SOX5", "SPAST", "SRCAP", "STXBP1", "SYNGAP1",
  "TANC2", "TAOK1", "TBCK", "TBR1", "TCF20", "TCF4", "TLK2", "TRIO",
  "TRIP12", "TSC1", "TSC2", "TSHZ3",
  "UBE3A", "UPF3B",
  "VPS13B",
  "WAC", "WDFY3",
  "ZBTB20", "ZNF292", "ZNF462"
))

I then save all the objects

# Collect the ASD-related gene lists built above into one named list.
ASD <- list(
  SFARI = SFARIgenesAll,
  MSSNG = MSSNGgenes,
  iPSYCH = iPSYCHgenes,
  ID = IDGenes,
  SingleCellASD = SingleCellASDgenes,
  NeuronASD = NeuronASDgenes,
  AstrocyteASD = AstrocyteASDgenes,
  NeuropsychiatricDiseasesASD = NeuropsychiatricDiseases$Autism,
  PsychencodeASD = PsychencodeASD
)

# Write the combined list, together with the raw SFARI table and the
# per-source lists it was derived from, to a single .RData file.
save(
  ASD,
  SFARI,
  SFARIgenes,
  NeuropsychiatricDiseases,
  PsychencodeNDD,
  file = "../../Data/ASD.RData"
)

Authors

Nicolò Caporale: ,

Cristina Cheroni:

Pierre-Luc Germain:

Giuseppe Testa:

Lab: http://www.testalab.eu/

Date: December 13, 2021

# Print the R version, platform and package versions used for this run.
sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] readxl_1.3.1    forcats_0.5.1   stringr_1.4.0   dplyr_1.0.5    
##  [5] purrr_0.3.4     readr_1.4.0     tidyr_1.1.3     tibble_3.1.0   
##  [9] ggplot2_3.3.3   tidyverse_1.3.0
## 
## loaded via a namespace (and not attached):
##  [1] tidyselect_1.1.0  xfun_0.21         bslib_0.2.4       haven_2.3.1      
##  [5] colorspace_2.0-0  vctrs_0.3.7       generics_0.1.0    htmltools_0.5.1.1
##  [9] yaml_2.2.1        utf8_1.2.1        rlang_0.4.10      jquerylib_0.1.3  
## [13] pillar_1.5.1      withr_2.4.1       glue_1.4.2        DBI_1.1.1        
## [17] dbplyr_2.1.0      modelr_0.1.8      lifecycle_1.0.0   munsell_0.5.0    
## [21] gtable_0.3.0      cellranger_1.1.0  rvest_0.3.6       evaluate_0.14    
## [25] knitr_1.31        fansi_0.4.2       broom_0.7.5       Rcpp_1.0.6       
## [29] backports_1.2.1   scales_1.1.1      jsonlite_1.7.2    fs_1.5.0         
## [33] hms_1.0.0         digest_0.6.27     stringi_1.5.3     grid_4.0.3       
## [37] cli_2.3.1         tools_4.0.3       magrittr_2.0.1    sass_0.3.1       
## [41] crayon_1.4.1      pkgconfig_2.0.3   ellipsis_0.3.1    xml2_1.3.2       
## [45] reprex_1.0.0      lubridate_1.7.9.2 assertthat_0.2.1  rmarkdown_2.7    
## [49] httr_1.4.2        rstudioapi_0.13   R6_2.5.0          compiler_4.0.3