SFARI genes were downloaded from https://gene.sfari.org/database/gene-scoring/
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.3 ✓ purrr 0.3.4
## ✓ tibble 3.1.0 ✓ dplyr 1.0.5
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Load the SFARI Gene export (CSV) into a data frame.
SFARI <- read.csv("SFARI-Gene_genes_06-20-2019release_07-13-2019export.csv")

# Build a list with one character vector of gene symbols per gene score
# (score1 ... score6). `which()` drops rows whose score is NA.
SFARIgenes <- list()
SFARIgenes$score1 <- as.character(SFARI$gene.symbol[which(SFARI$gene.score == 1)])
SFARIgenes$score2 <- as.character(SFARI$gene.symbol[which(SFARI$gene.score == 2)])
SFARIgenes$score3 <- as.character(SFARI$gene.symbol[which(SFARI$gene.score == 3)])
SFARIgenes$score4 <- as.character(SFARI$gene.symbol[which(SFARI$gene.score == 4)])
SFARIgenes$score5 <- as.character(SFARI$gene.symbol[which(SFARI$gene.score == 5)])
SFARIgenes$score6 <- as.character(SFARI$gene.symbol[which(SFARI$gene.score == 6)])

# Pool every score category into one de-duplicated vector, drop NAs and
# normalise whitespace. `SFARIgenes` itself stays a per-score list because it
# is saved alongside `SFARIgenesAll` at the end of the document.
SFARIgenesAll <- na.omit(unique(unlist(SFARIgenes)))
SFARIgenesAll <- str_squish(SFARIgenesAll)
AutismKB genes were downloaded from http://db.cbi.pku.edu.cn/autismkb_v2/index.php
# Read the tab-separated AutismKB table; the file is read without a header
# row, so column names are assigned explicitly below.
AutismKB <- read.table("AutismKB.txt", header = FALSE, sep = "\t")
colnames(AutismKB) <- c(
  "Gene Symbol", "Entrez ID", "Genome-Wide Association Studies",
  "CNV/SV Studies", "Linkage Analyses",
  "Low-Scale Genetic Association Studies", "Expression Profilings",
  "NGS de novo Mutation Studies", "NGS Mosaic Mutation Studies",
  "NGS Other Studies", "Low-Scale Gene Studies", "total score"
)

# Unique, NA-free, whitespace-normalised gene symbols.
AutismKBgenes <- na.omit(unique(as.character(AutismKB$`Gene Symbol`)))
AutismKBgenes <- str_squish(AutismKBgenes)
Decipher genes were downloaded from https://decipher.sanger.ac.uk/about#downloads/data
# Load the DDG2P (Decipher) gene-disease table.
Decipher <- read.csv("DDG2P_16_7_2019.csv")

# Keep rows whose disease name mentions autism, intellectual (disability) or
# neurodevelopm(ental), case-insensitively. The three index vectors are
# concatenated, so a row matching several patterns appears more than once;
# duplicates are removed at the gene level below.
DecipherNeuro <- Decipher[c(
  grep("autism", Decipher$disease.name, ignore.case = TRUE),
  grep("intellectual", Decipher$disease.name, ignore.case = TRUE),
  grep("neurodevelopm", Decipher$disease.name, ignore.case = TRUE)
), ]

# Unique, NA-free, whitespace-normalised gene symbols of the selected rows.
DecipherNeurogenes <- na.omit(unique(as.character(DecipherNeuro$gene.symbol)))
DecipherNeurogenes <- str_squish(DecipherNeurogenes)
MSSNG genes were downloaded from https://www.ncbi.nlm.nih.gov/pubmed/28263302 Supplementary Table 5
library(readxl)
# Read the supplementary spreadsheet, silencing readxl's messages.
MSSNG <- suppressMessages(read_excel("nn.4524-S7.xlsx"))

# Promote the first data row to column names, then drop that row.
colnames(MSSNG) <- MSSNG[1, ]
MSSNG <- MSSNG[-1, ]

# Unique, NA-free, whitespace-normalised gene symbols.
MSSNGgenes <- na.omit(unique(as.character(MSSNG$gene_symbol)))
MSSNGgenes <- str_squish(MSSNGgenes)
ASDsevere genes were downloaded from https://www.nature.com/articles/s41436-018-0380-2 Supplementary Table S6
# Read the supplementary spreadsheet, silencing readxl's messages.
ASDsevere <- suppressMessages(read_excel("41436_2018_380_MOESM7_ESM.xlsx"))

# Unique, NA-free, whitespace-normalised entries of the `Gene` column.
ASDsevereGenes <- na.omit(unique(as.character(ASDsevere$Gene)))
ASDsevereGenes <- str_squish(ASDsevereGenes)
ID genes were downloaded from https://www.nature.com/articles/ng.3792 Supplementary Table S2
# Read the supplementary spreadsheet, silencing readxl's messages.
ID <- suppressMessages(read_excel("41588_2017_BFng3792_MOESM20_ESM.xlsx"))

# Promote the first data row to column names, then drop that row.
colnames(ID) <- ID[1, ]
ID <- ID[-1, ]

# Unique, NA-free, whitespace-normalised entries of the `Gene` column.
IDGenes <- na.omit(unique(as.character(ID$Gene)))
IDGenes <- str_squish(IDGenes)
OMIM genes were downloaded from https://www.omim.org/downloads/ and then filtered to keep only the genes that are related to neurodevelopmental phenotypes
# Read OMIM's morbidmap: tab-separated, '#' lines are comments, short rows are
# padded (fill = TRUE) and quote characters are treated as ordinary text.
morbidmap <- read.table("morbidmap.txt", sep = '\t', comment.char = '#',
                        fill = TRUE, header = FALSE, quote = "")
colnames(morbidmap) <- c("Phenotype", "Gene Symbols", "MIM Number", "Cyto Location")

# Run this line if you want to check the specific phenotypes selected:
# morbidmap$Phenotype[c(grep("autism", morbidmap$Phenotype, ignore.case = TRUE), grep("intellectual", morbidmap$Phenotype, ignore.case = TRUE), grep("neurodevelopm", morbidmap$Phenotype, ignore.case = TRUE))]

# Rows whose phenotype mentions autism, intellectual (disability) or
# neurodevelopm(ental), case-insensitively. Index vectors are concatenated, so
# a row matching several patterns is repeated; genes are de-duplicated below.
NeuroOmim <- morbidmap[c(
  grep("autism", morbidmap$Phenotype, ignore.case = TRUE),
  grep("intellectual", morbidmap$Phenotype, ignore.case = TRUE),
  grep("neurodevelopm", morbidmap$Phenotype, ignore.case = TRUE)
), ]

# Gene symbols of the selected rows. Each cell can hold several
# comma-separated symbols, so split, flatten, de-duplicate and trim.
NeuroOmimGenes <- as.character(NeuroOmim$`Gene Symbols`)
NeuroOmimGenes <- strsplit(NeuroOmimGenes, split = ',')
NeuroOmimGenes <- na.omit(unique(unlist(NeuroOmimGenes)))
NeuroOmimGenes <- str_squish(NeuroOmimGenes)

# Same processing applied to every row of morbidmap (all OMIM genes).
OmimGenes <- as.character(morbidmap$`Gene Symbols`)
OmimGenes <- strsplit(OmimGenes, split = ',')
OmimGenes <- na.omit(unique(unlist(OmimGenes)))
OmimGenes <- str_squish(OmimGenes)
GDI indexes were downloaded from http://lab.rockefeller.edu/casanova/GDI
# Read the tab-separated GDI table; the first line of the file is the header.
GDI <- read.table("GDI_full_10282015.txt", header = TRUE, sep = "\t")
SingleCellASD genes were downloaded from Supplementary Data 4 at https://science.sciencemag.org/content/suppl/2019/05/15/364.6441.685.DC1 selecting in particular the clusters of layers 2-3 neurons and protoplasmic astrocytes, to highlight the top cell-specific differentially expressed genes, as shown in the paper https://science.sciencemag.org/content/364/6441/685.long
# Read the single-cell differential-expression spreadsheet.
scASDgenes <- read_excel("aav8130_Data-S4.xls")

# All gene names across every cell type: unique, NA-free, whitespace-trimmed.
SingleCellASDgenes <- na.omit(unique(as.character(scASDgenes$`Gene name`)))
SingleCellASDgenes <- str_squish(SingleCellASDgenes)

# Gene names restricted to rows whose `Cell type` is "L2/3".
NeuronASDgenes <- na.omit(unique(as.character(scASDgenes$`Gene name`)[which(scASDgenes$`Cell type` == "L2/3")]))
NeuronASDgenes <- str_squish(NeuronASDgenes)

# Gene names restricted to rows whose `Cell type` is "AST-PP".
AstrocyteASDgenes <- na.omit(unique(as.character(scASDgenes$`Gene name`)[which(scASDgenes$`Cell type` == "AST-PP")]))
AstrocyteASDgenes <- str_squish(AstrocyteASDgenes)
iPSYCH consortium is publishing the largest exome sequencing study of autism spectrum disorder (ASD) to date (n=35,584 total samples, 11,986 with ASD). I downloaded the risk genes identified from table S4 of the biorxiv publication at https://www.biorxiv.org/content/10.1101/484113v3.supplementary-material
# Read the third sheet of the supplementary workbook.
iPSYCH <- read_excel("media-5.xlsx", sheet = 3)

# Unique, NA-free, whitespace-normalised entries of the `gene` column.
iPSYCHgenes <- na.omit(unique(as.character(iPSYCH$gene)))
iPSYCHgenes <- str_squish(iPSYCHgenes)
Recent neuropsychiatric diseases-causative genes were downloaded from Extended Data Table 5 of this paper: https://www.nature.com/articles/s41593-021-00802-y
# Read sheet 5 of the workbook, skipping the first two rows. The sheet repeats
# the same column names for each disease, so readxl renames them with a
# positional suffix (e.g. hgnc_symbol...2), as reported in the message below.
# NOTE(review): the object name `table` shadows base::table as a variable; it
# is kept because the following chunk refers to it by this name.
table <- read_excel("41593_2021_802_MOESM2_ESM.xlsx", sheet = 5, skip = 2)
## New names:
## * disease -> disease...1
## * hgnc_symbol -> hgnc_symbol...2
## * ensembl_gene_id -> ensembl_gene_id...3
## * `` -> ...4
## * disease -> disease...5
## * ...
# One vector of unique gene symbols per disease, taken from the
# position-suffixed hgnc_symbol columns of `table`. The per-disease vectors
# are not NA-filtered; only `$All` below is.
NeuropsychiatricDiseases <- list()
NeuropsychiatricDiseases$Autism <- unique(as.character(table$hgnc_symbol...2))
NeuropsychiatricDiseases$IntellectualDisability <- unique(as.character(table$hgnc_symbol...6))
NeuropsychiatricDiseases$Schizophrenia <- unique(as.character(table$hgnc_symbol...10))
NeuropsychiatricDiseases$Alzheimer <- unique(as.character(table$hgnc_symbol...14))
NeuropsychiatricDiseases$Parkinson <- unique(as.character(table$hgnc_symbol...18))
NeuropsychiatricDiseases$PSP_FTD <- unique(as.character(table$hgnc_symbol...22))
# NOTE(review): "Eplilepsy" is misspelt, but the name is part of the saved
# object, so it is left unchanged here.
NeuropsychiatricDiseases$Eplilepsy <- unique(as.character(table$hgnc_symbol...26))

# Union of all per-disease vectors: unique, NA-free, whitespace-normalised.
NeuropsychiatricDiseases$All <- na.omit(unique(as.character(unlist(NeuropsychiatricDiseases))))
NeuropsychiatricDiseases$All <- str_squish(NeuropsychiatricDiseases$All)
PsychencodeNDD genes were downloaded from this paper: https://science.sciencemag.org/content/362/6420/eaat8127
# Load the serialized PsychENCODE differentially-expressed-gene object.
PsychencodeNDD <- readRDS("../../Data/psychencode.DEGs.rds")

# ASD genes = union of the "ASD down" and "ASD up" sets, unique and NA-free.
PsychencodeASD <- na.omit(unique(as.character(union(
  PsychencodeNDD$`psychencode:ASD down`,
  PsychencodeNDD$`psychencode:ASD up`
))))
SparkASD genes were downloaded from: https://sparkforautism.org/portal/page/genetic-analysis-faq/
# Hard-coded SPARK gene list, de-duplicated.
# The stray entry "(F232V)" that followed "EIF3F" has been removed: it is not
# a gene symbol and would otherwise end up in the gene vector.
SparkASD <- unique(c(
  "ACTB", "ADNP", "ADSL", "AFF2", "AHDC1", "ALDH5A1", "ANK2", "ANK3",
  "ANKRD11", "ARHGEF9", "ARID1B", "ARX", "ASH1L", "ASXL3", "ATRX", "AUTS2",
  "BAZ2B", "BCKDK", "BCL11A", "BRAF", "BRSK2", "CACNA1C", "CAPRIN1", "CASK",
  "CASZ1", "CDKL5", "CHAMP1", "CHD2", "CHD3", "CHD7", "CHD8", "CIC", "CNOT3",
  "CREBBP", "CSDE1", "CTCF", "CTNNB1", "CUL3", "DDX3X", "DEAF1", "DHCR7",
  "DLG4", "DMPK", "DNMT3A", "DSCAM", "DYRK1A", "EBF3", "EHMT1", "EIF3F",
  "EP300", "FMR1", "FOXG1", "FOXP1", "GIGYF1", "GIGYF2", "GRIN2B", "HIVEP2",
  "HNRNPH2", "HNRNPU", "HRAS", "IQSEC2", "IRF2BPL", "KANSL1", "KCNB1",
  "KDM3B", "KDM6B", "KIAA2022", "KMT2A", "KMT2C", "KMT5B", "KRAS", "LZTR1",
  "MAGEL2", "MAP2K1", "MAP2K2", "MBD5", "MBOAT7", "MECP2", "MED13", "MED13L",
  "MEIS2", "MYT1L", "NAA15", "NBEA", "NCKAP1", "NF1", "NIPBL", "NLGN2",
  "NLGN3", "NR4A2", "NRAS", "NRXN1", "NRXN2", "NRXN3", "NSD1", "PACS1",
  "PCDH19", "PHF21A", "PHF3", "PHIP", "POGZ", "POMGNT1", "PPP1CB", "PPP2R5D",
  "PSMD12", "PTCHD1", "PTEN", "PTPN11", "RAF1", "RAI1", "RELN", "RERE",
  "RFX3", "RIMS1", "RIT1", "RORB", "SCN1A", "SCN2A", "SCN8A", "SETBP1",
  "SETD2", "SETD5", "SHANK2", "SHANK3", "SHOC2", "SIN3A", "SLC6A1", "SLC9A6",
  "SMARCC2", "SON", "SOS1", "SOS2", "SOX5", "SPAST", "SRCAP", "STXBP1",
  "SYNGAP1", "TANC2", "TAOK1", "TBCK", "TBR1", "TCF20", "TCF4", "TLK2",
  "TRIO", "TRIP12", "TSC1", "TSC2", "TSHZ3", "UBE3A", "UPF3B", "VPS13B",
  "WAC", "WDFY3", "ZBTB20", "ZNF292", "ZNF462"
))
I then save all the objects
# Named list of the ASD-related gene sets built above.
ASD <- list(
  SFARI = SFARIgenesAll,
  MSSNG = MSSNGgenes,
  iPSYCH = iPSYCHgenes,
  ID = IDGenes,
  SingleCellASD = SingleCellASDgenes,
  NeuronASD = NeuronASDgenes,
  AstrocyteASD = AstrocyteASDgenes,
  NeuropsychiatricDiseasesASD = NeuropsychiatricDiseases$Autism,
  PsychencodeASD = PsychencodeASD
)

# Persist the combined list together with the raw SFARI table, the per-score
# SFARI list, the neuropsychiatric disease lists and the PsychENCODE object.
save(ASD, SFARI, SFARIgenes, NeuropsychiatricDiseases, PsychencodeNDD, file = "../../Data/ASD.RData")