library(tidyverse)
library(devtools)
library(broom)
library(data.table)
library(RSQLite)
library(Hmisc)
"%&%" = function(a,b) paste(a,b,sep="")
<- "/gpfs/data/im-lab/nas40t2/natasha/rat_genomics/"
dir set.seed(777)
Coregulation_across_Tissues_Species
- Correlation between genes within each tissue and then calculate correlation of the correlation between genes across tissues
<- list.files("/gpfs/data/im-lab/nas40t2/natasha/rat_genomics/prediXcan/metabolic_traits", pattern = "predict.txt", full.names = TRUE)
filelist <- read_tsv(dir %&% "expression/ortholog_genes_rats_humans.tsv", col_names = TRUE) orth.rats
Generate Correlation Matrices for 100 rats - compare predicted Expression across tissues
<- read_tsv(filelist[1]) %>% select(c(FID))
names
for(i in 1:100) {
= sample(names$FID, size = 1)
id <- data.frame(gene = as.character())
tempo for(fila in filelist) {
<- substr(fila, 89,90)
name <- fread(fila) %>% filter(FID == id) %>% pivot_longer(!c(FID, IID), names_to = "gene", values_to = name) %>% select(-c(FID, IID))
tis <- full_join(tempo, tis, by = "gene")
tempo
} <- tempo %>% mutate(var = apply(tempo[,-1], 1, var)) %>% na.omit()
tempo saveRDS(tempo, dir %&% "prediXcan/GREx_comp/cor_tis_per_ind/" %&% id %&% ".GREx.mat.RDS")
}
Check heatmap of some individuals
<- readRDS("/Users/natashasanthanam/Downloads/00077E6712.cor.mat.RDS")
i1 <- readRDS("/Users/natashasanthanam/Downloads/00077E7788.cor.mat.RDS")
i2
<- melt(i1, na.rm = TRUE)
melted_i1 <- melt(i2, na.rm = TRUE)
melted_i2
= ggplot(data = melted_i1, aes(x=Var1, y=Var2, fill=value)) +
p1geom_tile() + theme(
axis.title.x = element_blank(),
axis.title.y = element_blank(),) + ggtitle("00077E6712") + theme(aspect.ratio = 1)
= ggplot(data = melted_i2, aes(x=Var1, y=Var2, fill=value)) +
p2geom_tile() + theme(
axis.title.x = element_blank(),
axis.title.y = element_blank(),) + ggtitle("00077E7788") + theme(aspect.ratio = 1)
ggarrange(p1, p2, ncol=2)
Look at all genes across tissues in one individual Evidence of shared regulation across tissues
= readRDS("/Users/natashasanthanam/Downloads/00077E83E3.GREx.mat.RDS") %>% dplyr::select(-c(gene, var))
i3 pairs(i3)
- Co-regulation Shared Across Species
<- data.frame(id = fread(filelist[1]) %>% select(-c(FID, IID)) %>% colnames()
gene_ids for(fila in filelist[2:length(filelist)]) {
<- data.frame(id = fread(fila) %>% select(-c(FID, IID)) %>% colnames())
df <- inner_join(gene_ids, df, by = "id")
gene_ids }
<- fread(dir %&% "Box_files/gtf.txt", header = TRUE)
gtf <- gtf[match(tempo$id, gtf$Gene),]
gtf
for(i in 1:20) {
<- gtf %>% filter(Chr == i) %>% select(c(Gene))
tempo if(nrow(tempo) == 0 ) {
= i+1
i
}else {
<- data.frame(row = as.character(), column = as.character())
df for(fila in filelist) {
<- substr(fila, 58,59)
tis <- as.data.frame(fread(fila) %>% select(-c(FID, IID)))
expr <- expr[,intersect(tempo$Gene, colnames(expr))]
expr <-rcorr(as.matrix(expr[,]))
res2<- flattenCorrMatrix(res2$r, res2$P)
d2 colnames(d2)[3] = tis
colnames(d2)[4] = paste("p", tis, sep = "_")
<- full_join(df, d2, by = c("row", "column"))
df
}saveRDS(df, dir %&% "prediXcan/GREx_comp/cor_genes_per_chr/chr" %&% i %&% ".RDS" )
} }
Save correlation of coregulation across tissues
<- "/gpfs/data/im-lab/nas40t2/natasha/rat_genomics/prediXcan/GREx_comp/cor_genes_per_chr"
coreg.dir <- list.files(coreg.dir, pattern = ".RDS", full.names = TRUE)
filelist
for(fila in filelist) {
<- readRDS(fila) %>% select(c(row, column, Ac, Il, Pl, Lh, Vo)) %>%
tempo <- substr(fila, 89, str_length(fila)- 4)
i <- cor(tempo[,3:7])
cor.mat saveRDS(cor.mat, dir %&% "prediXcan/GREx_comp/cor_genes_per_chr/cor_coreg_chr" %&% i %&% ".RDS")
}
Graph of Correlation of Coregulation across tissues
<- "/Users/natashasanthanam/CRI/"
data.dir <- list.files(data.dir, pattern="cor_coreg", full.names = TRUE)
filelist <- list()
corr_coreg
for(fila in filelist) {
<- match(fila, filelist)
i <- readRDS(fila)
corr_coreg[[i]] }
Generate Coregulation in Humans (GTEx)
First generate predicted expression in G1000 using GTEx models
conda activate imlabtools
export METAXCAN=/gpfs/data/im-lab/nas40t2/natasha/GTEX_Analysis/MetaXcan/software
export GENO=/gpfs/data/im-lab/nas40t2/Data/dbGaP/Transcriptome/G1000/imputed_hrc1.1
export MODEL=/gpfs/data/im-lab/nas40t2/Data/PredictDB/GTEx_v8/models_v1/eqtl/ctimp
export RESULTS=/gpfs/data/im-lab/nas40t2/natasha/rat_genomics/PTRS_weights/PGP
export DATA=/gpfs/data/im-lab/nas40t2/natasha/GTEX_Analysis/predixcan
printf "Predict expression\n\n"
python3 $METAXCAN/Predict.py \
$MODEL/ctimp_$TISSUE.db \
--model_db_path \
--model_db_snp_key varID $GENO/chr*.dose.vcf.gz \
--vcf_genotypes \
--vcf_mode genotyped $DATA/hg19ToHg38.over.chain.gz \
--liftover "chr{}_{}_{}_{}_b38" \
--on_the_fly_mapping METADATA $RESULTS/G1000__$TISSUE.predict.txt \
--prediction_output $RESULTS/G1000__$TISSUE.summary.txt \
--prediction_summary_output \
--verbosity 9
--throw
TISSUE=Brain_Cerebellum
<- function(cormat, pmat) {
flattenCorrMatrix <- upper.tri(cormat)
ut data.frame(
row = rownames(cormat)[row(cormat)[ut]],
column = rownames(cormat)[col(cormat)[ut]],
cor =(cormat)[ut],
p = pmat[ut]
) }
Calculate Coregulation between genes in GTEx
<- fread("/gpfs/data/im-lab/nas40t2/natasha/GTEX_Analysis/annotations_gencode_v26.tsv", header = TRUE)
gtf
for(i in 1:20) {
<- gtf %>% filter(chromosome == paste("chr", i, sep="")) %>% select(c(gene_id))
tempo <- as.data.frame( fread("/gpfs/data/im-lab/nas40t2/natasha/rat_genomics/prediXcan/GREx_comp/G1000_less_mem__Brain_Cerebellum.predict.txt")) %>% select(-c(FID, IID))
expr colnames(expr) = sapply(strsplit(colnames(expr), "\\."), `[`, 1)
<- expr[, intersect(tempo$gene_id, colnames(expr))]
expr <-rcorr(as.matrix(expr[,]))
res2<- flattenCorrMatrix(res2$r, res2$P)
d2 saveRDS(d2, dir %&% "prediXcan/GREx_comp/cor_GTEx_genes_per_chr/GTEx_chr" %&% i %&% ".RDS" )
}
#Can also calculate correlation between genes in GTEx for all genes
<- as.data.frame( fread("/gpfs/data/im-lab/nas40t2/natasha/rat_genomics/prediXcan/GREx_comp/G1000_less_mem__Brain_Cerebellum.predict.txt")) %>% select(-c(FID, IID))
expr colnames(expr) = sapply(strsplit(colnames(expr), "\\."), `[`, 1)
<-rcorr(as.matrix(expr[,]))
res2<- flattenCorrMatrix(res2$r, res2$P) d2
Check if Coregualtion is preserved across species
Plot Coregulation across species
<- "/Users/natashasanthanam/CRI/"
p.dir <- list.files(p.dir, pattern = "all", full.names = TRUE)
filelist
for(fila in filelist) {
<- readRDS(fila)
df pairs(df)
}
Heatmap ordered with TSS
<- readRDS("/Users/natashasanthanam/Downloads/cor_pred_expr_GTEx_all_genes.RDS") %>% select(-c(p)) %>% mutate(start = gtf[match(only_GTEx$row, gtf$gene_id), 5]$start) %>% distinct(row, column, .keep_all = TRUE)
only_GTEx
<- only_GTEx[sort(only_GTEx$start),]
GTEx_ordered
<- only_GTEx[na.omit(match(gtf$gene_id, only_GTEx$row)), ]
GTEx_chr2_genes
= ggplot(data = GTEx_ordered, aes(x=row, y=column, fill=cor)) +
p3geom_tile() + theme(
axis.title.x = element_blank(),
axis.title.y = element_blank(), axis.text = element_text(size = 2)) + ggtitle("Heatmap for predicted expression in GTEx Cerebellum")
p3
No matching items