Coregulation_across_Tissues_Species

Author

Natasha Santhanam

Published

February 14, 2022

library(tidyverse)
library(devtools)
library(broom)
library(data.table)
library(RSQLite)
library(Hmisc)
"%&%" = function(a,b) paste(a,b,sep="")
dir <- "/gpfs/data/im-lab/nas40t2/natasha/rat_genomics/"
set.seed(777)
  1. Correlation between genes within each tissue and then calculate correlation of the correlation between genes across tissues
filelist <- list.files("/gpfs/data/im-lab/nas40t2/natasha/rat_genomics/prediXcan/metabolic_traits", pattern = "predict.txt", full.names = TRUE)
orth.rats <- read_tsv(dir %&% "expression/ortholog_genes_rats_humans.tsv", col_names = TRUE)

Generate Correlation Matrices for 100 rats - compare predicted Expression across tissues

names <- read_tsv(filelist[1]) %>% select(c(FID))

for(i in 1:100) {
  id = sample(names$FID, size = 1)
  tempo <- data.frame(gene = as.character())
  for(fila in filelist) {
    name <- substr(fila, 89,90)
    tis <- fread(fila) %>% filter(FID == id) %>% pivot_longer(!c(FID, IID), names_to = "gene", values_to = name) %>% select(-c(FID, IID))
    tempo <- full_join(tempo, tis,  by = "gene")
  } 
  tempo <- tempo %>% mutate(var = apply(tempo[,-1], 1, var)) %>% na.omit()
  saveRDS(tempo, dir %&% "prediXcan/GREx_comp/cor_tis_per_ind/" %&% id %&% ".GREx.mat.RDS")
}

Check heatmap of some individuals

i1 <- readRDS("/Users/natashasanthanam/Downloads/00077E6712.cor.mat.RDS")
i2 <- readRDS("/Users/natashasanthanam/Downloads/00077E7788.cor.mat.RDS")

melted_i1 <- melt(i1, na.rm = TRUE)
melted_i2 <- melt(i2, na.rm = TRUE)

p1= ggplot(data = melted_i1, aes(x=Var1, y=Var2, fill=value)) + 
  geom_tile() + theme(
  axis.title.x = element_blank(),
  axis.title.y = element_blank(),) + ggtitle("00077E6712") + theme(aspect.ratio = 1)
p2= ggplot(data = melted_i2, aes(x=Var1, y=Var2, fill=value)) + 
  geom_tile() + theme(
  axis.title.x = element_blank(),
  axis.title.y = element_blank(),) + ggtitle("00077E7788") + theme(aspect.ratio = 1)

ggarrange(p1, p2, ncol=2)

Look at all genes across tissues in one individual Evidence of shared regulation across tissues

i3 = readRDS("/Users/natashasanthanam/Downloads/00077E83E3.GREx.mat.RDS") %>% dplyr::select(-c(gene, var))
pairs(i3)
  1. Co-regulation Shared Across Species
gene_ids <- data.frame(id = fread(filelist[1]) %>% select(-c(FID, IID)) %>%  colnames()
for(fila in filelist[2:length(filelist)]) {
  df <- data.frame(id = fread(fila) %>% select(-c(FID, IID)) %>%  colnames())
  gene_ids <- inner_join(gene_ids, df, by = "id")
}
gtf <- fread(dir %&% "Box_files/gtf.txt", header = TRUE)
gtf <-  gtf[match(tempo$id, gtf$Gene),]
 
for(i in 1:20) {
  tempo <- gtf %>% filter(Chr == i) %>% select(c(Gene))
  if(nrow(tempo) == 0 ) {
  i = i+1  
  }
  else {
  df <- data.frame(row = as.character(), column = as.character())
  for(fila in filelist) {
    tis <- substr(fila, 58,59)
    expr <- as.data.frame(fread(fila) %>% select(-c(FID, IID)))
    expr <- expr[,intersect(tempo$Gene, colnames(expr))]
    res2<-rcorr(as.matrix(expr[,]))
  d2 <- flattenCorrMatrix(res2$r, res2$P)
    colnames(d2)[3] = tis
    colnames(d2)[4] = paste("p", tis, sep = "_")
    df <- full_join(df, d2, by = c("row", "column"))
  }
  saveRDS(df, dir %&% "prediXcan/GREx_comp/cor_genes_per_chr/chr" %&% i %&% ".RDS" )
  }
}

Save correlation of coregulation across tissues

coreg.dir <- "/gpfs/data/im-lab/nas40t2/natasha/rat_genomics/prediXcan/GREx_comp/cor_genes_per_chr"
filelist <- list.files(coreg.dir, pattern = ".RDS", full.names = TRUE)

for(fila in filelist) {
  tempo <- readRDS(fila) %>% select(c(row, column, Ac, Il, Pl, Lh, Vo)) %>%
  i <- substr(fila, 89, str_length(fila)- 4)
  cor.mat <-  cor(tempo[,3:7])
  saveRDS(cor.mat, dir %&% "prediXcan/GREx_comp/cor_genes_per_chr/cor_coreg_chr" %&% i %&% ".RDS")
}

Graph of Correlation of Coregulation across tissues

data.dir <- "/Users/natashasanthanam/CRI/"
filelist <- list.files(data.dir, pattern="cor_coreg", full.names = TRUE)
corr_coreg <- list()

for(fila in filelist) {
  i <- match(fila, filelist)
  corr_coreg[[i]] <- readRDS(fila)
}

Generate Coregulation in Humans (GTEx)

First generate predicted expression in G1000 using GTEx models

conda activate imlabtools
export METAXCAN=/gpfs/data/im-lab/nas40t2/natasha/GTEX_Analysis/MetaXcan/software
export GENO=/gpfs/data/im-lab/nas40t2/Data/dbGaP/Transcriptome/G1000/imputed_hrc1.1
export MODEL=/gpfs/data/im-lab/nas40t2/Data/PredictDB/GTEx_v8/models_v1/eqtl/ctimp
export RESULTS=/gpfs/data/im-lab/nas40t2/natasha/rat_genomics/PTRS_weights/PGP
export DATA=/gpfs/data/im-lab/nas40t2/natasha/GTEX_Analysis/predixcan

printf "Predict expression\n\n"

python3 $METAXCAN/Predict.py \
--model_db_path $MODEL/ctimp_$TISSUE.db \
--model_db_snp_key varID \
--vcf_genotypes $GENO/chr*.dose.vcf.gz  \
--vcf_mode genotyped \
--liftover $DATA/hg19ToHg38.over.chain.gz  \
--on_the_fly_mapping METADATA "chr{}_{}_{}_{}_b38" \
--prediction_output $RESULTS/G1000__$TISSUE.predict.txt \
--prediction_summary_output $RESULTS/G1000__$TISSUE.summary.txt \
--verbosity 9 \
--throw

TISSUE=Brain_Cerebellum
flattenCorrMatrix <- function(cormat, pmat) {
  ut <- upper.tri(cormat)
  data.frame(
    row = rownames(cormat)[row(cormat)[ut]],
    column = rownames(cormat)[col(cormat)[ut]],
    cor  =(cormat)[ut],
    p = pmat[ut]
    )
}

Calculate Coregulation between genes in GTEx

gtf <- fread("/gpfs/data/im-lab/nas40t2/natasha/GTEX_Analysis/annotations_gencode_v26.tsv", header = TRUE)

 for(i in 1:20) {
  tempo <- gtf %>% filter(chromosome == paste("chr", i, sep="")) %>% select(c(gene_id))
  expr <- as.data.frame( fread("/gpfs/data/im-lab/nas40t2/natasha/rat_genomics/prediXcan/GREx_comp/G1000_less_mem__Brain_Cerebellum.predict.txt")) %>% select(-c(FID, IID)) 
 colnames(expr) = sapply(strsplit(colnames(expr), "\\."), `[`, 1)
    expr <- expr[, intersect(tempo$gene_id, colnames(expr))]
   res2<-rcorr(as.matrix(expr[,]))
  d2 <- flattenCorrMatrix(res2$r, res2$P)
saveRDS(d2, dir %&% "prediXcan/GREx_comp/cor_GTEx_genes_per_chr/GTEx_chr" %&% i %&% ".RDS" )
 }

#Can also calculate correlation between genes in GTEx for all genes
expr <- as.data.frame( fread("/gpfs/data/im-lab/nas40t2/natasha/rat_genomics/prediXcan/GREx_comp/G1000_less_mem__Brain_Cerebellum.predict.txt")) %>% select(-c(FID, IID)) 
 colnames(expr) = sapply(strsplit(colnames(expr), "\\."), `[`, 1)
 res2<-rcorr(as.matrix(expr[,]))
  d2 <- flattenCorrMatrix(res2$r, res2$P)

Check if Coregualtion is preserved across species

Plot Coregulation across species

p.dir <- "/Users/natashasanthanam/CRI/"
filelist <- list.files(p.dir, pattern = "all", full.names = TRUE)

for(fila in filelist) {
  df <- readRDS(fila)
  pairs(df)
}

Heatmap ordered with TSS

only_GTEx <- readRDS("/Users/natashasanthanam/Downloads/cor_pred_expr_GTEx_all_genes.RDS") %>% select(-c(p)) %>% mutate(start = gtf[match(only_GTEx$row, gtf$gene_id), 5]$start) %>% distinct(row, column, .keep_all = TRUE)

GTEx_ordered <- only_GTEx[sort(only_GTEx$start),]

GTEx_chr2_genes <- only_GTEx[na.omit(match(gtf$gene_id, only_GTEx$row)), ]

p3= ggplot(data = GTEx_ordered, aes(x=row, y=column, fill=cor)) + 
  geom_tile() + theme(
  axis.title.x = element_blank(),
  axis.title.y = element_blank(), axis.text = element_text(size = 2)) + ggtitle("Heatmap for predicted expression in GTEx Cerebellum")
p3
No matching items