library(tidyverse)
library(devtools)
library(broom)
library(data.table)
library(RSQLite)
library(Hmisc)
"%&%" = function(a,b) paste(a,b,sep="")
dir <- "/gpfs/data/im-lab/nas40t2/natasha/rat_genomics/"
set.seed(777)Coregulation_across_Tissues_Species
- Correlation between genes within each tissue and then calculate correlation of the correlation between genes across tissues
filelist <- list.files("/gpfs/data/im-lab/nas40t2/natasha/rat_genomics/prediXcan/metabolic_traits", pattern = "predict.txt", full.names = TRUE)
orth.rats <- read_tsv(dir %&% "expression/ortholog_genes_rats_humans.tsv", col_names = TRUE)Generate Correlation Matrices for 100 rats - compare predicted Expression across tissues
names <- read_tsv(filelist[1]) %>% select(c(FID))
for(i in 1:100) {
id = sample(names$FID, size = 1)
tempo <- data.frame(gene = as.character())
for(fila in filelist) {
name <- substr(fila, 89,90)
tis <- fread(fila) %>% filter(FID == id) %>% pivot_longer(!c(FID, IID), names_to = "gene", values_to = name) %>% select(-c(FID, IID))
tempo <- full_join(tempo, tis, by = "gene")
}
tempo <- tempo %>% mutate(var = apply(tempo[,-1], 1, var)) %>% na.omit()
saveRDS(tempo, dir %&% "prediXcan/GREx_comp/cor_tis_per_ind/" %&% id %&% ".GREx.mat.RDS")
}Check heatmap of some individuals
i1 <- readRDS("/Users/natashasanthanam/Downloads/00077E6712.cor.mat.RDS")
i2 <- readRDS("/Users/natashasanthanam/Downloads/00077E7788.cor.mat.RDS")
melted_i1 <- melt(i1, na.rm = TRUE)
melted_i2 <- melt(i2, na.rm = TRUE)
p1= ggplot(data = melted_i1, aes(x=Var1, y=Var2, fill=value)) +
geom_tile() + theme(
axis.title.x = element_blank(),
axis.title.y = element_blank(),) + ggtitle("00077E6712") + theme(aspect.ratio = 1)
p2= ggplot(data = melted_i2, aes(x=Var1, y=Var2, fill=value)) +
geom_tile() + theme(
axis.title.x = element_blank(),
axis.title.y = element_blank(),) + ggtitle("00077E7788") + theme(aspect.ratio = 1)
ggarrange(p1, p2, ncol=2)Look at all genes across tissues in one individual Evidence of shared regulation across tissues
i3 = readRDS("/Users/natashasanthanam/Downloads/00077E83E3.GREx.mat.RDS") %>% dplyr::select(-c(gene, var))
pairs(i3)- Co-regulation Shared Across Species
gene_ids <- data.frame(id = fread(filelist[1]) %>% select(-c(FID, IID)) %>% colnames()
for(fila in filelist[2:length(filelist)]) {
df <- data.frame(id = fread(fila) %>% select(-c(FID, IID)) %>% colnames())
gene_ids <- inner_join(gene_ids, df, by = "id")
}gtf <- fread(dir %&% "Box_files/gtf.txt", header = TRUE)
gtf <- gtf[match(tempo$id, gtf$Gene),]
for(i in 1:20) {
tempo <- gtf %>% filter(Chr == i) %>% select(c(Gene))
if(nrow(tempo) == 0 ) {
i = i+1
}
else {
df <- data.frame(row = as.character(), column = as.character())
for(fila in filelist) {
tis <- substr(fila, 58,59)
expr <- as.data.frame(fread(fila) %>% select(-c(FID, IID)))
expr <- expr[,intersect(tempo$Gene, colnames(expr))]
res2<-rcorr(as.matrix(expr[,]))
d2 <- flattenCorrMatrix(res2$r, res2$P)
colnames(d2)[3] = tis
colnames(d2)[4] = paste("p", tis, sep = "_")
df <- full_join(df, d2, by = c("row", "column"))
}
saveRDS(df, dir %&% "prediXcan/GREx_comp/cor_genes_per_chr/chr" %&% i %&% ".RDS" )
}
}Save correlation of coregulation across tissues
coreg.dir <- "/gpfs/data/im-lab/nas40t2/natasha/rat_genomics/prediXcan/GREx_comp/cor_genes_per_chr"
filelist <- list.files(coreg.dir, pattern = ".RDS", full.names = TRUE)
for(fila in filelist) {
tempo <- readRDS(fila) %>% select(c(row, column, Ac, Il, Pl, Lh, Vo)) %>%
i <- substr(fila, 89, str_length(fila)- 4)
cor.mat <- cor(tempo[,3:7])
saveRDS(cor.mat, dir %&% "prediXcan/GREx_comp/cor_genes_per_chr/cor_coreg_chr" %&% i %&% ".RDS")
}Graph of Correlation of Coregulation across tissues
data.dir <- "/Users/natashasanthanam/CRI/"
filelist <- list.files(data.dir, pattern="cor_coreg", full.names = TRUE)
corr_coreg <- list()
for(fila in filelist) {
i <- match(fila, filelist)
corr_coreg[[i]] <- readRDS(fila)
}Generate Coregulation in Humans (GTEx)
First generate predicted expression in G1000 using GTEx models
conda activate imlabtools
export METAXCAN=/gpfs/data/im-lab/nas40t2/natasha/GTEX_Analysis/MetaXcan/software
export GENO=/gpfs/data/im-lab/nas40t2/Data/dbGaP/Transcriptome/G1000/imputed_hrc1.1
export MODEL=/gpfs/data/im-lab/nas40t2/Data/PredictDB/GTEx_v8/models_v1/eqtl/ctimp
export RESULTS=/gpfs/data/im-lab/nas40t2/natasha/rat_genomics/PTRS_weights/PGP
export DATA=/gpfs/data/im-lab/nas40t2/natasha/GTEX_Analysis/predixcan
printf "Predict expression\n\n"
python3 $METAXCAN/Predict.py \
--model_db_path $MODEL/ctimp_$TISSUE.db \
--model_db_snp_key varID \
--vcf_genotypes $GENO/chr*.dose.vcf.gz \
--vcf_mode genotyped \
--liftover $DATA/hg19ToHg38.over.chain.gz \
--on_the_fly_mapping METADATA "chr{}_{}_{}_{}_b38" \
--prediction_output $RESULTS/G1000__$TISSUE.predict.txt \
--prediction_summary_output $RESULTS/G1000__$TISSUE.summary.txt \
--verbosity 9 \
--throw
TISSUE=Brain_CerebellumflattenCorrMatrix <- function(cormat, pmat) {
ut <- upper.tri(cormat)
data.frame(
row = rownames(cormat)[row(cormat)[ut]],
column = rownames(cormat)[col(cormat)[ut]],
cor =(cormat)[ut],
p = pmat[ut]
)
}Calculate Coregulation between genes in GTEx
gtf <- fread("/gpfs/data/im-lab/nas40t2/natasha/GTEX_Analysis/annotations_gencode_v26.tsv", header = TRUE)
for(i in 1:20) {
tempo <- gtf %>% filter(chromosome == paste("chr", i, sep="")) %>% select(c(gene_id))
expr <- as.data.frame( fread("/gpfs/data/im-lab/nas40t2/natasha/rat_genomics/prediXcan/GREx_comp/G1000_less_mem__Brain_Cerebellum.predict.txt")) %>% select(-c(FID, IID))
colnames(expr) = sapply(strsplit(colnames(expr), "\\."), `[`, 1)
expr <- expr[, intersect(tempo$gene_id, colnames(expr))]
res2<-rcorr(as.matrix(expr[,]))
d2 <- flattenCorrMatrix(res2$r, res2$P)
saveRDS(d2, dir %&% "prediXcan/GREx_comp/cor_GTEx_genes_per_chr/GTEx_chr" %&% i %&% ".RDS" )
}
#Can also calculate correlation between genes in GTEx for all genes
expr <- as.data.frame( fread("/gpfs/data/im-lab/nas40t2/natasha/rat_genomics/prediXcan/GREx_comp/G1000_less_mem__Brain_Cerebellum.predict.txt")) %>% select(-c(FID, IID))
colnames(expr) = sapply(strsplit(colnames(expr), "\\."), `[`, 1)
res2<-rcorr(as.matrix(expr[,]))
d2 <- flattenCorrMatrix(res2$r, res2$P)Check if Coregualtion is preserved across species
Plot Coregulation across species
p.dir <- "/Users/natashasanthanam/CRI/"
filelist <- list.files(p.dir, pattern = "all", full.names = TRUE)
for(fila in filelist) {
df <- readRDS(fila)
pairs(df)
}Heatmap ordered with TSS
only_GTEx <- readRDS("/Users/natashasanthanam/Downloads/cor_pred_expr_GTEx_all_genes.RDS") %>% select(-c(p)) %>% mutate(start = gtf[match(only_GTEx$row, gtf$gene_id), 5]$start) %>% distinct(row, column, .keep_all = TRUE)
GTEx_ordered <- only_GTEx[sort(only_GTEx$start),]
GTEx_chr2_genes <- only_GTEx[na.omit(match(gtf$gene_id, only_GTEx$row)), ]
p3= ggplot(data = GTEx_ordered, aes(x=row, y=column, fill=cor)) +
geom_tile() + theme(
axis.title.x = element_blank(),
axis.title.y = element_blank(), axis.text = element_text(size = 2)) + ggtitle("Heatmap for predicted expression in GTEx Cerebellum")
p3
No matching items