options(stringsAsFactors = FALSE) library(data.table) library(survival) library("OIsurv") library("ggplot2") library("survminer") pat_w_ali <- read.csv("META/release_aug2016.included.final.txt", header=FALSE, sep="\t") gsubsubz <- gsub("(*)\\.csv", "\\1" , dir("Will_Clin_data/")) for(i in 1:9) { # 1. Calculate the mean variant GERP by patient subsub <- gsubsubz[i] print(subsub) x <- as.data.frame(fread(paste("DATA/NC/OBS/data.obs.nc.", subsub, ".txt", sep=""))) x$gerp <- as.numeric(x$gerp) x <- x[!is.na(x$gerp),] x$gerp[x$gerp<0] <- 0 ag.gerp <- aggregate(x$gerp, by=list(x$sample), FUN=mean) ag.num <- aggregate(x$gerp, by=list(x$sample), FUN=length) names(ag.gerp) <- c("Ali", "GerpSum") names(ag.num) <- c("Ali", "MutNum") # 2. Merge with clinical information s <- read.csv(paste("Will_Clin_data/", subsub, ".csv", sep="")) s$Survival_obj2 <- Surv(time=s$Time_to_event, event=as.logical(gsub(" ", "",s$Event)) ) tot <- merge(s, ag.gerp) tot <- merge(tot, ag.num) tot <- tot[!is.na(tot$GerpSum) & !is.na(tot$Survival_obj2),] tot <- tot[tot$Ali %in% pat_w_ali[,2],] tot$QuartBurden <- 4*rank(tot$GerpSum)/nrow(tot) # 3. Fit a Cox proportional hazard model M <- coxph(formula=Survival_obj2 ~ QuartBurden + Age + MutNum, data=tot, na.action=na.exclude) Mx0 <- as.matrix(coef(summary(M))) print(Mx0) # 4. Plot the results tot$MeanGerp <- "low" tot$MeanGerp[rank(tot$GerpSum)/nrow(tot) >= 0.75] <- "high" tot$AgeCat <- "Younger" tot$AgeCat[rank(tot$Age)/nrow(tot) > 0.5] <- "Older" tot$Strata <- paste(tot$MeanGerp, tot$AgeCat, sep="_") cll1 <- survfit(Survival_obj2 ~ MeanGerp, data=tot,na.action=na.exclude) pdf(paste(subsub,".survival4.pdf",sep="")) par(font=4) print(ggsurvplot(cll1, conf.int = TRUE, main=subsub, xlab="Time (days)", risk.table=FALSE, font.legend="bold")) dev.off() }