Lecture slides here. (To convert html to pdf, press E \(\to\) Print \(\to\) Destination: Save to pdf)
Base R Code
Show the code
################################################################### This code generates all numerical results in chapter 4. ######################################################################################################################## Section 4.2.6 (Figure 4.2) # Cox proportional hazards model on GBC study data###################################################library(survival)############################################### GBC study############################################## read in the complete datagbc <-read.table("Data//German Breast Cancer Study//gbc.txt")# subset to first event data# Sort the data by time within each ido <-order(gbc$id,gbc$time)gbc <- gbc[o,]# get the first row for each iddata.CE <- gbc[!duplicated(gbc$id),]# set status=1 if status==2 or 1data.CE$status <- (data.CE$status>0)+0# fit the Cox proportional hazards model## preparationdata.CE$hormone <-factor(data.CE$hormone)data.CE$meno <-factor(data.CE$meno)data.CE$grade <-factor(data.CE$grade)obj <-coxph(Surv(time, status)~ hormone + meno + age + size + grade+ prog + estrg, data = data.CE)#summarize resultssummary(obj)# Wald test on tumor grade# H_0: beta_5=beta_6=0beta_q <- obj$coefficients[5:6]Sigma_q <- obj$var[5:6,5:6]#chisq statistic with 2 d.f.chisq2 <-t(beta_q) %*%solve(Sigma_q) %*% beta_q#p-valuepval <-1-pchisq(chisq2, 2)### explore forest plotlibrary(survminer)ggforest(obj, data = data.CE)####### Get the Breslow estimates of baseline### cumulative hazard functionLambda0 <-basehaz(obj,centered =FALSE)#plot the baseline hazard functionpar(mfrow=c(1,1))plot(stepfun(Lambda0$time,c(0,Lambda0$hazard)),do.points=F,xlim=c(0,100),ylim=c(0,0.8),lwd=2,frame.plot=F, xlab="Time (months)",ylab="Baseline cumulative hazard function", main="")############################# Residual analysis# (Figures 4.3--4.6)################################################################### First get the Cox-Snell residuals.;## The default residuals of coxph in R are the martingale residuals.## resid(obj,type=c("martingale", "deviance", "score", "schoenfeld",## "dfbeta", "dfbetas", "scaledsch","partial"))## Use relationship between cox-snell and martingal## residualscoxsnellres <- data.CE$status-resid(obj, type="martingale")## Then use N-A method to estimate the cumulative ## hazard function for residuals;fit <-survfit(Surv(coxsnellres,data.CE$status) ~1)Htilde <-cumsum(fit$n.event/fit$n.risk)plot(log(fit$time),log(Htilde), xlab="log(t)", frame.plot =FALSE,ylab="log-cumulative hazard", xlim =c(-8, 2), ylim =c(-8, 2))abline(0, 1, lty =3, lwd =1.5)# rescaled Schoelfeld residualssch <-cox.zph(obj) # chi-square test results for proportionality# of each covariate and overallsch$table### calculated residuals ###sch_time <- sch$time # the X_i's (m-vector)sch_resids <- sch$y # rescaled residuals (m x p matrix)# Plot rescaled Schoelfeld residuals for each covariate# using the object sch directlypar(mar =c(4, 4, 2, 2), mfrow=c(4,2))plot(sch, xlab="Time (months)", lwd=2, cex.lab=1.2, cex.axis=1.2,ylab =c("Hormone", "Menopause", "Age", "Tumor size","Tumor grade", "Progesterone", "Estrogen"))## an alternative graphic provided in survminer package# ggcoxzph(sch)# To address non-proportionality of tumor grade# re-fit the Cox proportional hazards model# stratified by tumor gradeobj.stra <-coxph(Surv(time, status) ~ hormone + meno+ age + size + prog + estrg +strata(grade), data = data.CE)#Schoelfeld#produce proportionality test resultssch.stra <-cox.zph(obj.stra) round(sch.stra$table, 4)## residual plots for stratified modelpar(mfrow=c(3,2))plot(sch.stra, xlab="Time (months)", lwd=2, cex.lab=1.2, cex.axis=1.2,ylab =c("Hormone", "Menopause", "Age", "Tumor size","Progesterone", "Estrogen"))## Martingale residualsmart_resid <-resid(obj.stra,type ='martingale')## Deviance residualsdev_resid <-resid(obj.stra,type ='deviance')#plot the martingale residuals against# the fours quantitative covariates:# age, tumor size, progesterone and# estrogen receptor levels## Agepar(mfrow=c(2,2))plot(data.CE$age, mart_resid,xlab="Age (years)", ylab="Martingale residuals",main='Age',cex.lab=1.2,cex.axis=1.2)lines(lowess(data.CE$age, mart_resid), lwd =2)abline(0,0,lty=3,lwd=2)## Tumor sizeplot(data.CE$size, mart_resid,xlab="Tumor size (mm)", ylab="Martingale residuals",main='Tumor size',cex.lab=1.2,cex.axis=1.2)lines(lowess(data.CE$size, mart_resid),lwd=2)abline(0,0,lty=3,lwd=2)## Progesteroneplot(data.CE$prog, mart_resid,xlab="Progesterone receptor (fmol/mg)", ylab="Martingale Residuals",main='Progesterone',cex.lab=1.2,cex.axis=1.2)lines(lowess(data.CE$prog, mart_resid),lwd=2)abline(0,0,lty=3,lwd=2)## Estrogenplot(data.CE$estrg, mart_resid,xlab="Estrogen receptor (fmol/mg)", ylab="Martingale Residuals",main='Estrogen',cex.lab=1.2,cex.axis=1.2)lines(lowess(data.CE$estrg, mart_resid),lwd=2)abline(0,0,lty=3,lwd=2)# To address non-linear age# categorize age in agec# age<=40: agec=1# 40<age<=60: agec=2# age>60: agec=3data.CE$agec <- (data.CE$age<=40)+2*(data.CE$age>40&data.CE$age<=60)+3*(data.CE$age>60)data.CE$agec <-factor(data.CE$agec)#re-fit the model with agecobj.stra.final <-coxph(Surv(time,status)~ hormone + meno+ agec + size + prog + estrg +strata(grade), data = data.CE)#plot the estimated HRs for agec (Figure 4.6)# and confidence intervalsfinal.sum <-summary(obj.stra.final)final.sum# Plot the age-group-specific HR and confidence# intervals from the re-fitted modelci.table=final.sum$conf.inthr=ci.table[3:4,1]hr.low=ci.table[3:4,3]hr.up=ci.table[3:4,4]par(mfrow=c(1,1))plot(1:3,c(1,hr),ylim=c(0,1.2),frame=F,xaxt='n',xlab="Age (years)", ylab="Hazard ratio",pch=19,cex=1.5,cex.lab=1.2,cex.axis=1.2)axis(1, at=c(1,2,3),labels=c("(20, 40]","(40, 60]","(60, 80]"),cex.axis=1.2)# horizontal error barsarrows(2:3, hr.low, 2:3, hr.up, length=0.05, angle=90, code=3,lwd=2)lines(1:3,c(1,hr),lty=3,lwd=2)############################################ Illustration with time-varying covariates# Stanford heart study###########################################head(heart)## change variable name "year" -> "accpt"colnames(heart)[5] <-"accpt"#sample sizen <-length(unique(heart$id))# fit a Cox model with time-dependent "transplant"obj <-coxph(Surv(start, stop, event) ~ age + accpt + surgery + transplant, data=heart)#summarize resultssummary(obj)