Chapter 4 - Cox Proportional Hazards Regression

Slides

Lecture slides here. (To convert html to pdf, press E \(\to\) Print \(\to\) Destination: Save to pdf)

Base R Code

Show the code
##################################################################
# This code generates all numerical results in chapter 4.      ##
##################################################################


###################################################
# Section 4.2.6 (Figure 4.2)                
# Cox proportional hazards model on GBC study data
###################################################
library(survival)
##############################################
# GBC study
#############################################
# read in the complete data
gbc <- read.table("Data//German Breast Cancer Study//gbc.txt")

# subset to first event data
# Sort the data by time within each id
o <- order(gbc$id,gbc$time)
gbc <- gbc[o,]
# get the first row for each id
data.CE <- gbc[!duplicated(gbc$id),]

# set status=1 if status==2 or 1
data.CE$status <- (data.CE$status>0)+0


# fit the Cox proportional hazards model
## preparation

data.CE$hormone <- factor(data.CE$hormone)
data.CE$meno <- factor(data.CE$meno)
data.CE$grade <- factor(data.CE$grade)
  
obj <- coxph(Surv(time, status)~ hormone + meno + age + size + grade
           + prog + estrg, data = data.CE)

#summarize results
summary(obj)

# Wald test on tumor grade
# H_0: beta_5=beta_6=0
beta_q <- obj$coefficients[5:6]
Sigma_q <- obj$var[5:6,5:6]

#chisq statistic with 2 d.f.
chisq2 <- t(beta_q) %*% solve(Sigma_q) %*% beta_q
#p-value
pval <- 1 - pchisq(chisq2, 2)


### explore forest plot
library(survminer)
ggforest(obj, data = data.CE)
####


### Get the Breslow estimates of baseline
### cumulative hazard function
Lambda0 <- basehaz(obj,centered = FALSE)
#plot the baseline hazard function
par(mfrow=c(1,1))
plot(stepfun(Lambda0$time,c(0,Lambda0$hazard)),
     do.points=F,xlim=c(0,100),ylim=c(0,0.8),lwd=2,frame.plot=F, 
     xlab="Time (months)",ylab="Baseline cumulative hazard function", 
     main="")





############################
# Residual analysis
# (Figures 4.3--4.6)
############################



#####################################

## First get the Cox-Snell residuals.;
## The default residuals of coxph in R are the martingale residuals.
## resid(obj,type=c("martingale", "deviance", "score", "schoenfeld",
##                   "dfbeta", "dfbetas", "scaledsch","partial"))



## Use relationship between cox-snell and martingal
## residuals

coxsnellres <- data.CE$status-resid(obj, type="martingale")
## Then use N-A method to estimate the cumulative 
## hazard function for residuals;
fit <- survfit(Surv(coxsnellres,data.CE$status) ~ 1)
Htilde <- cumsum(fit$n.event/fit$n.risk)
plot(log(fit$time),log(Htilde), xlab="log(t)", frame.plot = FALSE,
     ylab="log-cumulative hazard", xlim = c(-8, 2), ylim = c(-8, 2))
abline(0, 1, lty = 3, lwd = 1.5)


# rescaled Schoelfeld residuals
sch <- cox.zph(obj) 

# chi-square test results for proportionality
# of each covariate and overall
sch$table

### calculated residuals ###
sch_time <- sch$time # the X_i's (m-vector)
sch_resids <-  sch$y # rescaled residuals (m x p matrix)

# Plot rescaled Schoelfeld residuals for each covariate
# using the object sch directly
par(mar = c(4, 4, 2, 2), mfrow=c(4,2))
plot(sch, xlab="Time (months)", lwd=2, cex.lab=1.2, cex.axis=1.2,
     ylab = c("Hormone", "Menopause", "Age", "Tumor size",
               "Tumor grade", "Progesterone", "Estrogen"))

## an alternative graphic provided in survminer package
# ggcoxzph(sch)


# To address non-proportionality of tumor grade
# re-fit the Cox proportional hazards model
# stratified by tumor grade
obj.stra <- coxph(Surv(time, status) ~ hormone + meno
         + age + size + prog + estrg + strata(grade), data = data.CE)
#Schoelfeld
#produce proportionality test results
sch.stra <- cox.zph(obj.stra) 
round(sch.stra$table, 4)

## residual plots for stratified model
par(mfrow=c(3,2))
plot(sch.stra, xlab="Time (months)", lwd=2, cex.lab=1.2, cex.axis=1.2,
     ylab = c("Hormone", "Menopause", "Age", "Tumor size",
              "Progesterone", "Estrogen"))



## Martingale residuals
mart_resid <- resid(obj.stra,type = 'martingale')
## Deviance residuals
dev_resid <- resid(obj.stra,type = 'deviance')


#plot the martingale residuals against
# the fours quantitative covariates:
# age, tumor size, progesterone and
# estrogen receptor levels

## Age
par(mfrow=c(2,2))
plot(data.CE$age, mart_resid,
     xlab="Age (years)", ylab="Martingale residuals",
     main='Age',cex.lab=1.2,cex.axis=1.2)
lines(lowess(data.CE$age, mart_resid), lwd = 2)
abline(0,0,lty=3,lwd=2)

## Tumor size
plot(data.CE$size, mart_resid,
     xlab="Tumor size (mm)", ylab="Martingale residuals",
     main='Tumor size',cex.lab=1.2,cex.axis=1.2)
lines(lowess(data.CE$size, mart_resid),lwd=2)
abline(0,0,lty=3,lwd=2)

## Progesterone
plot(data.CE$prog, mart_resid,
     xlab="Progesterone receptor (fmol/mg)", ylab="Martingale Residuals",
     main='Progesterone',cex.lab=1.2,cex.axis=1.2)
lines(lowess(data.CE$prog, mart_resid),lwd=2)
abline(0,0,lty=3,lwd=2)


## Estrogen
plot(data.CE$estrg, mart_resid,
     xlab="Estrogen receptor (fmol/mg)", ylab="Martingale Residuals",
     main='Estrogen',cex.lab=1.2,cex.axis=1.2)
lines(lowess(data.CE$estrg, mart_resid),lwd=2)
abline(0,0,lty=3,lwd=2)


# To address non-linear age
# categorize age in agec
# age<=40: agec=1
# 40<age<=60: agec=2
# age>60: agec=3
data.CE$agec <- (data.CE$age<=40)+2*(data.CE$age>40&data.CE$age<=60)+
    3*(data.CE$age>60)
data.CE$agec <- factor(data.CE$agec)


#re-fit the model with agec
obj.stra.final <- coxph(Surv(time,status)~ hormone + meno
               + agec + size + prog + estrg + strata(grade), data = data.CE)


#plot the estimated HRs for agec (Figure 4.6)
# and confidence intervals
final.sum <- summary(obj.stra.final)
final.sum


# Plot the age-group-specific HR and confidence
# intervals from the re-fitted model
ci.table=final.sum$conf.int
hr=ci.table[3:4,1]
hr.low=ci.table[3:4,3]
hr.up=ci.table[3:4,4]

par(mfrow=c(1,1))
plot(1:3,c(1,hr),ylim=c(0,1.2),frame=F,xaxt='n',
     xlab="Age (years)", ylab="Hazard ratio",pch=19,cex=1.5,cex.lab=1.2,
     cex.axis=1.2)
axis(1, at=c(1,2,3),labels=c("(20, 40]","(40, 60]","(60, 80]"),cex.axis=1.2)
# horizontal error bars
arrows(2:3, hr.low, 2:3, hr.up, length=0.05, angle=90, code=3,lwd=2)
lines(1:3,c(1,hr),lty=3,lwd=2)




###########################################
# Illustration with time-varying covariates
# Stanford heart study
###########################################
head(heart)
## change variable name "year" -> "accpt"
colnames(heart)[5] <- "accpt"

#sample size
n <- length(unique(heart$id))

# fit a Cox model with time-dependent "transplant"
obj <- coxph(Surv(start, stop, event) ~ age + accpt + surgery + transplant, data=heart)
#summarize results
summary(obj)