Chapter 13 - Composite Endpoints

Slides

Lecture slides here. (To convert html to pdf, press E \(\to\) Print \(\to\) Destination: Save to pdf)

Base R Code

Show the code
#######################################################
#  Section 13.2 RMT-IF analysis of the high-risk
#   subgroup of the HF-ACTION study data
#######################################################


# install load the rmt package
install.packages("rmt")
library(rmt)
##### Read in HF-ACTION DATA########
data("hfaction")

data <- hfaction

# number of unique patients by treatment
# group and overall
uid <- unique(data$patid)
n <- length(uid)

uid1 <- unique(data$patid[data$trt_ab==1])
n1 <- length(uid1)

uid0 <- unique(data$patid[data$trt_ab==0])
n0 <- length(uid0)


## average number of hosps
sum(data$status[data$trt_ab==1]==1)/n1
sum(data$status[data$trt_ab==0]==1)/n0

## total number of hosps
sum(data$status==1)
## total number of deaths
sum(data$status==2)


# extract variables for analysis by
# rmtfit()
id <- data$patid
time <- data$time
status <- data$status
trt <- data$trt_ab



###### Standard RMST analysis #################
library(survRM2)

### create datasets for time to first event ###
data.TFE <- data[!duplicated(data$patid),]

### create datasets for overall mortality ###
## death: status=2
data_death <- data[data$status==2|data$status==0,]

##RMST analysis for hospitalization-free survival and overall survival
rmst2(data.TFE$time, data.TFE$status>0, data.TFE$trt_ab, tau=3.97)
rmst2(data_death$time, data_death$status>0, data_death$trt_ab, tau=3.97)


#### Kaplan-Meier (KM) curves for the hospitalization-free survival
## (time to the first event) and overall survival by treatment group.

# Fit KM curves by trt_ab
obj.TFE <- survfit(Surv(time,status>0)~trt_ab,data=data.TFE)
obj.death <- survfit(Surv(time,status==2)~trt_ab,data=data_death)


# Plot Fig 13.1
par(mfrow=c(1,2))
plot(obj.TFE,main="Hopitalization-free survival",lty=1:2,ylab="Survival probabilities",
     xlab="Time (years)", lwd=2)
legend("bottomleft",c("Usual care","Exercise training"),lwd=2, lty=1:2)
abline(v=4,lty=3,lwd=2)

plot(obj.death,main="Overall survival",lty=1:2,ylab="Survival probabilities",
     xlab="Time (years)", lwd=2)
legend("bottomleft",lty=1:2,c("Usual care","Exercise training"),lwd=2)
abline(v=4,lty=3,lwd=2)

########################
# RMT-IF analysis
########################


# analyze the data using rmtfit()
obj <- rmtfit(rec(patid,time,status)~trt_ab,data=data)
# Alternatively
# obj=rmtfit(id,time,status,trt,type="recurrent")

summary(obj, Kmax=1, tau=3.97)

#############################################################
# Graphical analysis of the HF-ACTION trial to
# evaluate the effect of exercise training.
###########################################################

par(mfrow=c(1,2))
# Kmax=4: to aggregate k=4,..., K
bouquet(obj,Kmax=4,cex.group = 1.0,cex.lab=1.5,cex.axis=1.5,
        xlab="Restricted mean win/loss time (years)",
        ylab="Follow-up time (years)",group.label=F,ylim=c(0,4.2))
text(-0.8,4.15,paste("Usual care"),cex=1.2)
text(0.8,4.15,paste("Exercise training"),cex=1.2)

plot(obj,conf=T,lwd=2, cex.lab=1.5,cex.axis=1.5,xlab="Follow-up time (years)",
     ylab="RMT-IF of training (years)",main="")
par(mfrow=c(1,1))

### LaTeX table ###

### format to LATEX table
pval_fmt3=function(x){
  if(x<0.001){
    return("$<$0.001")
  }else{
    return(round(x,3))
  }
}


ltable=NULL
# aggregate the results for k=1,..., K
hosp_sum=summary(obj,Kmax=1,tau=3.97)$tab
# aggregate the results for k=4,..., K
all_sum=summary(obj,Kmax=4,tau=3.97)$tab

ltable=c("&","&","&",round(12*hosp_sum[1,1],2),"&",round(12*hosp_sum[1,2],2),"&",pval_fmt3(hosp_sum[1,4]),"\\")

for (i in 1:6){
  tmp=c("&",i,"&&",round(12*all_sum[i,1],2),"&",round(12*all_sum[i,2],2),"&",pval_fmt3(all_sum[i,4]),"\\")
  ltable=rbind(ltable,tmp)
}

ltable[5,2]="4+"
ltable[6:7,2]=""

rownames(ltable)=c("Hopitalization","","" ,"","","Death","Overall")

noquote(ltable)





##################################################################
# This code is related to the numerical results in chapter 13   ##
##################################################################


# The real data used in Section 13.3.7 are protected by data sharing agreement.
# Below is a similar analysis based on a subset of the data using the "WR" package.
# The analysis illustrates the use of the pwreg() function for fitting Pocock's PW
# regression model and score.proc() function for plotting standardized score processes


# An example: HF-ACTION study

# We consider a dataset from the HF-ACTION study consisting of 451 non-ischemic heart failure
# patients. The study was conducted between April 2003 through Feb 2007 at 82 sites in the USA,
# Canada, and France (O'Connor et al., 2009). The study objective was to assess the effect of
# adding aerobic exercise training to usual care on the patients CV outcomes. The primary
# endpoint was a composite of all-cause death and all-cause hospitalization.

#######################################################################
# We first load the WR package and the analysis dataset non_ischemic
#######################################################################

# If the "WR" hasn't been installed, need to download and install it
# from CRAN
install.packages("WR")
library(WR)
#> Loading required package: survival
head(non_ischemic)
#>   ID time status trt_ab age sex Black.vs.White Other.vs.White   bmi
#> 1  1  221      2      0  62   1              0              0 25.18
#> 2  1  383      0      0  62   1              0              0 25.18
#> 3  2   23      2      0  75   1              1              0 22.96
#> 4  2 1400      0      0  75   1              1              0 22.96
#> 5  5    7      2      0  48   1              1              0 34.37
#> 6  5   10      1      0  48   1              1              0 34.37
#>   bipllvef hyperten COPD diabetes acei betab smokecurr
#> 1    32.24        0    0        0    0     1         1
#> 2    32.24        0    0        0    0     1         1
#> 3    21.71        1    0        0    0     1         0
#> 4    21.71        1    0        0    0     1         0
#> 5    22.97        1    0        0    0     1         0
#> 6    22.97        1    0        0    0     1         0


# Re-label the covariates with informative names.

colnames(non_ischemic)[4:16]=c(
  "Training vs Usual","Age (year)","Male vs Female","Black vs White",
  "Other vs White", "BMI","LVEF","Hypertension","COPD","Diabetes",
  "ACE Inhibitor","Beta Blocker", "Smoker"
)

#Compute the sample size the median length of follow-up.

# sample size
length(unique(non_ischemic$ID))
#> [1] 451
# median length of follow-up time
median(non_ischemic$time[non_ischemic$status<2])/30.5
#> [1] 31.63934

#So we have n=451 unique patients with a median follow-up of 31.6 months.


################################################################
# Next, we use the pwreg() function to fit the PW model:
################################################################

# get the number of rows and number of covariates.
nr <- nrow(non_ischemic)
p <- ncol(non_ischemic) - 3

# extract ID, time, status and covariates matrix Z from the data.
# note that: ID, time and status should be column vector.
# covariatesZ should be (nr, p) matrix.
ID <- non_ischemic[,"ID"]
time <- non_ischemic[,"time"]
status <- non_ischemic[,"status"]
Z <- as.matrix(non_ischemic[,4:(3+p)], nr, p)


# pass the parameters into the function
pwreg.obj <- pwreg(ID=ID, time=time, status=status, Z=Z)

print(pwreg.obj)
#> Call:
#> pwreg(time = time, status = status, Z = Z, ID = ID)
#>
#> Proportional win-fractions regression models for priority-adjusted composite endpoint
#>
#>
#>
#> Total number of pairs: 101475
#> Wins-losses on death:  7644 (7.5%)
#> Wins-losses on non-fatal event:  78387 (77.2%)
#> Indeterminate pairs 15444 (15.2%)
#>
#> Newton-Raphson algorithm converged in 5 iterations.
#>
#> Overall test: chisq test with 13 degrees of freedom;
#>  Wald statistic 24.9 with p-value 0.02392931
#>
#> Estimates for Regression parameters:
#>
#>                     Estimate         se z.value p.value
#> Training vs Usual  0.1906687  0.1264658  1.5077 0.13164
#> Age (year)        -0.0128306  0.0057285 -2.2398 0.02510 *
#> Male vs Female    -0.1552923  0.1294198 -1.1999 0.23017
#> Black vs White    -0.3026335  0.1461330 -2.0709 0.03836 *
#> Other vs White    -0.3565390  0.3424360 -1.0412 0.29779
#> BMI               -0.0181310  0.0097582 -1.8580 0.06316 .
#> LVEF               0.0214905  0.0086449  2.4859 0.01292 *
#> Hypertension      -0.0318291  0.1456217 -0.2186 0.82698
#> COPD              -0.4023069  0.2066821 -1.9465 0.05159 .
#> Diabetes           0.0703990  0.1419998  0.4958 0.62006
#> ACE Inhibitor     -0.1068201  0.1571317 -0.6798 0.49662
#> Beta Blocker      -0.5344979  0.3289319 -1.6250 0.10417
#> Smoker            -0.0602350  0.1682826 -0.3579 0.72039
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Point and interval estimates for for win ratios:
#>
#>                   Win Ratio 95% lower CL 95% higher CL
#> Training vs Usual 1.2100585    0.9444056     1.5504374
#> Age (year)        0.9872513    0.9762288     0.9983983
#> Male vs Female    0.8561648    0.6643471     1.1033663
#> Black vs White    0.7388699    0.5548548     0.9839127
#> Other vs White    0.7000951    0.3578286     1.3697431
#> BMI               0.9820323    0.9634287     1.0009952
#> LVEF              1.0217231    1.0045572     1.0391823
#> Hypertension      0.9686721    0.7281543     1.2886357
#> COPD              0.6687755    0.4460178     1.0027865
#> Diabetes          1.0729362    0.8122757     1.4172433
#> ACE Inhibitor     0.8986873    0.6604773     1.2228110
#> Beta Blocker      0.5859634    0.3075270     1.1164977
#> Smoker            0.9415433    0.6770144     1.3094312
#
# The output consists of three parts. The first part presents some descriptive statistics
# on the proportions of win-loss status among all (n2)=101,475
# pairs. According to the output, 7.5% of them are determined by death;
# 77.2% by hospitalization, and the remaining 7.2% are indeterminate.
# It also reports an overall (Wald) test with p-value 0.024, suggesting that,
# at the conventional 0.05 level, the 13 covariates are significantly associated
# with the composite outcome.
#
# The second part presents a table for the estimates and standard errors of the regression
# coefficient, along with their corresponding p -value for testing the coefficient being
# zero. The third part is perhaps the most informative, tabulating the estimated win ratios
# (exponential of the regression coefficients) and their associated 95% confidence intervals.
# We can see that a patient in exercise training is 21% more likely to have a better
#  priority-adjusted composite outcome than one in usual care. However, this difference
# is statistically not significant. In addition, younger age, white race, higher LVEF are
# significantly associated with more favorable outcomes than otherwise, while the beneficial
# effects of low BMI and absence of COPD history are border-line significant.
#


# To assess the effect of race on the composite outcome, we test the null hypothesis
# H0:\beta_4=\beta_5=0.
# We conduct a 2-df Chi-square Wald test based on (\beta_4,\beta_5)


  #extract estimates of (\beta_4,\beta_5)
  beta <- matrix(pwreg.obj$beta[4:5])
  #extract estimated covariance matrix for (\beta_4,\beta_5)
  Sigma <- pwreg.obj$Var[4:5,4:5]
  #compute chisq statistic in quadratic form
  chistats <- t(beta) %*% solve(Sigma) %*% beta

# compare the Wald statistic with the reference
# distribution of chisq(2) to obtain the p-value
1 - pchisq(chistats, df = 2)
#>           [,1]
#> [1,] 0.1016988

#  The p -value is 0.102. So the overall effect of race on the composite outcome is
# non-significant.


#  Finally, we use the score.proc() function to plot the standardized score process
# for each covariate:

  score.obj <- score.proc(pwreg.obj)
  print(score.obj)
#> This object contains two components:
#>  't': an l-vector of times
#>  'score': a p-by-l matrix whose k'th row is the standardized score process for the k'th
  # covariate
#>           as a function of t
#>
#> Use 'plot(object,k=k)' to plot the k'th score process.

oldpar <- par(mfrow = par("mfrow"))
par(mfrow = c(4,4))
for(i in c(1:13)){
  plot(score.obj, k = i)
}
par(oldpar)