Chapter slides here. (To convert html to pdf, press E \(\to\) Print \(\to\) Destination: Save to pdf)
R-code
Show the code
################################################################### This code generates the numerical results in chapter 4 #################################################################### install.packages("WR")library(WR)library(tidyverse)library(knitr) # for formatted table output# load the datadata(non_ischemic)# head(non_ischemic)#> non_ischemic dataset containing columns for ID, time, status, trt_ab, and covariates# -------------------------------------------------------------------------# Descriptive Analysis# -------------------------------------------------------------------------# function to convert 1-0 to Yes-No stringsone_zero_to_yn <-function(x) {if_else(x ==1, "Yes", "No")}# clean up datadf <- non_ischemic |>filter(status !=2) |># remove rows where status=2 (duplicates or not needed)mutate(trt_ab =fct(if_else(trt_ab ==0, "Usual care", "Training")), # Convert trt_ab=0 -> "Usual care", 1 -> "Training"sex =if_else(sex ==1, "Female", "Male"),# Convert numeric sex variable to "Female"/"Male"race =case_when( Black.vs.White ==1~"Black", Other.vs.White ==1~"Other", Black.vs.White ==0& Other.vs.White ==0~"White" ),# Recode race based on binary indicatorsrace =fct(race), # Convert to factoracross(hyperten:smokecurr, one_zero_to_yn)# Convert all these binary variables to "Yes"/"No" strings )## A function to compute median (IQR) for a numeric vector 'x', ## rounded to the r-th decimal placemed_iqr <-function(x, r =1) { qt <-quantile(x, na.rm =TRUE) # Calculate quartilesstr_c(round(qt[3], r), " (", # Medianround(qt[2], r), ", ", # Q1round(qt[4], r), ")"# Q3 )}# create summary table for quantitative variables# We summarize across arms: 'age', 'bmi', and 'bipllvef' (Left Ventricular Ejection Fraction)tab_quant <- df |>group_by(trt_ab) |>summarize(across(c(age, bmi, bipllvef), med_iqr) ) |>pivot_longer(!trt_ab,values_to ="value",names_to ="name" ) |>pivot_wider(values_from = value,names_from = trt_ab ) |>mutate(name =case_when( name =="age"~"Age (years)", name =="bmi"~"BMI", name =="bipllvef"~"LVEF (%)" ) )## A function that computes N (%) for each level of `var`,## grouped by 'group' in df (percent rounded to r-th decimal).freq_pct <-function(df, group, var, r =1) {# Tally up the count n for each level of 'var' by 'group' var_counts <- df |>group_by({{ group }}, {{ var }}) |>summarize(n =n(),.groups ="drop" )# Join with the total count N in each group, then compute "n (xx%)" var_counts |>left_join( var_counts |>group_by({{ group }}) |>summarize(N =sum(n)),by =join_by({{ group }}) ) |>mutate(value =str_c(n, " (", round(100* n / N, r), "%)") ) |>select(-c(n, N)) |>pivot_wider(names_from = {{ group }},values_from = value ) |>rename(name = {{ var }} )}# Apply freq_pct() to sex, race, hyperten:smokecurr# sexsex <- df |>freq_pct(trt_ab, sex) |>mutate(name =str_c("Sex - ", name) # e.g., "Sex - Female", "Sex - Male" )# racerace <- df |>freq_pct(trt_ab, race) |>mutate(name =str_c("Race - ", name)# e.g., "Race - Black", "Race - White", etc. )hyperten <- df |>freq_pct(trt_ab, hyperten) |>filter(name =="Yes") |>mutate(name ="Hypertension" )# The following block is repeated as in the original code:hyperten <- df |>freq_pct(trt_ab, hyperten) |>filter(name =="Yes") |>mutate(name ="Hypertension" )COPD <- df |>freq_pct(trt_ab, COPD) |>filter(name =="Yes") |>mutate(name ="COPD" )diabetes <- df |>freq_pct(trt_ab, diabetes) |>filter(name =="Yes") |>mutate(name ="Diabetes" )acei <- df |>freq_pct(trt_ab, acei) |>filter(name =="Yes") |>mutate(name ="ACE Inhibitor" )betab <- df |>freq_pct(trt_ab, betab) |>filter(name =="Yes") |>mutate(name ="Beta Blocker" )smokecurr <- df |>freq_pct(trt_ab, betab) |># This line references betab for groupingfilter(name =="smokecurr") |>mutate(name ="Smoker" )# Combine all the partial tables (quantitative + categorical summaries)tabone <-bind_rows( tab_quant[1, ], sex, race, tab_quant[2:3, ], hyperten, COPD, diabetes, acei, betab, smokecurr)## Add the group sample size (N=...) to column namescolnames(tabone) <-c(" ",str_c(colnames(tabone)[2:3]," (N=", table(df$trt_ab), ")" ))## Print out the final table in a formatted mannerkable(tabone, align =c("lcc"))# PW regression analysis --------------------------------------------------# The following code chunk is not executed by default (eval=FALSE) but # shows how to fit piecewise regression using 'pwreg()'.# re-label the covariates with informative names.colnames(non_ischemic)[4:16] =c("Training vs Usual", "Age (year)", "Male vs Female", "Black vs White", "Other vs White", "BMI", "LVEF", "Hypertension", "COPD", "Diabetes","ACE Inhibitor", "Beta Blocker", "Smoker")p <-ncol(non_ischemic) -3# extract ID, time, status, and covariates matrix Z# note that ID, time, and status should be column vectorsID <- non_ischemic[, "ID"]time <- non_ischemic[, "time"] /30.5# convert days to monthsstatus <- non_ischemic[, "status"]Z <-as.matrix(non_ischemic[, 4:(3+ p)])# pass parameters into the functionobj <-pwreg(ID, time, status, Z)obj#> Displays the main results from piecewise regression# extract estimates of (beta_4, beta_5)beta <-matrix(obj$beta[4:5])# extract estimated covariance matrix for (beta_4, beta_5)Sigma <- obj$Var[4:5, 4:5]# compute chisq statistic in quadratic form to jointly test beta_4, beta_5chistats <-t(beta) %*%solve(Sigma) %*% beta# compare the Wald statistic with a chisq(2) reference distribution1-pchisq(chistats, df =2)#> example p-value for the joint test# compute score processesscore_obj <-score.proc(obj)# plot scores for all 13 covariates to check time-varying effectspar(mfrow =c(4, 4))for (i in1:13) {plot(score_obj, k = i, xlab ="Time (months)")# add reference lines for approx. +/-2 standard errorsabline(a =2, b =0, lty =3)abline(a =0, b =0, lty =3)abline(a =-2, b =0, lty =3)}### Exercise: Stratify by sex #### Drop the sex variable from Z (assuming it's at column 3)sex <- Z[, 3]Zs <- Z[, -3]obj_str <-pwreg(ID, time, status, Zs, strata = sex)obj_str#> Fits a PW regression model stratified by sex
\[
\def\T{{\rm T}}
\]
4.1 Regression Framework
The proportional win-fractions (PW) model provides a semiparametric regression framework for composite outcomes defined by prioritized win/loss rules. Like the Cox model for univariate time-to-event data, the PW model enables covariate adjustment and improves efficiency. It also generalizes the two-sample win ratio by allowing treatment comparisons to be conditional on baseline characteristics.
Given two subjects with covariates \(Z_i\) and \(Z_j\), the conditional win ratio is defined as \[
WR(t; Z_i, Z_j;\mathcal W) = \frac{E\left\{\mathcal{W}(\mathcal H_i^*, \mathcal H_j^*)(t) \mid Z_i, Z_j \right\}}{E\left\{\mathcal{W}(\mathcal H_j^*, \mathcal H_i^*)(t) \mid Z_i, Z_j \right\}},
\] where \(\mathcal{W}\) is a prespecified win function and the ratio represents the odds that subject \(i\) wins against \(j\).
4.2 Proportional Win-Fractions Model
4.2.1 Model Specification
Under the proportional win-fractions assumption, the conditional win ratio is modeled as \[
WR(t \mid Z_i, Z_j; \mathcal W) = \exp\left\{\beta^\T (Z_i - Z_j)\right\},
\] where \(\beta\) is a vector of regression coefficients interpreted on the log win-ratio scale. The PW model is semiparametric: it imposes parametric structure on covariate effects while leaving the distribution of outcomes unspecified.
The model includes standard Cox regression and two-sample WR as special cases. For instance, when \(\mathcal W\) corresponds to time-to-first-event comparisons, PW reduces to the Cox model. When \(Z\) is a binary treatment indicator, \(\exp(\beta)\) equals the marginal win ratio between groups.
4.2.2 Estimation and Residuals
Model parameters are estimated by solving a \(U\)-statistic estimating equation involving pairwise win residuals: \[
M_{ij}(t \mid Z_i, Z_j; \beta) = \delta_{ij}(t) - R_{ij}(t) \cdot \frac{\exp\left\{\beta^\T(Z_i - Z_j)\right\}}{1 + \exp\left\{\beta^\T(Z_i - Z_j)\right\}}.
\] The estimating function averages weighted residuals across all subject pairs. Variance is obtained via \(U\)-statistic theory. Proportionality assumptions can be assessed using cumulative residual plots analogous to score processes in the Cox model.
4.2.3 Stratification
To address potential non-proportionality across levels of categorical covariates, the PW model can be extended to allow stratum-specific baselines. In the stratified PW model, win/loss probabilities are modeled only within each stratum. This permits heterogeneity in win functions across subgroups while maintaining proportionality within each.
4.2.4 HF-ACTION Application
The PW model was applied to 451 non-ischemic patients in HF-ACTION using a win function prioritizing death over first hospitalization. Covariates included demographics, comorbidities, and medication use. The overall Wald test for all covariates was statistically significant (p = 0.024). Significant predictors included age, LVEF, and race, with Black patients having lower win probabilities relative to White patients. Residual plots supported the proportionality assumption across most covariates.
4.3 Regularized PW Regression
4.3.1 Elastic Net Penalty
In high-dimensional settings, regularization improves model stability and supports variable selection. The wrnet function estimates the PW model under elastic net penalty. The objective function combines logistic loss over all comparable pairs with an \(\ell_1\)–\(\ell_2\) penalty: \[
l_n(\beta; \lambda) = -\frac{1}{|\mathcal R|} \sum_{(i,j)\in\mathcal R} \left[\delta_{ij} \beta^\T z_{ij} - \log\left\{1 + \exp(\beta^\T z_{ij})\right\} \right] + \lambda \left\{ (1-\alpha)\|\beta\|_2^2/2 + \alpha \|\beta\|_1 \right\}.
\] Cross-validation is performed by partitioning subjects (not pairs), and selecting the penalty \(\lambda\) that maximizes average concordance.
4.3.2 Win Scores and Concordance
The fitted model yields a win score \(\beta^\T Z\) for each subject, which ranks patients by their likelihood of winning under the win rule. The generalized concordance index measures how well these scores align with observed pairwise wins in a validation set: \[
\mathcal{C} = \frac{1}{|\mathcal{R}^*|} \sum_{(i,j) \in \mathcal{R}^*} \left[ I\{(2\delta_{ij} - 1)(\beta^\T z_i - \beta^\T z_j) > 0\} + \frac{1}{2} I(\beta^\T z_i = \beta^\T z_j) \right].
\]
4.3.3 GBC Case Study
The wrnet model was applied to the GBC dataset with 686 patients and 9 covariates. The model was trained via 10-fold subject-based cross-validation. The final model selected a sparse set of predictors and achieved an overall test concordance of 0.66. Death-specific concordance was 0.72. Variable importance rankings confirmed the relevance of tumor grade, nodal status, and hormone therapy.
4.4 Example R code
The following example assumes a data frame df in long format with columns:
######################################### 1. Proportional win-fractions model########################################library(WR)# Fit the PW model using death > nonfatal event win functionobj <-pwreg(ID = df$id, time = df$time, status = df$status, Z = df$Z)# Summarize resultsobj$beta # Estimated coefficientsobj$Var # Variance-covariance matrixprint(obj)# Score residual plotsscore_obj <-score.proc(obj)plot(score_obj, k =1) # Residuals for 1st covariate######################################### 2. Regularized PW regression (WRNet)######################################### Go to https://lmaowisc.github.io/wrnet/ for the R script# Split data into training and test setsset.seed(123)obj_split <- df |>wr_split(prop =0.8)df_train <- obj_split$df_traindf_test <- obj_split$df_test# Cross-validation to select optimal lambdaobj_cv <-cv_wrnet(df_train$id, df_train$time, df_train$status, df_train |> dplyr::select(-id, -time, -status))lambda_opt <- obj_cv$lambda[which.max(obj_cv$concordance)]# Fit final modelfinal_fit <-wrnet(df_train$id, df_train$time, df_train$status, df_train |> dplyr::select(-id, -time, -status),lambda = lambda_opt)# Variable importancefinal_fit |>vi_wrnet() |>vip()# Test set performancetest_wrnet(final_fit, df_test)
4.5 Conclusion
The PW model generalizes the win ratio to adjust for covariates and test prognostic factors. It provides interpretable effect estimates on the win ratio scale, supports model diagnostics via residuals, and enables stratification when needed. The regularized PW model (wrnet) extends these capabilities to high-dimensional settings and supports model tuning and evaluation via cross-validation and concordance analysis.