########################################
# 1. Linear mixed-effects for biomarker
########################################
library(nlme)
<- lme(y ~ covariates + obstime,
longit_sub random = ~ obstime | id,
data = df)
########################################
# 2. Cox model for survival outcome
########################################
library(survival)
<- df[!duplicated(df$id), ]
df_surv <- coxph(Surv(time, status) ~ covariates,
surv_sub data = df_surv, x = TRUE)
########################################
# 3. Fit joint model
########################################
library(JM)
<- jointModel(longit_sub, surv_sub,
obj timeVar = "obstime",
method = "piecewise-PH-aGH")
summary(obj)
Chapter 11 - Joint Analysis of Longitudinal and Survival Data
Slides
Lecture slides here. (To convert html to pdf, press E \(\to\) Print \(\to\) Destination: Save to pdf)
Chapter Summary
When longitudinal biomarkers are repeatedly measured alongside a time-to-event outcome, separate analyses can lead to biased or inefficient results, especially in the presence of informative dropout. Joint models address this by integrating a mixed-effects model for the biomarker process with a survival model for the event time, providing a coherent framework for estimation, interpretation, and prediction.
Longitudinal–survival joint models
The data consist of repeated measurements \(Y_{ij}\) over time \(t_{ij}\) \((j=1,\ldots, n_i)\) and a survival outcome \(T_i\) subject to right censoring.
Linear mixed-effects (LME) model
For continuous biomarkers, we assume: \[ Y_{ij} = \gamma_0 + \gamma^\mathrm{T} Z_{ij} + b_i^\mathrm{T} \tilde{Z}_{ij} + \epsilon_{ij}, \] where \(b_i \sim \mathcal{N}(0, \Sigma_b)\) are subject-specific random effects and \(\epsilon_{ij} \sim \mathcal{N}(0, \sigma^2)\) are measurement errors.- Interpretation: \(\gamma\) are population-level effects; \(b_i\) captures individual-specific deviation.
- Estimation: Maximum likelihood via EM algorithm with \(b_i\) as missing data.
Cox model with latent trajectory
The survival process is linked to the “true” biomarker trajectory \(m_i(t)\): \[ \Pr(t \le T_i < t + \mathrm{d}t \mid Z_i^*, \overline{m}_i(t)) = \lambda_0(t) \exp\left(\beta^\mathrm{T} Z_i^* + \nu m_i(t)\right) \mathrm{d}t. \]- \(\nu\) quantifies the log-hazard ratio per unit increase in the biomarker.
- \(m_i(t)\) may be extracted from an LME or generalized LME model.
Extensions
Effect modification
Interaction of \(m_i(t)\) with treatment or baseline factors: \[ \exp\left\{\nu m_i(t) + \tilde{\nu}^\mathrm{T} \tilde{Z}_i^* m_i(t)\right\}. \]Lagged or cumulative effects
Replace \(m_i(t)\) with lagged or average values, e.g., \[ \overline{m}_i(t - \tau_0, t) = \frac{1}{\tau_0} \int_{t - \tau_0}^t m_i(u) \, \mathrm{d}u. \]Rate of change as predictor
Include derivative \(m_i'(t)\) in the hazard model: \[ \exp\left\{\nu_0 m_i(t) + \nu_1 m_i'(t)\right\}. \]Non-continuous outcomes
Use generalized linear mixed models (GLMMs): \[ g\left[E\{Y_i(t) \mid Z_i(t), b_i\}\right] = \gamma_0 + \gamma^\mathrm{T} Z_i(t) + b_i^\mathrm{T} \tilde{Z}_i(t). \]Multivariate extensions
For multiple biomarkers and/or multiple events: \[ \Pr(t \le T_{ik} < t + \mathrm{d}t \mid \xi_i, \overline{m}_i(t)) = \xi_i \lambda_{0k}(t) \exp\left(\beta_k^\mathrm{T} Z_i^* + \nu_k^\mathrm{T} m_i(t)\right) \mathrm{d}t. \]
Dynamic prediction
Joint models enable real-time prediction of survival probability or biomarker levels:
- Survival probability: \[ \mathcal{S}_i(t \mid u) = \Pr(T_i > t \mid T_i > u, \overline{Y}_i(u)). \]
- Future biomarker level: \[ \mathcal{M}_i(t \mid u) = E[Y_i(t) \mid T_i > u, \overline{Y}_i(u)]. \]
These are computed via Monte Carlo integration over \(p(b_i \mid \overline{Y}_i(u), T_i > u)\).
Example R code
Univariate Gaussian response: JM
package
Below is a code snippet demonstrating two-stage joint modeling using the JM
package. Assume a dataset df
with:
id
: subject ID,y
: longitudinal response,obstime
: time of measurement,
time
: event or censoring time,status
: event indicator,
- covariates:
covariates
.
More complex outcomes: JMbayes2
package
Assume longitudinal outcomes y1
(continuous) and y2
(binary) with covariates obstime
, age
, and sex
.
########################################
# 1. Fit two longitudinal models
########################################
# y1: continuous outcome (e.g., biomarker)
<- lme(y1 ~ obstime + age + sex,
longit_sub1 random = ~ obstime | id, data = df)
# y2: binary outcome (e.g., clinical status)
library(JMbayes2)
<- mixed_model(y2 ~ obstime + age + sex,
longit_sub2 random = ~ obstime | id, data = df,
family = binomial())
########################################
# 2. Fit survival model
########################################
<- df[!duplicated(df$id), ]
df_surv <- coxph(Surv(time, status) ~ age + sex, data = df_surv)
surv_sub
########################################
# 3. Fit joint model with multiple outcomes
########################################
# Define biomarker contributions to hazard
<- list(
fForms "y1" = ~ value(y1) + slope(y1),
"y2" = ~ value(y2)
)
<- jm(surv_sub, list(longit_sub1, longit_sub2),
obj time_var = "obstime",
functional_forms = fForms)
summary(obj)
value(y)
: uses the fitted mean or link-transformed mean (GLMM) as a predictor.slope(y)
: includes rate of change (i.e., \(\mathrm{d} m_i(t)/\mathrm{d} t\)).area(y)
: uses cumulative biomarker value up to time \(t\).
JMbayes2
enables subject-specific predictions of biomarker evolution and survival probabilities using predict()
:
# Predict for subject with ID = 3, up to time u = 3
<- df[df$id == 3 & df$obstime < 3, ]
newdf $surv_time <- 3
newdf$status <- 0
newdf
# Longitudinal prediction
<- predict(obj, newdata = newdf, return_newdata = TRUE)
pred_longit
# Survival prediction
<- predict(obj, newdata = newdf, process = "event", return_newdata = TRUE) pred_surv
Use plot()
to visualize predictions across biomarkers and time horizons, including posterior uncertainty bands.
Conclusion
Joint modeling of longitudinal and survival data integrates biomarker evolution and event risk to improve efficiency, reduce bias from informative dropout, and provide individualized predictions. Extensions accommodate multiple or non-Gaussian biomarkers, flexible risk structures, and varying functional forms. These models are especially valuable in clinical studies where patient monitoring and outcome prediction are central goals.
R Code
Show the code
###############################################################################
# Chapter 11 R Code
#
# This script reproduces all major numerical results in Chapter 11, including:
# 1. Joint modeling of HIV/AIDS data (antiretroviral trial) using {JM}
# 2. Multivariate joint modeling for heart valve study using {JMbayes2}
###############################################################################
#==============================================================================
# (A) Analysis of the Antiretroviral Drug Trial
#==============================================================================
library(tidyverse) # For data manipulation and ggplot
library(survival) # For survival analysis (e.g., coxph)
library(JM) # Joint Modeling package
#------------------------------------------------------------------------------
# 1. Read and Prepare Data
#------------------------------------------------------------------------------
<- read.table("Data//Anti-retroviral Trial//aids.txt")
df head(df)
# Create a de-duplicated data set for the survival sub-model
<- df[!duplicated(df$id), ]
df_surv
#------------------------------------------------------------------------------
# 2. Longitudinal Sub-Model
#------------------------------------------------------------------------------
# Linear trajectory for sqrt(CD4) over time
<- lme(
longit_sub ~ obsmo + obsmo:drug + sex + hist,
CD4 random = ~ obsmo | id,
data = df
)
# Alternative with natural splines:
# longit_sub <- lme(
# CD4 ~ ns(obsmo, 3) + ns(obsmo, 3):drug + sex + hist,
# random = list(id = pdDiag(form = ~ ns(time, 3))),
# data = df
# )
#------------------------------------------------------------------------------
# 3. Survival Sub-Model
#------------------------------------------------------------------------------
<- coxph(
surv_sub Surv(time, status) ~ drug + sex + hist,
data = df_surv,
x = TRUE
)
#------------------------------------------------------------------------------
# 4. Joint Model (Piecewise-Constant Baseline Hazard)
#------------------------------------------------------------------------------
<- jointModel(
obj_joint
longit_sub,
surv_sub,timeVar = "obsmo",
method = "piecewise-PH-aGH"
)
# Alternatively, a 3-month lag:
# obj_join_l3m <- jointModel(
# longit_sub, surv_sub,
# timeVar = "obsmo", lag = 3,
# method = "piecewise-PH-aGH"
# )
# Print summary
summary(obj_joint)
#------------------------------------------------------------------------------
# 5. Parameter Extraction and Basic Table
#------------------------------------------------------------------------------
<- obj_joint$coefficients # Coefficients from model
coef $D # Covariance matrix of random effects
coef<- c(coef$gammas, coef$alpha) # Survival-related parameters
beta <- coef$betas # Longitudinal parameters
gamma
<- obj_joint$Hessian
hmat <- solve(hmat) # Inverse Hessian => approximate variance matrix
Var <- sqrt(diag(Var))
se
# Prepare table for survival sub-model
<- se[str_detect(names(se), "T.")][1:4] # Extract relevant subset
se_beta <- qnorm(0.975)
za
<- tibble(
cox_tab ` ` = c("ddI v. ddC", "Male v. female",
"Infection history (No v. yes)",
"CD4 count (mm^3)"),
HR = round(exp(beta), 2),
CI = paste0(
"(",
round(exp(beta - za * se_beta), 2), ", ",
round(exp(beta + za * se_beta), 2), ")"
),`p-value`= scales::pvalue(2 * (1 - pnorm(abs(beta / se_beta))))
)
::kable(cox_tab, align = "c")
knitr
#------------------------------------------------------------------------------
# 6. Residual Analysis
#------------------------------------------------------------------------------
# Base R diagnostic plots
par(mfrow = c(2, 2))
plot(obj_joint) # built-in {JM} diagnostic
# Extract residuals
<- residuals(obj_joint, process = "Longitudinal", type = "Subject") # subject-level
epsilons <- residuals(obj_joint, process = "Longitudinal", type = "Marginal") # marginal
Rs <- residuals(obj_joint, process = "Event") # event-level
mart
# Fitted m_i(t): subtract subject-specific residual from observed
<- df$CD4 - epsilons
ms
# Plot 1: Observed vs. fitted Y
<- tibble(
pred_v_obs x = ms,
y = df$CD4
%>%
) ggplot(aes(x = x, y = y)) +
geom_point(color = "gray20") +
geom_abline(intercept = 0, slope = 1, linewidth = 0.8, linetype = 2) +
xlim(c(0, 22)) +
ylim(c(0, 22)) +
labs(
x = expression("Fitted " ~ hat(m)[i](t[j])),
y = expression("Observed " ~ Y[ij]),
title = "Overall fit of longitudinal model"
+
) theme_minimal() +
theme(plot.title = element_text(size = 12))
pred_v_obs
# Plot 2: QQ plot for measurement error
<- tibble(
eps_qq epsilons = epsilons
%>%
) ggplot(aes(sample = epsilons)) +
stat_qq(color = "gray20") +
stat_qq_line(linewidth = 0.8, linetype = 2) +
labs(
x = "Normal Quantiles",
y = expression("Quantiles of " ~ hat(epsilon)[ij]),
title = "Normal QQ plot of measurement error"
+
) theme_minimal() +
theme(plot.title = element_text(size = 12))
eps_qq
# Plot 3: Residual vs. observed outcome
<- tibble(
eps_v_y x = df$CD4,
y = epsilons
%>%
) ggplot(aes(x = x, y = y)) +
geom_point(color = "gray20") +
geom_hline(yintercept = 0, linewidth = 0.8, linetype = 2) +
labs(
x = expression("Observed " ~ Y[ij]),
y = expression("Residual " ~ hat(epsilon)[ij]),
title = "Homoscedasticity of measurement error"
+
) theme_minimal() +
theme(plot.title = element_text(size = 12))
eps_v_y
# Plot 4: Martingale residual vs. fitted biomarker
<- tibble(
mart_v_mi x = ms,
y = mart
%>%
) ggplot(aes(x = x, y = y)) +
geom_point(color = "gray20") +
geom_smooth(color = "black", se = FALSE) +
geom_hline(yintercept = 0, linetype = 3) +
labs(
x = expression("Fitted " ~ hat(m)[i](t[j])),
y = "Martingale residuals",
title = "Martingale residual vs. biomarker"
+
) theme_minimal() +
theme(plot.title = element_text(size = 12))
mart_v_mi
# Combine with patchwork
library(patchwork)
+ eps_qq) / (eps_v_y + mart_v_mi)
(pred_v_obs
# Save figure
# ggsave("images/longit_aids_resids.png", width = 8, height = 8)
# ggsave("images/longit_aids_resids.eps", width = 8, height = 8)
#------------------------------------------------------------------------------
# 7. Model-Based Mean Trajectories (Figure)
#------------------------------------------------------------------------------
# Compute the mean trajectories based on the fitted longitudinal sub-model
<- (0:180) / 10
t
<- gamma[1]
g0 <- gamma[2]
g1 <- gamma[4]
g3 <- gamma[5]
g4
# ddC: baseline drug, with/without history
<- (g0 + g3 + g1 * t)^2
ddC_nonhist <- (g0 + g1 * t)^2
ddC_hist
# ddI: alternate drug, with/without history
<- (g0 + g3 + (g1 + g4)*t)^2
ddI_nonhist <- (g0 + (g1 + g4)*t)^2
ddI_hist
# Base R: Two-panel plot
par(mfrow = c(1, 2))
plot(
t, ddC_nonhist,type = "l",
frame.plot = FALSE,
main = "No previous infection",
xlim = c(0, 18),
ylim = c(0, 120),
xlab = "Time (months)",
ylab = "Mean CD4 cell count",
lwd = 2
)lines(t, ddI_nonhist, lty = 3, lwd = 2)
plot(
t, ddC_hist,type = "l",
frame.plot = FALSE,
main = "Previous infection",
xlim = c(0, 18),
ylim = c(0, 120),
xlab = "Time (months)",
ylab = "Mean CD4 cell count",
lwd = 2
)lines(t, ddI_hist, lty = 3, lwd = 2)
# ggplot version
<- length(t)
m tibble(
t = rep(t, 4),
CD4 = c(ddC_nonhist, ddI_nonhist, ddC_hist, ddI_hist),
trt = rep(rep(c("ddC", "ddI"), each = m), 2),
hist = rep(c("No previous infection", "Previous infection"), each = 2*m)
%>%
) ggplot(aes(x = t, y = CD4, linetype = trt)) +
geom_line(linewidth = 0.8) +
facet_wrap(~ hist) +
scale_x_continuous(breaks = seq(0, 18, 6)) +
scale_y_continuous(limits = c(0, 120), breaks = seq(0, 120, 30)) +
labs(
x = "Time (months)",
y = expression("CD4 cell count / mm"^3),
linetype = NULL
+
) theme_minimal() +
theme(
legend.position = "top",
strip.text = element_text(size = 11),
legend.text = element_text(size = 11),
legend.key.width= unit(1, "cm")
)
# ggsave("images/longit_mean_cd4.png", width = 8, height = 4.5)
# ggsave("images/longit_mean_cd4.eps", width = 8, height = 4.5)
#==============================================================================
# (B) Multivariate Analysis (JMbayes2) – Heart Valve Study
#==============================================================================
library(JMbayes2)
#------------------------------------------------------------------------------
# 1. Read and Inspect Data
#------------------------------------------------------------------------------
<- read.table("Data//Heart Valve Implantation Study//heartv.txt", header = TRUE)
df_valve
# Basic descriptive info
<- length(unique(df_valve$id))
n <- df_valve %>%
num_deaths group_by(id) %>%
slice(1) %>%
filter(status == 1) %>%
nrow()
#------------------------------------------------------------------------------
# 2. Longitudinal Sub-Models
#------------------------------------------------------------------------------
# (1) Binary outcome: ejection fraction <= 50%
<- mixed_model(
ef_mod ~ obsyear + age + sex + vsize + lvef + redo + cabg + acei + hc + emergenc,
ef50 random = ~ obsyear | id,
data = df_valve,
family = binomial()
)summary(ef_mod)
# (2) Log valve gradient
<- lme(
grad_mod ~ obsyear + age + sex + vsize + lvef + redo + cabg + acei + hc + emergenc,
grad random = ~ obsyear | id,
data = df_valve
)summary(grad_mod)
# (3) Log LV mass index
<- lme(
lvmi_mod ~ obsyear + age + sex + vsize + lvef + redo + cabg + acei + hc + emergenc,
lvmi random = ~ obsyear | id,
data = df_valve
)summary(lvmi_mod)
#------------------------------------------------------------------------------
# 3. Survival Sub-Model
#------------------------------------------------------------------------------
<- df_valve[!duplicated(df_valve$id), ]
df_surv_valve <- coxph(
surv_mod Surv(surv_time, status) ~ age + sex,
data = df_surv_valve
)
#------------------------------------------------------------------------------
# 4. Multivariate Joint Model in JMbayes2
#------------------------------------------------------------------------------
# Define functional forms in the Cox sub-model
<- list(
fForms "ef50" = ~ value(ef50),
"grad" = ~ slope(grad) + value(grad),
"lvmi" = ~ slope(lvmi) + value(lvmi)
)
# Fit the joint model (may require MCMC or default settings)
<- jm(
obj_valve
surv_mod,list(ef_mod, grad_mod, lvmi_mod),
time_var = "obsyear",
functional_forms= fForms
)summary(obj_valve)
#------------------------------------------------------------------------------
# 5. Extract and Tabulate Results
#------------------------------------------------------------------------------
<- obj_valve$statistics
stats <- stats$Mean
stats_mean <- stats$SD
stats_sd
# A) Longitudinal sub-model estimates
<- stats_mean$betas1
gamma_ef50_est <- stats_sd$betas1
gamma_ef50_se
<- stats_mean$betas2
gamma_grad_est <- stats_sd$betas2
gamma_grad_se
<- stats_mean$betas3
gamma_lvmi_est <- stats_sd$betas3
gamma_lvmi_se
<- qnorm(0.975)
za
# Helper function to format estimates + CIs
<- function(est, se, r = 2, exponentiate = TRUE) {
est_ci if (exponentiate) {
paste0(
round(exp(est), r), " (",
round(exp(est - za * se), r), ", ",
round(exp(est + za * se), r), ")"
)else {
} paste0(
round(est, r), " (",
round(est - za * se, r), ", ",
round(est + za * se, r), ")"
)
}
}
<- function(est, se) {
pval_calc ::pvalue(1 - pchisq((est / se)^2, df = 1))
scales
}
# Example table
<- tibble(
df_res variable = c(
"(Intercept)", "Years after surgery", "Age (years)", "Male vs. female",
"Valve size", "LVEF grade (1–3)", "History of surgery", "CABG",
"ACE inhibitor", "High cholesterol", "Emergency grade (1–3)"
),EF_OR = est_ci(gamma_ef50_est, gamma_ef50_se, r = 2, exponentiate = TRUE),
Grad = est_ci(gamma_grad_est, gamma_grad_se, r = 2, exponentiate = FALSE),
LVMI = est_ci(gamma_lvmi_est, gamma_lvmi_se, r = 2, exponentiate = FALSE)
)
df_res
# B) Survival sub-model
<- stats_mean$alphas
nu <- stats_sd$alphas
nu_se
<- tibble(
cox_df variable = c(
"EF<= 50% (log-odds)",
"LV gradient (mmHg)",
"LV gradient slope (mmHg/yr)",
"LVMI (g/m^2)",
"LVMI slope (g/m^2/yr)"
),HR = est_ci(nu, nu_se, r = 2, exponentiate = TRUE),
pval = pval_calc(nu, nu_se)
)
cox_df
#------------------------------------------------------------------------------
# 6. Dynamic Predictions
#------------------------------------------------------------------------------
# Example: time t0=3 years for patient id=3
<- 3
t0 <- df_valve[df_valve$id == 3, ] # data for this patient
newdf <- newdf[newdf$obsyear < t0, ] # only times before t0
newdf $status <- 0 # assume alive at t0
newdf$surv_time <- t0
newdf
# Predict longitudinal trajectories from t0 to 10
<- predict(
pred_longit
obj_valve,newdata = newdf,
times = seq(t0, 10, length.out = 10),
return_newdata = TRUE
)
# Base R plot
par(mfrow = c(1, 3))
par(mar = c(5, 5.2, 2, 1))
plot(
pred_longit,outcomes = 1:3,
ylab_long = c(
"Probability of reduced LVEF",
"Valve gradient (mmHg)",
expression("LVMI (" ~ g/m^2 ~ ")")
),xlab = "Time (years)",
xlim = c(0, 10),
cex_axis = 1.5,
cex_xlab = 1.8,
cex_ylab_long = 1.8,
col_points = "gray20",
col_line_long= "black",
fill_CI_long = "gray80"
)
# Predict survival probabilities
<- predict(
pred_surv
obj_valve,newdata = newdf,
process = "event",
times = seq(t0, 10, length.out = 10),
return_newdata = TRUE
)
# Combined event + longitudinal
par(mfrow = c(1, 1))
plot(
pred_longit, pred_surv,outcomes = 1:3,
fun_event = function(x) 1 - x, # survival = 1 - CIF
ylab_event = "Survival Probabilities",
ylab_long = c(
"Probability of reduced LVEF",
"LV gradient (mmHg)",
expression("LVMI (" ~ g/m^2 ~ ")")
),pos_ylab_long = c(0.5, 40, 300),
col_points = "gray20",
col_line_long = "black",
fill_CI_long = "gray80",
col_line_event= "black",
fill_CI_event = "gray80",
xlab = "Time (years)",
xlim = c(0, 10),
cex_axis= 1.2,
cex_xlab= 1,
cex_ylab_long = 1.2,
cex_ylab_event= 1
)
# Print predicted survival at 10 years
%>%
pred_surv ::filter(surv_time == 10) %>%
dplyr::select(id, surv_time, pred_CIF, low_CIF, upp_CIF) %>%
dplyrmutate(
pred = round(1 - pred_CIF, 3),
lower = round(1 - upp_CIF, 3),
upper = round(1 - low_CIF, 3)
)