R code Chapter 14

Mae Jemstone character from Discovering Statistics using R and RStudio
This document contains abridged sections from Discovering Statistics Using R and RStudio by Andy Field so there are some copyright considerations. You can use this material for teaching and non-profit activities but please do not meddle with it or claim it as your own work. See the full license terms at the bottom of the page.

Load packages

Remember to load the tidyverse:

library(tidyverse)

and the other packages we’ll use

library(broom)
library(broom.mixed)
library(datawizard)
library(lme4)
library(lmerTest)

Load the data

Load the data from the discover package:

cosmetic_tib <- discovr::cosmetic

If you want to read the file from the CSV and you have set up your project folder as I suggest in Chapter 1, then the code you would use is:

cosmetic_tib <- here::here("data/cosmetic.csv") |>
  readr::read_csv() |>
  dplyr::mutate(
    clinic = forcats::as_factor(clinic),
    reason = forcats::as_factor(reason)
    )

This code reads the file in and converts the variables clinic and reason to factors. It’s a good idea to check that the levels of clinic are sequential from clinic 1 to clinic 21, and that reason has levels ordered change appearance and physical reason. Check the factor levels by executing:

levels(cosmetic_tib$clinic)
##  [1] "Clinic 1"  "Clinic 2"  "Clinic 3"  "Clinic 4"  "Clinic 5"  "Clinic 6" 
##  [7] "Clinic 7"  "Clinic 8"  "Clinic 9"  "Clinic 10" "Clinic 11" "Clinic 12"
## [13] "Clinic 13" "Clinic 14" "Clinic 15" "Clinic 16" "Clinic 17" "Clinic 18"
## [19] "Clinic 19" "Clinic 20"
levels(cosmetic_tib$reason)
## [1] "Change appearance" "Physical reason"

If they’re not in the correct order then:

cosmetic_tib <- cosmetic_tib |>
  dplyr::mutate(
    clinic = forcats::fct_relevel(clinic, paste("Clinic", seq(1, 20, 1))),
    reason = forcats::fct_relevel(reason, "Change appearance")
    )

Plot the data

Create the first plot in the chapter

ggplot2::ggplot(cosmetic_tib, aes(days, post_qol, colour = clinic)) +
  geom_point(alpha = 0.5, size = 1) +
  geom_smooth(method = "lm", se = FALSE, size = 0.5) +
  coord_cartesian(xlim = c(0, 400), ylim = c(0, 100)) +
  scale_y_continuous(breaks = seq(0, 100, 10)) +
  labs(x = "Days post surgery", y = "Quality of life after surgery (%)", colour = "Clinic") +
  guides(colour=guide_legend(ncol=2)) +
  theme_minimal()

Now the second plot

ggplot2::ggplot(cosmetic_tib, aes(days, post_qol)) +
  geom_point(position = position_jitter(width = 0.1, height = NULL), alpha = 0.5, size = 1) +
  geom_smooth(method = "lm", se = FALSE, size = 0.5) +
  coord_cartesian(xlim = c(0, 400), ylim = c(0, 100)) +
  scale_y_continuous(breaks = seq(0, 100, 10)) +
  labs(x = "Days post surgery", y = "Quality of life after surgery (%)", colour = "Clinic") +
  facet_wrap(~ clinic, ncol = 4) +
  theme_minimal()

Summary statistics

qol_sum <- cosmetic_tib |>
  dplyr::group_by(clinic) |>
  datawizard::describe_distribution(select = c("base_qol", "post_qol"), ci = 0.95)

qol_sum |>
  dplyr::mutate(
    Clinic = stringr::str_remove(.group, "clinic=")
    ) |>
  dplyr::select(Clinic, Variable:n) |>
  knitr::kable(caption = "Summary statistics for the surgery data",
               digits = 2)
ClinicVariableMeanSDIQRCI_lowCI_highMinMaxSkewnessKurtosisn
Clinic 1base_qol45.668.1513.5043.5848.9933600.26-0.9532
Clinic 1post_qol25.6611.0415.7522.5929.808520.41-0.3532
Clinic 2base_qol43.788.8412.0041.6546.0225690.220.1067
Clinic 2post_qol54.8513.2720.0051.4258.262781-0.05-0.8867
Clinic 3base_qol44.649.5310.5042.6947.0722660.220.0058
Clinic 3post_qol66.4711.0014.5063.9769.473089-0.411.1458
Clinic 4base_qol47.0710.5013.5044.7749.012173-0.280.2270
Clinic 4post_qol54.7010.6312.2552.0457.3532790.370.0270
Clinic 5base_qol44.398.8814.7542.8545.812562-0.09-0.87124
Clinic 5post_qol47.0512.2616.0044.8449.2716760.110.12124
Clinic 6base_qol44.879.7412.0042.9846.9223690.180.2195
Clinic 6post_qol64.2614.1421.0061.2867.2432950.07-0.4395
Clinic 7base_qol46.8010.2216.2543.2849.852871-0.05-0.6444
Clinic 7post_qol59.689.4511.5057.2662.2042830.940.5844
Clinic 8base_qol45.4410.4514.0043.6946.9621780.470.34129
Clinic 8post_qol35.6111.9116.0033.8737.697650.05-0.27129
Clinic 9base_qol45.549.5511.0043.9246.8118680.190.02117
Clinic 9post_qol39.3212.6716.5036.6741.240720.130.10117
Clinic 10base_qol42.3510.8014.0040.4644.561070-0.230.34110
Clinic 10post_qol36.9412.6015.5034.4238.75670-0.20-0.05110
Clinic 11base_qol45.189.9614.5043.1346.911773-0.150.3189
Clinic 11post_qol50.0116.6326.0046.5453.4916860.21-0.5889
Clinic 12base_qol44.5210.1715.0042.1146.662067-0.10-0.3765
Clinic 12post_qol60.0012.4019.0057.0362.0435900.05-0.5765
Clinic 13base_qol44.1910.5515.2541.2947.742164-0.11-0.4236
Clinic 13post_qol45.3113.0519.2541.5648.811674-0.08-0.2436
Clinic 14base_qol44.549.9614.0042.2446.191865-0.05-0.21118
Clinic 14post_qol45.9210.0315.2544.1547.921965-0.16-0.49118
Clinic 15base_qol45.4110.2513.0043.7047.3623720.22-0.13109
Clinic 15post_qol31.1711.1815.0029.3632.735590.03-0.38109
Clinic 16base_qol45.2910.3917.0042.2648.2227650.12-0.7631
Clinic 16post_qol69.9013.9819.0065.7374.684396-0.08-0.7131
Clinic 17base_qol44.327.859.0042.3446.6129711.031.8359
Clinic 17post_qol44.128.5410.0042.1146.4424660.080.5259
Clinic 18base_qol44.379.2412.0042.7046.561466-0.180.8271
Clinic 18post_qol63.4613.5920.0060.5366.87351000.550.2671
Clinic 19base_qol43.9010.3215.0041.9846.321568-0.220.0072
Clinic 19post_qol44.589.9113.0042.4046.7223670.20-0.5372
Clinic 20base_qol45.0810.0914.5043.0847.3421670.04-0.4680
Clinic 20post_qol51.1510.6916.0048.6753.2031740.33-0.7780

Table 1: Summary statistics for the surgery data

Fixed effect fits

Fit the model to the pooled data

pooled_lm <- lm(post_qol ~ days*reason + base_qol, data = cosmetic_tib)

broom::tidy(pooled_lm, conf.int = TRUE) |>
  knitr::kable(caption = "Parameter estimates for the pooled data model",
               digits = 3)
termestimatestd.errorstatisticp.valueconf.lowconf.high
(Intercept)24.7552.05812.0300.00020.71928.791
days0.0090.0042.0680.0390.0000.017
reasonPhysical reason-2.4791.616-1.5340.125-5.6490.691
base_qol0.4590.04011.5190.0000.3800.537
days:reasonPhysical reason0.0230.0073.2050.0010.0090.037

Table 2: Parameter estimates for the pooled data model

Fit the model across contexts

clinic_lms <- cosmetic_tib |>
  dplyr::arrange(clinic) |>
  dplyr::group_by(clinic) |>
  tidyr::nest() |>
  dplyr::mutate(
    model = purrr::map(.x = data,
                       .f = \(clinic_tib) lm(post_qol ~ days*reason + base_qol, data = clinic_tib)),
    coefs = purrr::map(model, tidy, conf.int = TRUE)
    )

models <- clinic_lms |>
  dplyr::select(-c(data, model)) |>
  tidyr::unnest(coefs)
models |>
  ggplot(aes(estimate)) +
  geom_density() +
  facet_wrap(~term , scales = "free") +
  theme_minimal()

Random effect models

Try to fit the model

cosmetic_mod <- lmerTest::lmer(post_qol ~ days*reason + base_qol + (days|clinic), data = cosmetic_tib)

allFit(cosmetic_mod)
## bobyqa : [OK]
## Nelder_Mead : [OK]
## nlminbwrap : [OK]
## nmkbw : [OK]
## optimx.L-BFGS-B : [OK]
## nloptwrap.NLOPT_LN_NELDERMEAD : [OK]
## nloptwrap.NLOPT_LN_BOBYQA : [OK]

## original model:
## post_qol ~ days * reason + base_qol + (days | clinic) 
## data:  cosmetic_tib 
## optimizers (7): bobyqa, Nelder_Mead, nlminbwrap, nmkbw, optimx.L-BFGS-B,nloptwrap.NLOPT_LN_N...
## differences in negative log-likelihoods:
## max= 0.304 ; std dev= 0.12

Rescale days

cosmetic_tib <- cosmetic_tib |>
  dplyr::mutate(
    months = days*12/365
    )

Standardizing variables

We don’t use this model, but if you wanted to fit the model to standardized variables then use this code.

cosmetic_tib <- cosmetic_tib |>
  dplyr::mutate(across(.cols = where(is.double),
                       .fns = \(column) (column-mean(column, na.rm = T))/sd(column, na.rm = T),
                       .names = "z_{.col}")
                )

cosmetic_modz <- lmerTest::lmer(z_post_qol ~ z_days*reason + z_base_qol + (z_days|clinic), data = cosmetic_tib)

broom.mixed::tidy(cosmetic_modz)

Fit the model using months

cosmetic_mod <- lmerTest::lmer(post_qol ~ months*reason + base_qol + (months|clinic), data = cosmetic_tib, REML = T)

allFit(cosmetic_mod)
## bobyqa : [OK]
## Nelder_Mead : [OK]
## nlminbwrap : [OK]
## nmkbw : [OK]
## optimx.L-BFGS-B : [OK]
## nloptwrap.NLOPT_LN_NELDERMEAD : [OK]
## nloptwrap.NLOPT_LN_BOBYQA : [OK]

## original model:
## post_qol ~ months * reason + base_qol + (months | clinic) 
## data:  cosmetic_tib 
## optimizers (7): bobyqa, Nelder_Mead, nlminbwrap, nmkbw, optimx.L-BFGS-B,nloptwrap.NLOPT_LN_N...
## differences in negative log-likelihoods:
## max= 5.3e-07 ; std dev= 1.98e-07

Change the optimizer

cosmetic_bob <- lmerTest::lmer(
  post_qol ~ months*reason + base_qol + (months|clinic),
  data = cosmetic_tib,
  control = lmerControl(optimizer="bobyqa")
  )

Use maximum likelihood estimation

cosmetic_ml <- lmerTest::lmer(
  post_qol ~ months*reason + base_qol + (months|clinic),
  data = cosmetic_tib,
  REML = F
  )

Interpreting the model

anova(cosmetic_bob) |> 
  knitr::kable(caption = "Fixed effects for the final model",
               digits = c(rep(2, 5), 3))
Sum SqMean SqNumDFDenDFF valuePr(>F)
months275.41275.41119.033.210.089
reason288.06288.0611535.473.360.067
base_qol32785.5332785.5311534.92382.660.000
months:reason1014.541014.5411535.1211.840.001

Table 3: Fixed effects for the final model

options(knitr.kable.NA = '')
broom.mixed::tidy(cosmetic_bob, conf.int = T) |> 
  knitr::kable(caption = "Parameter estimates for the final model",
               digits = c(rep(2, 7), 3, 2, 2))
effectgrouptermestimatestd.errorstatisticdfp.valueconf.lowconf.high
fixed(Intercept)25.074.026.2422.480.00016.7533.39
fixedmonths0.480.401.2219.620.239-0.351.31
fixedreasonPhysical reason-1.790.98-1.831535.470.067-3.710.12
fixedbase_qol0.470.0219.561534.920.0000.420.52
fixedmonths:reasonPhysical reason0.450.133.441535.120.0010.190.70
ran_parsclinicsd__(Intercept)17.05
ran_parscliniccor__(Intercept).months-0.70
ran_parsclinicsd__months1.73
ran_parsResidualsd__Observation9.26

Table 4: Parameter estimates for the final model

cosmetic_slopes <- modelbased::estimate_slopes(cosmetic_bob,
                                               trend = "months",
                                               at = "reason",
                                               ci = 0.95) 

cosmetic_slopes |> 
  knitr::kable(caption = "Change over time for different reasons for surgery",
        digits = 3)
reasonCoefficientSECICI_lowCI_hightdf_errorp
Change appearance0.4820.3970.95-0.3461.3111.21519.6230.239
Physical reason0.9290.4010.950.0941.7652.31620.5580.031

Table 5: Change over time for different reasons for surgery

Some alternative ways to get the same simple slopes

cosmetic_slopes <- interactions::sim_slopes(
  cosmetic_bob,
  pred = months,
  modx = reason,
  confint = TRUE
  ) 

cosmetic_slopes <-  emmeans::emtrends(cosmetic_bob, var = "months", spec = "reason")

Plot the simple slopes

interactions::interact_plot(
  model = cosmetic_bob,
  pred = months,
  modx = reason,
  interval = TRUE,
  x.label = "Months since surgery",
  y.label = "Quality of life post-surgery (0-100)",
  legend.main = "Reason for surgery"
  ) 

Previous
Next