R code Chapter 12

Mae Jemstone character from Discovering Statistics using R and RStudio

This document contains abridged sections from Discovering Statistics Using R and RStudio by Andy Field so there are some copyright considerations. You can use this material for teaching and non-profit activities but please do not meddle with it or claim it as your own work. See the full license terms at the bottom of the page.

Load packages

Remember to load the tidyverse:

library(tidyverse)

Load the data

Load the data from the discover package:

pupluv_tib <- discovr::puppy_love

If you want to read the file from the CSV and you have set up your project folder as I suggest in Chapter 1, then the code you would use is:

pupluv_tib <- here::here("data/puppy_love.csv") %>%
  readr::read_csv() %>%
  dplyr::mutate(
    dose = forcats::as_factor(dose)
  )

A good idea to check that the levels of dose are in the order Control, 15 minutes, 30 minutes by executing:

levels(pupluv_tib$dose)
## [1] "No puppies" "15 mins"    "30 mins"

If they’re not in the correct order then:

pupluv_tib <- pupluv_tib %>%
  dplyr::mutate(
    dose = forcats::fct_relevel(dose, "No puppies", "15 mins", "30 mins")
  )

Self-test

pupluv_tib %>% 
  dplyr::group_by(dose) %>% 
  dplyr::summarize(
    dplyr::across(c(happiness, puppy_love), list(mean = mean, sd = sd), .names = "{col}_{fn}")
    ) %>% 
  dplyr::mutate(
    dplyr::across(where(is.numeric), ~round(., 2))
  )
dosehappiness_meanhappiness_sdpuppy_love_meanpuppy_love_sd
No puppies3.221.793.442.07
15 mins4.881.463.121.73
30 mins4.852.122.001.63
pupluv_tib %>% 
  dplyr::summarize(
    dplyr::across(c(happiness, puppy_love), list(mean = mean, sd = sd), .names = "{col}_{fn}")
    ) %>% 
  dplyr::mutate(
    dplyr::across(where(is.numeric), ~round(., 2))
  )
happiness_meanhappiness_sdpuppy_love_meanpuppy_love_sd
4.371.962.731.86

Self-test

pupluv_tib <- pupluv_tib %>%
  dplyr::mutate(
    none_vs_30 = ifelse(dose == "30 mins", 1, 0),
    none_vs_15 = ifelse(dose == "15 mins", 1, 0)
  )

pupluv_lm <- lm(happiness ~ none_vs_15 + none_vs_30 + puppy_love, data = pupluv_tib)
broom::glance(pupluv_lm)
r.squaredadj.r.squaredsigmastatisticp.valuedflogLikAICBICdeviancedf.residualnobs
0.28764990.20545571.7436383.4996360.02954493-57.10086124.2017131.207779.047122630
broom::tidy(pupluv_lm, conf.int = TRUE) %>% 
  dplyr::mutate(
    dplyr::across(where(is.numeric), ~round(., 3))
  )
termestimatestd.errorstatisticp.valueconf.lowconf.high
(Intercept)1.7890.8672.0630.0490.0073.572
none_vs_151.7860.8492.1020.0450.0403.532
none_vs_302.2250.8032.7710.0100.5753.875
puppy_love0.4160.1872.2270.0350.0320.800

Self-test

luvdose_lm <- lm(puppy_love ~ dose, data = pupluv_tib)

anova(luvdose_lm) %>% 
  parameters::model_parameters()
ParameterSum_SquaresdfMean_SquareFp
dose12.7694426.3847221.9792540.1577178
Residuals87.09722273.225823NANA

Self-test

cov_first_lm <- lm(happiness ~ puppy_love + dose, data = pupluv_tib)

anova(cov_first_lm) %>% 
  parameters::model_parameters()
ParameterSum_SquaresdfMean_SquareFp
puppy_love6.73435716.7343572.2150500.1487004
dose25.185194212.5925974.1419290.0274465
Residuals79.047116263.040274NANA
broom::tidy(cov_first_lm, conf.int = TRUE) %>% 
  dplyr::mutate(
    dplyr::across(where(is.numeric), ~round(., 3))
  )
termestimatestd.errorstatisticp.valueconf.lowconf.high
(Intercept)1.7890.8672.0630.0490.0073.572
puppy_love0.4160.1872.2270.0350.0320.800
dose15 mins1.7860.8492.1020.0450.0403.532
dose30 mins2.2250.8032.7710.0100.5753.875

Self-test

dose_first_lm <- lm(happiness ~ dose + puppy_love, data = pupluv_tib)

anova(dose_first_lm) %>% 
  parameters::model_parameters()
ParameterSum_SquaresdfMean_SquareFp
dose16.8438028.4219022.7701130.0811729
puppy_love15.07575115.0757484.9586810.0348334
Residuals79.04712263.040274NANA
broom::tidy(dose_first_lm, conf.int = TRUE) %>% 
  dplyr::mutate(
    dplyr::across(where(is.numeric), ~round(., 3))
  )
termestimatestd.errorstatisticp.valueconf.lowconf.high
(Intercept)1.7890.8672.0630.0490.0073.572
dose15 mins1.7860.8492.1020.0450.0403.532
dose30 mins2.2250.8032.7710.0100.5753.875
puppy_love0.4160.1872.2270.0350.0320.800

Set contrasts

puppy_vs_none <- c(-2/3, 1/3, 1/3)
short_vs_long <- c(0, -1/2, 1/2)

contrasts(pupluv_tib$dose) <- cbind(puppy_vs_none, short_vs_long)

No covariate

pup_lm <- lm(happiness ~ dose, data = pupluv_tib) 
anova(pup_lm)
## Analysis of Variance Table
## 
## Response: happiness
##           Df Sum Sq Mean Sq F value Pr(>F)
## dose       2 16.844  8.4219  2.4159 0.1083
## Residuals 27 94.123  3.4860

Type III sums of squares

pupluv_lm <- lm(happiness ~ puppy_love + dose, data = pupluv_tib)
car::Anova(pupluv_lm, type = 3)
## Anova Table (Type III tests)
## 
## Response: happiness
##             Sum Sq Df F value    Pr(>F)    
## (Intercept) 76.069  1 25.0205 3.342e-05 ***
## puppy_love  15.076  1  4.9587   0.03483 *  
## dose        25.185  2  4.1419   0.02745 *  
## Residuals   79.047 26                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Self test

dose_first_lm <- lm(happiness ~ dose + puppy_love, data = pupluv_tib)
car::Anova(cov_first_lm, type = 3)
## Anova Table (Type III tests)
## 
## Response: happiness
##             Sum Sq Df F value  Pr(>F)  
## (Intercept) 12.943  1  4.2572 0.04920 *
## puppy_love  15.076  1  4.9587 0.03483 *
## dose        25.185  2  4.1419 0.02745 *
## Residuals   79.047 26                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Adjusted means

modelbased::estimate_means(pupluv_lm, fixed = "puppy_love")
dosepuppy_loveMeanSECI_lowCI_high
No puppies2.7333332.9263700.59620451.7008544.151886
15 mins2.7333334.7120500.62079713.4359835.988117
30 mins2.7333335.1512510.50263234.1180766.184427

Parameter estimates

broom::tidy(pupluv_lm, conf.int = TRUE) %>% 
  dplyr::mutate(
    dplyr::across(where(is.numeric), ~round(., 3))
  )
termestimatestd.errorstatisticp.valueconf.lowconf.high
(Intercept)3.1260.6255.0020.0001.8414.411
puppy_love0.4160.1872.2270.0350.0320.800
dosepuppy_vs_none2.0050.7202.7850.0100.5253.485
doseshort_vs_long0.4390.8110.5410.593-1.2282.107

Post hoc tests

modelbased::estimate_contrasts(pupluv_lm, fixed = "puppy_love")
Level1Level2puppy_loveDifferenceSECI_lowCI_hightdfpStd_Difference
15 mins30 mins2.733333-0.43920120.8112214-2.5150701.6366676-0.5414073260.5928363-0.2245258
No puppies15 mins2.733333-1.78568010.8493553-3.9591310.3877712-2.1023947260.0907071-0.9128647
No puppies30 mins2.733333-2.22488130.8028109-4.279228-0.1705345-2.7713641260.0305250-1.1373905

Homogeneity of regression slopes

hors_lm <- lm(happiness ~ puppy_love*dose, data = pupluv_tib)
car::Anova(hors_lm, type = 3)
## Anova Table (Type III tests)
## 
## Response: happiness
##                 Sum Sq Df F value    Pr(>F)    
## (Intercept)     53.542  1 21.9207 9.323e-05 ***
## puppy_love      17.182  1  7.0346   0.01395 *  
## dose            36.558  2  7.4836   0.00298 ** 
## puppy_love:dose 20.427  2  4.1815   0.02767 *  
## Residuals       58.621 24                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Alternatively, update the previous model:

hors_lm <- update(pupluv_lm, .~. + dose:puppy_love)
car::Anova(hors_lm, type = 3)
## Anova Table (Type III tests)
## 
## Response: happiness
##                 Sum Sq Df F value    Pr(>F)    
## (Intercept)     53.542  1 21.9207 9.323e-05 ***
## puppy_love      17.182  1  7.0346   0.01395 *  
## dose            36.558  2  7.4836   0.00298 ** 
## puppy_love:dose 20.427  2  4.1815   0.02767 *  
## Residuals       58.621 24                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual plots:

ggplot2::autoplot(pupluv_lm,
                  which = c(1, 3, 2, 4),
                  colour = "#5c97bf",
                  smooth.colour = "#ef4836",
                  alpha = 0.5,
                  size = 1) + 
  theme_minimal()

robust model

Robust parameter estimates:

pupluv_rob <- robust::lmRob(happiness ~ puppy_love + dose, data = pupluv_tib)
summary(pupluv_rob)
## 
## Call:
## robust::lmRob(formula = happiness ~ puppy_love + dose, data = pupluv_tib)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.34090 -0.34090  0.02081  0.89243  5.92569 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)
## (Intercept)         2.1260     2.2616   0.940    0.356
## puppy_love          0.6333     0.5824   1.087    0.287
## dosepuppy_vs_none   1.6276     1.8641   0.873    0.391
## doseshort_vs_long  -0.4549     2.8052  -0.162    0.872
## 
## Residual standard error: 1.174 on 26 degrees of freedom
## Multiple R-Squared: 0.3901 
## 
## Test for Bias:
##             statistic p-value
## M-estimate     -5.868  1.0000
## LS-estimate     5.659  0.2261

Heteroscedasticity consistent standard errors:

parameters::model_parameters(pupluv_lm, robust = TRUE, vcov.type = "HC4", digits = 3)
ParameterCoefficientSECI_lowCI_hightdf_errorp
(Intercept)3.12604210.59203141.90910424.34298005.2801968260.0000161
puppy_love0.41604210.19857440.00786670.82421752.0951451260.0460462
dosepuppy_vs_none2.00528070.51572370.94519543.06536603.8882848260.0006252
doseshort_vs_long0.43920120.7399362-1.08175941.96016190.5935663260.5579313

Bootstrap parameters

parameters::bootstrap_parameters(pupluv_lm)
ParameterCoefficientCI_lowCI_highp
(Intercept)3.18933532.11689954.51433210.0000000
puppy_love0.3954619-0.06799830.70633470.0739261
dosepuppy_vs_none1.99378210.88656443.01624010.0039960
doseshort_vs_long0.3637506-0.82144661.83753690.5954046

Bayes factors

pupcov_bf <-  BayesFactor::lmBF(formula = happiness ~ puppy_love, data = pupluv_tib, rscaleFixed = "medium", rscaleCont = "medium")
pupcov_bf
## Bayes factor analysis
## --------------
## [1] puppy_love : 0.6778239 ±0%
## 
## Against denominator:
##   Intercept only 
## ---
## Bayes factor type: BFlinearModel, JZS
pup_bf <-  BayesFactor::lmBF(formula = happiness ~ puppy_love + dose, data = pupluv_tib, rscaleCont = "medium", rscaleFixed = "medium")
pup_bf
## Bayes factor analysis
## --------------
## [1] puppy_love + dose : 1.431258 ±0.7%
## 
## Against denominator:
##   Intercept only 
## ---
## Bayes factor type: BFlinearModel, JZS
pup_bf/pupcov_bf
## Bayes factor analysis
## --------------
## [1] puppy_love + dose : 2.111548 ±0.7%
## 
## Against denominator:
##   happiness ~ puppy_love 
## ---
## Bayes factor type: BFlinearModel, JZS

Effect sizes

car::Anova(pupluv_lm, type = 3) %>% 
  effectsize::eta_squared(., ci = 0.95)
ParameterEta_Sq_partialCICI_lowCI_high
2puppy_love0.16017090.9500.4108051
3dose0.24162560.9500.4733256

Without a pipe:

pupluv_aov <- car::Anova(pupluv_lm, type = 3)
effectsize::omega_squared(pupluv_aov, ci = 0.95)
ParameterOmega_Sq_partialCICI_lowCI_high
2puppy_love0.11657350.95-0.03703700.3795428
3dose0.17318600.95-0.07407410.4242189
Previous