---
title: "R code in Chapter 2"
author: "Andy P. Field"
date: 'Last updated `r format(Sys.Date(), "%d %B %Y")`'
output: html_document
---
```{r, include = FALSE}
# Document-wide chunk defaults: keep warnings and messages out of the output.
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
library(magrittr)
library(dplyr)
library(ggplot2)
```
```{r setup, results="hide", eval = FALSE}
# Shown to the reader but not evaluated (eval = FALSE): attach the tidyverse
# before running the code in this file.
library(tidyverse)
```
***
## Usage
This file contains code relevant to chapter 2 of [Discovering Statistics Using R and RStudio](https://www.discovr.rocks) by [Andy Field](https://www.discoveringstatistics.com/about). These files contain abridged sections from the book so there are some copyright considerations but I offer them under a [Creative Commons Attribution-NonCommercial-NoDerivatives 4.0 International License](http://creativecommons.org/licenses/by-nc-nd/4.0/).^[Basically you can use this material for teaching and non-profit activities but do not meddle with it or claim it as your own work.]
***
## The median
```{r median}
# Store the friends scores in a one-column tibble; later chunks reuse fb_tib.
fb_tib <- tibble::tibble(
  friends = c(57, 40, 103, 234, 93, 53, 116, 98, 108, 121, 22)
)

# Median using base R:
median(fb_tib$friends)

# Median using a single pipe into summarize():
fb_tib %>%
  dplyr::summarize(median = median(friends))
```
## The mean
```{r mean}
# Mean, then the mean with trim = 0.1, using base R:
mean(fb_tib$friends)
mean(fb_tib$friends, trim = 0.1)

# Median, mean and trimmed mean together in one piped summary:
fb_tib %>%
  dplyr::summarize(
    median = median(friends),
    mean = mean(friends),
    `trimmed mean 10%` = mean(friends, trim = 0.1)
  )
```
### The dispersion in a distribution
```{r dispersion}
# Range: largest score minus smallest score
max(fb_tib$friends) - min(fb_tib$friends)

# All three quartiles in one call
quantile(fb_tib$friends, probs = c(0.25, 0.5, 0.75))

# Lower quartile only
quantile(fb_tib$friends, probs = 0.25)

# Upper quartile only
quantile(fb_tib$friends, probs = 0.75)

# Inter-quartile range
IQR(fb_tib$friends)

# Variance
var(fb_tib$friends)

# Standard deviation
sd(fb_tib$friends)

# Every statistic above in a single piped summary, rounded to 2 decimal places:
fb_tib %>%
  dplyr::summarize(
    median = median(friends),
    mean = mean(friends),
    `trimmed mean 10%` = mean(friends, trim = 0.1),
    range = max(friends) - min(friends),
    `lower quartile` = quantile(friends, probs = 0.25),
    `upper quartile` = quantile(friends, probs = 0.75),
    IQR = IQR(friends),
    var = var(friends),
    sd = sd(friends)
  ) %>%
  round(2)
```
***
## Pieces of great
### Pieces of great 2.1
```{r}
# Round a number with no digits argument, then to 2 and to 4 decimal places
round(3.211420)
round(3.211420, 2)
round(3.211420, 4)

# Round the trimmed mean: as a nested call, then as the piped equivalent
round(mean(fb_tib$friends, trim = 0.1), 2)
mean(fb_tib$friends, trim = 0.1) %>% round(2)

# Round every value in a piped summary table
fb_tib %>%
  dplyr::summarize(
    median = median(friends),
    mean = mean(friends),
    `trimmed mean 10%` = mean(friends, trim = 0.1)
  ) %>%
  round(2)
```
### Pieces of great 2.2
```{r}
# Summarize one column of a tibble as its median, mean and 10% trimmed mean,
# with every value rounded to 2 decimal places.
#
# Arguments:
#   tibble    the data to summarize (first argument, so it can be piped in)
#   variable  unquoted name of the column to summarize
#
# Returns the rounded summary table.
get_summary <- function(tibble, variable) {
  tibble %>%
    dplyr::summarise(
      # {{ }} passes the caller's unquoted column name through to summarise()
      median = median({{ variable }}),
      mean = mean({{ variable }}),
      `trimmed mean 10%` = mean({{ variable }}, trim = 0.1)
    ) %>%
    round(., 2)
}

# Apply the function to the friends scores
fb_tib %>%
  get_summary(., friends)
```
Or, annoy people by using random names for the inputs of the function:
```{r}
# The same summary function with deliberately arbitrary argument names, to show
# that the names of a function's inputs are the author's choice.
#
# Arguments:
#   johnson_pitchfork      the data to summarize (first argument, so it can be
#                          piped in)
#   harry_the_hungy_hippo  unquoted name of the column to summarize
#
# Returns the median, mean and 10% trimmed mean, rounded to 2 decimal places.
get_summary <- function(johnson_pitchfork, harry_the_hungy_hippo) {
  johnson_pitchfork %>%
    dplyr::summarise(
      # {{ }} passes the caller's unquoted column name through to summarise()
      median = median({{ harry_the_hungy_hippo }}),
      mean = mean({{ harry_the_hungy_hippo }}),
      `trimmed mean 10%` = mean({{ harry_the_hungy_hippo }}, trim = 0.1)
    ) %>%
    round(., 2)
}

# Called exactly as before: the argument names only matter inside the function
fb_tib %>%
  get_summary(., friends)
```
Include options relating to `mean()` and `median()`
```{r}
# Summarize one column of a tibble as its median and (optionally trimmed) mean,
# with every value rounded to 2 decimal places.
#
# Arguments:
#   tibble     the data to summarize (first argument, so it can be piped in)
#   variable   unquoted name of the column to summarize
#   na_remove  passed to the na.rm argument of median() and mean()
#              (default FALSE)
#   trim_val   passed to the trim argument of mean() (default 0)
#
# Returns the rounded summary table.
get_summary <- function(tibble, variable, na_remove = FALSE, trim_val = 0) {
  tibble %>%
    dplyr::summarise(
      # {{ }} passes the caller's unquoted column name through to summarise()
      median = median({{ variable }}, na.rm = na_remove),
      mean = mean({{ variable }}, na.rm = na_remove, trim = trim_val)
    ) %>%
    round(., 2)
}

# Remove missing values and trim 10% from the mean
fb_tib %>%
  get_summary(., friends, na_remove = TRUE, trim_val = 0.1)
```