Appendix

Appendix

Multiple overlayed models

library(tidyverse)

## Make some data.........................................
n <- 200
x <- seq(from = -9, to = 9, length.out = n)
y_t <- exp(x) / (1 + exp(x))
y <- y_t + rnorm(n, mean = 0, sd = 0.3)
i <- sample(c(TRUE, FALSE), size = n, replace = TRUE)
data <- tibble(x, y, Set = c("train", "test")[i %% 2 + 1])

## Construct some polynomial models.......................
preds <- list()
for (i in 1:6) {
    ## [glue]
    mod <- lm(y ~ poly(x, i), filter(data, Set == "train"))
    data <- data %>%
        mutate("Deg: {i}" := predict(mod, newdata = data["x"]))
}

## Plot the data..........................................
data_tidy <- data %>%
    select(!c(y, Set)) %>%
    pivot_longer(cols = !x, names_to = "Degree")
data_tidy

ggplot(data_tidy) +
    geom_point(data = data, aes(x = x, y = y, shape = Set), size = 2) +
    geom_line(aes(x = x, y = value, color = Degree), lwd = 1) +
    geom_line(data = tibble(x, y = y_t), aes(x = x, y = y), lwd = 3) +
    theme_bw() +
    labs(x = "Input", y = "Output",
         title = "Visualisation of Model Performance")

ggsave("many_model_plot.png")


## [glue] This is glue like syntax available in dplyr>=1.

Fisher's Z Transform

get_conf_interval <- function(samp, alpha) {
    lower = (1-alpha)/2
    upper = 1-lower

    z <- make_sampling_dist(samp)

    ## z is not normally distributed, we could improve accuracy with
    ## Fisher's z-transformation
    z <- 0.5 * log((1 + z) / (1 - z))

    quantile(z, c(lower, upper))
}