library(tidyverse)
## Make some data.........................................
n <- 200
x <- seq(from = -9, to = 9, length.out = n)
y_t <- exp(x) / (1 + exp(x))
y <- y_t + rnorm(n, mean = 0, sd = 0.3)
i <- sample(c(TRUE, FALSE), size = n, replace = TRUE)
data <- tibble(x, y, Set = c("train", "test")[i %% 2 + 1])
## Construct some polynomial models.......................
preds <- list()
for (i in 1:6) {
## [glue]
mod <- lm(y ~ poly(x, i), filter(data, Set == "train"))
data <- data %>%
mutate("Deg: {i}" := predict(mod, newdata = data["x"]))
}
## Plot the data..........................................
data_tidy <- data %>%
select(!c(y, Set)) %>%
pivot_longer(cols = !x, names_to = "Degree")
data_tidy
ggplot(data_tidy) +
geom_point(data = data, aes(x = x, y = y, shape = Set), size = 2) +
geom_line(aes(x = x, y = value, color = Degree), lwd = 1) +
geom_line(data = tibble(x, y = y_t), aes(x = x, y = y), lwd = 3) +
theme_bw() +
labs(x = "Input", y = "Output",
title = "Visualisation of Model Performance")
ggsave("many_model_plot.png")
## [glue] This is glue like syntax available in dplyr>=1.
get_conf_interval <- function(samp, alpha) {
lower = (1-alpha)/2
upper = 1-lower
z <- make_sampling_dist(samp)
## z is not normally distributed, we could improve accuracy with
## Fisher's z-transformation
z <- 0.5 * log((1 + z) / (1 - z))
quantile(z, c(lower, upper))
}