Preamble

Load Packages

# Attach every package the analysis relies on, installing what is missing.
#
# pacman is bootstrapped first (installed only when it is not yet available)
# and then attached; pacman::p_load() afterwards installs, where necessary,
# and attaches each package listed below.
#
# Takes no arguments. Called for its side effect of attaching packages; the
# value of the pacman::p_load() call is passed through as the result.
load.pac <- function() {

  # requireNamespace() only tests for availability. Unlike require() it does
  # not attach the package as a side effect, so the attach happens exactly
  # once, in the library() call below.
  if (!requireNamespace("pacman", quietly = TRUE)) {
    install.packages("pacman")
  }
  library(pacman)

  pacman::p_load(xts, sp, gstat, ggplot2, rmarkdown, reshape2, ggmap, wesanderson,
                 parallel, dplyr, plotly, tidyverse, reticulate, UsingR, Rmpfr, latex2exp,
                 mise, GGally, usmap, gridExtra)

#  devtools::install_github("tidyverse/tidyverse")
}

# Attach the analysis packages through the load.pac() helper.
load.pac()
## Loading required package: pacman
# mise() comes from the mise package requested inside load.pac().
# NOTE(review): presumably this resets the session (workspace / console)
# before the analysis starts -- confirm against the mise documentation.
mise()

## Load Data: I couldn’t get read_xls to work, so I exported the spreadsheets to CSV with LibreOffice instead.

# Import the three CSV exports and preview the first rows of each table.
orders <- read.csv(file = "./11_Data_Orders.csv")
head(orders)

returns <- read.csv(file = "./11_Data_Returns.csv")
head(returns)

peopl <- read.csv(file = "./11_Data_People.csv")
head(peopl)

Question 1

Visualise the Data

# Parse the day/month/year date strings into Date objects.
orders$Ship.Date <- as.Date(orders$Ship.Date, format = "%d/%m/%Y")
orders$Order.Date <- as.Date(orders$Order.Date, format = "%d/%m/%Y")
## ggplot(orders, aes(x = Order.Date, y = Sales, col = Sub.Category)) +
##   geom_col()


# Profit against Sales, coloured by sub-category, with one smoother per
# sub-category and an untitled colour legend.
ppt <- ggplot(orders, aes(x = Sales, y = Profit, col = Sub.Category)) +
  geom_point(alpha = 0.3) +
  theme_bw() +
  geom_smooth() +
  guides(col = guide_legend("")) +
  labs(title = "Profit given Sales amount across categories")

# The same scatter plot without smoothers and without the colour legend.
# "none" replaces the deprecated `guides(col = FALSE)` spelling.
ppl <- ggplot(orders, aes(x = Sales, y = Profit, col = Sub.Category)) +
  geom_point(alpha = 0.3) +
  theme_bw() +
  guides(col = "none") +
  labs(title = "Profit given Sales amount across categories")


# matrix(2:1, nrow = 1) places the second grob (ppl) in the left cell and the
# first grob (ppt) in the right cell.
grid.arrange(grobs = list(ppt, ppl), layout_matrix = matrix(2:1, nrow = 1))
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

(a) Change the Mark Size

The size of the points can be increased by specifying the size parameter in the geom_point layer:

# Profit against Sales with enlarged points (size = 6), one smoother per
# sub-category and an untitled colour legend.
ppt <- ggplot(orders, aes(x = Sales, y = Profit, col = Sub.Category)) +
  geom_point(alpha = 0.3, size = 6) +
  theme_bw() +
  geom_smooth() +
  guides(col = guide_legend("")) +
  labs(title = "Profit given Sales amount across categories")

# The same enlarged scatter plot without smoothers and without the legend.
# "none" replaces the deprecated `guides(col = FALSE)` spelling.
ppl <- ggplot(orders, aes(x = Sales, y = Profit, col = Sub.Category)) +
  geom_point(alpha = 0.3, size = 6) +
  theme_bw() +
  guides(col = "none") +
  labs(title = "Profit given Sales amount across categories")


# matrix(2:1, nrow = 1) places ppl in the left cell and ppt in the right cell.
grid.arrange(grobs = list(ppt, ppl), layout_matrix = matrix(2:1, nrow = 1))
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

The size could even be mapped to another variable, for example the Discount:

# Profit against Sales with the point size mapped to Discount. The size
# legend is suppressed and the colour legend is shown without a title.
# "none" replaces the deprecated `FALSE` spelling inside guides().
ppt <- ggplot(orders, aes(x = Sales, y = Profit, col = Sub.Category)) +
  geom_point(alpha = 0.3, aes(size = Discount)) +
  theme_bw() +
  geom_smooth() +
  guides(col = guide_legend(""), size = "none") +
  labs(title = "Profit given Sales amount across categories")

# The same plot without smoothers; here only the colour legend is removed,
# so the size legend remains visible.
ppl <- ggplot(orders, aes(x = Sales, y = Profit, col = Sub.Category)) +
  geom_point(alpha = 0.3, aes(size = Discount)) +
  theme_bw() +
  guides(col = "none") +
  labs(title = "Profit given Sales amount across categories")


# matrix(2:1, nrow = 1) places ppl in the left cell and ppt in the right cell.
grid.arrange(grobs = list(ppt, ppl), layout_matrix = matrix(2:1, nrow = 1))
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

This indicates that discounts hurt profits.

(b) Customise the Shapes

The shape can be set to almost anything, even to a Unicode character:

# Profit against Sales drawn with a Unicode glyph as the point shape and the
# point size mapped to Discount. The size legend is suppressed.
# "none" replaces the deprecated `FALSE` spelling inside guides().
ppt <- ggplot(orders, aes(x = Sales, y = Profit, col = Sub.Category)) +
  geom_point(alpha = 0.6, aes(size = Discount), shape = "𝚺") +
  theme_bw() +
  geom_smooth() +
  guides(col = guide_legend(""), size = "none") +
  labs(title = "Profit given Sales amount across categories")

# The same plot without smoothers and without the colour legend.
ppl <- ggplot(orders, aes(x = Sales, y = Profit, col = Sub.Category)) +
  geom_point(alpha = 0.6, aes(size = Discount), shape = "𝚺") +
  theme_bw() +
  guides(col = "none") +
  labs(title = "Profit given Sales amount across categories")


# matrix(2:1, nrow = 1) places ppl in the left cell and ppt in the right cell.
grid.arrange(grobs = list(ppt, ppl), layout_matrix = matrix(2:1, nrow = 1))
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

It would be preferable, however, to map the shape to a variable, for example whether the quantity is above or below average:

# Two-level factor: "High" when Quantity exceeds the overall mean Quantity,
# "Low" otherwise.
orders$Quantity_cat <- factor(
  ifelse(orders$Quantity > mean(orders$Quantity), "High", "Low"),
  levels = c("High", "Low"),
  ordered = FALSE
)

# Profit against Sales with the point size mapped to Discount and the point
# shape mapped to the quantity category. The size legend is suppressed.
# "none" replaces the deprecated `FALSE` spelling inside guides().
ppt <- ggplot(orders, aes(x = Sales, y = Profit, col = Sub.Category)) +
  geom_point(alpha = 0.6, aes(size = Discount, shape = Quantity_cat)) +
  theme_bw() +
  geom_smooth() +
  guides(col = guide_legend(""), size = "none") +
  labs(title = "Profit given Sales amount across categories")

# The same plot without smoothers and without the colour legend.
ppl <- ggplot(orders, aes(x = Sales, y = Profit, col = Sub.Category)) +
  geom_point(alpha = 0.6, aes(size = Discount, shape = Quantity_cat)) +
  theme_bw() +
  guides(col = "none") +
  labs(title = "Profit given Sales amount across categories")


# matrix(2:1, nrow = 1) places ppl in the left cell and ppt in the right cell.
grid.arrange(grobs = list(ppt, ppl), layout_matrix = matrix(2:1, nrow = 1))
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

(c) Customise the Mark Labels

The axis and title labels can be adjusted arbitrarily; the TeX() function from the latex2exp package even allows \(\LaTeX\) syntax to be used:

library(ggrepel)
# Two-level factor: "High" when Quantity exceeds the overall mean Quantity,
# "Low" otherwise.
orders$Quantity_cat <- factor(
  ifelse(orders$Quantity > mean(orders$Quantity), "High", "Low"),
  levels = c("High", "Low"),
  ordered = FALSE
)

# Profit against Sales with the point size mapped to Discount and the point
# shape mapped to the quantity category. The size legend is suppressed.
# "none" replaces the deprecated `FALSE` spelling inside guides().
ppt <- ggplot(orders, aes(x = Sales, y = Profit, col = Sub.Category)) +
  geom_point(alpha = 0.6, aes(size = Discount, shape = Quantity_cat)) +
  theme_bw() +
  geom_smooth() +
  guides(col = guide_legend(""), size = "none") +
  labs(title = "Profit given Sales amount across categories")

# The same plot without smoothers and colour legend, demonstrating custom
# labels: the title is passed through TeX() and the x-axis label uses a
# Unicode letter.
ppl <- ggplot(orders, aes(x = Sales, y = Profit, col = Sub.Category)) +
  geom_point(alpha = 0.6, aes(size = Discount, shape = Quantity_cat)) +
  theme_bw() +
  guides(col = "none") +
  labs(title = TeX("f( a ) = \\frac{1}{2 π i}  ∲_Ύ \\frac{f( z ) }{z - a} dz"),
       x = "𝑺ales")


# matrix(2:1, nrow = 1) places ppl in the left cell and ppt in the right cell.
grid.arrange(grobs = list(ppt, ppl), layout_matrix = matrix(2:1, nrow = 1))
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

Question 2

Include Multiple Measures in the Visualisation

Using Multiple Rows / Columns

This can be implemented readily with facet_grid (ggplot2 has no geom_facet); it does, however, require reshaping the data into tidy (long) format first:

# Stack the three measures into long format: `name` holds the measure
# (Quantity / Discount / Sales) and `value` holds its value.
orders_tidy <- pivot_longer(orders, cols = c(Quantity, Discount, Sales))


# One panel per measure, each with its own x-axis range.
# The default GAM smoother (10 knots) cannot be fitted in panels whose x
# variable takes only a handful of distinct values ("x has insufficient
# unique values to support 10 knots"), which silently drops the trend lines
# there. A linear fit works in every panel.
ggplot(orders_tidy, aes(x = value, y = Profit, col = Sub.Category)) +
  geom_point(alpha = 0.3) +
  facet_grid(. ~ name, scales = "free_x") +
  theme_bw() +
  geom_smooth(method = "lm", formula = y ~ x) +
  guides(col = guide_legend("")) +
  labs(title = "Profit given Sales amount across categories", x = "")
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Computation failed in `stat_smooth()`:
## x has insufficient unique values to support 10 knots: reduce k.

## Warning: Computation failed in `stat_smooth()`:
## x has insufficient unique values to support 10 knots: reduce k.

Sharing the y-axis

Sharing the y-axis is a little more work. First look at the ratio between the data ranges; playing around with this reveals that 20,000 is a good scaling factor.

# Inspect the span of Sales and of Profit.
range(orders[["Sales"]])
## [1]     0.444 22638.480
range(orders[["Profit"]])
## [1] -6599.978  8399.976

Now the data can be modified, rescaled and a custom y-axis implemented:

# Single source of truth for the factor that brings Discount onto the Profit
# scale; it is applied to the data and inverted again for the secondary axis.
discount_scale <- 20000

# Rescale Discount, then stack Discount and Profit into long format
# (`name` = measure, `value` = its value).
orders_tidy <- orders %>%
  mutate(Discount = Discount * discount_scale) %>%
  pivot_longer(cols = c(Discount, Profit))

ggplot(orders_tidy, aes(x = Sales, y = value, col = name)) +
  geom_line() +
  scale_y_continuous(

    # Features of the first axis
    name = "Profit",

    # Second axis: undo the rescaling so the labels read in Discount units
    sec.axis = sec_axis(trans = ~ . / discount_scale, name = "Discount")
  ) +
  # Restricts the x scale; rows outside the limits are dropped with a warning.
  scale_x_continuous(limits = c(0, 12000)) +
  theme_bw()
## Warning: Removed 6 row(s) containing missing values (geom_path).

Question 3

Implement a Dual-Axis Chart

A dual axis chart can be implemented by specifying a second axis as before:

# Factor that stretches Quantity onto the Profit scale; the secondary axis
# divides it back out.
r <- 500

# Rescale Quantity, then stack Quantity and Profit into long format.
orders_tidy <- orders %>%
  mutate(Quantity = Quantity * r) %>%
  pivot_longer(cols = c(Quantity, Profit))

# Primary axis reads in Profit units, secondary axis in Quantity units.
ggplot(data = orders_tidy, mapping = aes(x = Sales, y = value, col = name)) +
  geom_line() +
  theme_bw() +
  scale_x_continuous(limits = c(0, 12000)) +
  scale_y_continuous(
    name = "Profit",
    sec.axis = sec_axis(trans = ~ . * (1 / r), name = "Quantity")
  )
## Warning: Removed 6 row(s) containing missing values (geom_path).

Question 4

Implement a visualisation on the map

To generate a choropleth map, match names to map names:

## state_names <- map("state", plot = FALSE)$name
## only_names  <- sapply(strsplit(state_names, ":"), "[", 1)
## index    <- match(only_names, tolower(state.name))
## index[8] <- 47

Then assign some colours:

# Map numeric values onto a two-colour gradient and return hex colour strings.
#
# x        Numeric vector. It is min-max rescaled to [0, 1] before being
#          passed to the colour ramp.
# lowcol   Colour assigned to the smallest value.
# highcol  Colour assigned to the largest value.
# index    Optional subscript used to select / reorder the resulting colours.
#          When NULL, a numeric vector called `index` visible from the
#          environment in which this function was defined is used if there is
#          one (the previous behaviour, which relied on that free variable);
#          otherwise all colours are returned in the order of `x`.
#
# Returns a character vector of "#RRGGBB" strings. Entries whose value is
# NA / NaN / infinite come back as NA. When every finite value is identical
# the colour is `lowcol` (the original divided by zero in that case).
col_vec <- function(x, lowcol = "white", highcol = "red", index = NULL) {
  if (is.null(index)) {
    # mode = "numeric" skips functions that merely share the name, such as
    # the index() generic exported by zoo/xts.
    index <- get0("index", envir = parent.env(environment()),
                  mode = "numeric", inherits = TRUE,
                  ifnotfound = seq_along(x))
  }

  cols <- rep(NA_character_, length(x))
  usable <- is.finite(x)

  if (any(usable)) {
    rng <- range(x[usable])
    span <- rng[2] - rng[1]
    scaled <- if (span > 0) {
      (x[usable] - rng[1]) / span
    } else {
      rep(0, sum(usable))
    }
    cols[usable] <- rgb(colorRamp(c(lowcol, highcol))(scaled),
                        maxColorValue = 255)
  }

  cols[index]
}

Then draw the map

## profits_vec <- orders$Profit
## names(profits_vec) <- orders$State
## 
## maps::map('state', fill = TRUE, col = col_vec(profits_vec, "white", "green"))

(There is a bug in knitr that prevents this from compiling.) Alternatively, the usmap package allows the map to be drawn through the much more robust ggplot2:

library(usmap)
# Rename the column to lower-case "state"; NOTE(review): plot_usmap() is
# expected to match data rows to regions through a column with that name --
# confirm against the usmap documentation.
names(orders)[names(orders) == "State"] <- "state"
# Mean profit per state.
prof_df <- aggregate(Profit ~ state, data = orders, FUN = mean)


# The outline colour is passed as `color`, not `col`: with `col` the warning
# "Duplicated aesthetics after name standardisation: colour" is raised.
# NOTE(review): presumably plot_usmap() adds its own `color` default when it
# sees no `color` / `colour` argument, so both reach the polygon layer --
# confirm against the installed usmap version.
plot_usmap(regions = "state", data = prof_df, values = "Profit", color = "white") +
  labs(title = "Profits",
       subtitle = "Aggregated Mean Value by State")  +
  theme(legend.position = "right") +
  guides(fill = guide_legend("Profits"))
## Warning: Duplicated aesthetics after name standardisation: colour
## Warning: Use of `map_df$x` is discouraged. Use `x` instead.
## Warning: Use of `map_df$y` is discouraged. Use `y` instead.
## Warning: Use of `map_df$group` is discouraged. Use `group` instead.

#

Question 5

Explore Tableau

This task is specific to Tableau and doesn’t translate to R and ggplot2, so I’ve left it out.