Consider a Problem

Say we flipped a coin 40 times and looked at the results:

# Number of coin flips in a single experiment.
n <- 40

#' Simulate one run of coin flips and tally the outcomes.
#'
#' @param n_flips Number of flips to simulate. Defaults to the global `n`,
#'   looked up at call time, so a bare `coinflip()` behaves as before.
#' @param prob Length-2 vector of probabilities for heads and tails, in that
#'   order. The default is a very slightly tails-biased coin.
#' @return A data frame with one row per outcome ("Heads", "Tails") and the
#'   number of times each occurred in the `tally` column.
coinflip <- function(n_flips = n, prob = c(0.49, 0.51)) {
  results <- sample(
    x = c("H", "T"),
    size = n_flips,
    replace = TRUE,
    prob = prob
  )

  # Summing the logical mask counts the heads directly.
  hno <- sum(results == "H")
  data.frame(outcome = c("Heads", "Tails"), tally = c(hno, n_flips - hno))
}
resplot <- coinflip()

# Bar chart of the head/tail counts from the single run stored in `resplot`.
ggplot(resplot, mapping = aes(x = outcome, y = tally, fill = outcome)) +
  geom_bar(stat = "identity")

Now, if we repeated the 40 flips a few more times, let’s have a look at what the results would be:

# Build the outcome bar chart for one simulated run. Shared by the four
# panels below so the plot specification is written only once.
plot_tally <- function(tally_df) {
  ggplot(data = tally_df, aes(x = outcome, y = tally, fill = outcome)) +
    geom_bar(stat = "identity")
}

# Four independent runs of the experiment, one panel each.
resplot <- coinflip()
p1 <- plot_tally(resplot)

resplot <- coinflip()
p2 <- plot_tally(resplot)

resplot <- coinflip()
p3 <- plot_tally(resplot)

resplot <- coinflip()
p4 <- plot_tally(resplot)

# Lay the four panels out on a 2 x 2 grid.
grid.arrange(p1, p2, p3, p4, nrow = 2)

These results should hopefully evoke some curiosity, so let’s now repeat this 20 times and look at the distribution of the number of heads:

# Repeat the 40-flip experiment m times, recording the number of heads seen
# in each repetition.
m <- 20
n <- 40 # flips per repetition; constant, so set once outside the loop
resvec <- integer(m) # one head count per repetition
for (i in seq_len(m)) {
  results <- sample(
    x = c("H", "T"),
    size = n,
    replace = TRUE,
    prob = c(0.49, 0.51)
  )

  # Summing the logical mask counts the heads directly.
  hno <- sum(results == "H")
  print(hno)

  resvec[i] <- hno
}
## [1] 22
## [1] 20
## [1] 20
## [1] 21
## [1] 16
## [1] 18
## [1] 16
## [1] 18
## [1] 20
## [1] 20
## [1] 23
## [1] 15
## [1] 19
## [1] 15
## [1] 24
## [1] 24
## [1] 21
## [1] 19
## [1] 13
## [1] 25
# Show all recorded head counts, one per repetition.
print(resvec)
##  [1] 22 20 20 21 16 18 16 18 20 20 23 15 19 15 24 24 21 19 13 25
# Wrap the head counts in a one-column data frame and preview the first rows.
resvecDF <- setNames(as.data.frame(resvec), "No.Of.Heads.")
head(resvecDF)
##   No.Of.Heads.
## 1           22
## 2           20
## 3           20
## 4           21
## 5           16
## 6           18
# Base-graphics histogram of the head counts.
hist(resvec)

# The same distribution drawn with ggplot2 from a one-column data frame.
resvecDF <- setNames(as.data.frame(resvec), "No.Of.Heads.")

ggplot(data = resvecDF, mapping = aes(x = No.Of.Heads.)) +
  geom_histogram(binwidth = 5) +
  theme_classic()

With that result we can start to suspect that the distribution of the head counts (and hence of the sample means) will actually be about normal. Let’s look at 100 repetitions of our experiment:

# Repeat the 40-flip experiment m times, recording the number of heads seen
# in each repetition.
m <- 100
n <- 40 # flips per repetition; constant, so set once outside the loop
resvec <- integer(m) # one head count per repetition
for (i in seq_len(m)) {
  results <- sample(
    x = c("H", "T"),
    size = n,
    replace = TRUE,
    prob = c(0.49, 0.51)
  )

  # Summing the logical mask counts the heads directly.
  hno <- sum(results == "H")
  print(hno)

  resvec[i] <- hno
}
## [1] 12
## [1] 22
## [1] 19
## [1] 18
## [1] 21
## [1] 20
## [1] 18
## [1] 26
## [1] 19
## [1] 21
## [1] 28
## [1] 17
## [1] 24
## [1] 22
## [1] 21
## [1] 23
## [1] 22
## [1] 22
## [1] 19
## [1] 24
## [1] 17
## [1] 19
## [1] 25
## [1] 21
## [1] 22
## [1] 22
## [1] 23
## [1] 19
## [1] 20
## [1] 13
## [1] 24
## [1] 21
## [1] 19
## [1] 22
## [1] 23
## [1] 18
## [1] 13
## [1] 17
## [1] 26
## [1] 23
## [1] 14
## [1] 22
## [1] 16
## [1] 21
## [1] 25
## [1] 20
## [1] 15
## [1] 20
## [1] 19
## [1] 23
## [1] 16
## [1] 22
## [1] 19
## [1] 22
## [1] 24
## [1] 24
## [1] 18
## [1] 21
## [1] 22
## [1] 22
## [1] 19
## [1] 25
## [1] 17
## [1] 16
## [1] 20
## [1] 17
## [1] 17
## [1] 18
## [1] 20
## [1] 13
## [1] 22
## [1] 20
## [1] 16
## [1] 23
## [1] 20
## [1] 25
## [1] 21
## [1] 20
## [1] 20
## [1] 23
## [1] 18
## [1] 18
## [1] 18
## [1] 17
## [1] 20
## [1] 20
## [1] 19
## [1] 21
## [1] 20
## [1] 22
## [1] 28
## [1] 19
## [1] 19
## [1] 24
## [1] 24
## [1] 23
## [1] 16
## [1] 18
## [1] 23
## [1] 22
# Printing the full vector is left disabled for this run: print(resvec)
# Wrap the head counts in a one-column data frame and preview the first rows.
resvecDF <- setNames(as.data.frame(resvec), "No.Of.Heads.")
head(resvecDF)
##   No.Of.Heads.
## 1           12
## 2           22
## 3           19
## 4           18
## 5           21
## 6           20
# Base-graphics histogram of the head counts.
hist(resvec)

# The same distribution drawn with ggplot2 from a one-column data frame.
resvecDF <- setNames(as.data.frame(resvec), "No.Of.Heads.")

ggplot(data = resvecDF, mapping = aes(x = No.Of.Heads.)) +
  geom_histogram(binwidth = 5) +
  theme_classic()

Now let’s look at a really big number of repetitions:

# Repeat the 40-flip experiment m times, recording the number of heads seen
# in each repetition. Per-repetition printing is omitted for this many runs.
m <- 10000
n <- 40 # flips per repetition; constant, so set once outside the loop
resvec <- integer(m) # one head count per repetition
for (i in seq_len(m)) {
  results <- sample(
    x = c("H", "T"),
    size = n,
    replace = TRUE,
    prob = c(0.49, 0.51)
  )

  # Summing the logical mask counts the heads directly.
  hno <- sum(results == "H")

  resvec[i] <- hno
}
# Printing the vector and previewing the data frame are left disabled for
# this many repetitions:
# print(resvec)
# head(resvecDF)

# One-column data frame of head counts, built once and used by the ggplot2
# histogram below.
resvecDF <- as.data.frame(resvec)
colnames(resvecDF) <- "No.Of.Heads."

# Base-graphics histogram of the head counts.
hist(resvec)

# The same distribution drawn with ggplot2.
ggplot(resvecDF, aes(x = No.Of.Heads.)) +
  geom_histogram(binwidth = 5) +
  theme_classic()