Say we flipped a coin 40 times and looked at the results:
# Number of coin flips in a single experiment.
n <- 40

#' Simulate one run of coin flips and tally the outcomes.
#'
#' @param flips Number of flips in the run. Defaults to the global `n`,
#'   looked up when the function is called, so a bare `coinflip()` behaves
#'   exactly as before.
#' @param prob Length-2 numeric vector: probability of heads, then tails.
#' @return A data frame with two rows and the columns `outcome`
#'   ("Heads", "Tails") and `tally` (how often each outcome occurred).
coinflip <- function(flips = n, prob = c(0.49, 0.51)) {
  stopifnot(
    is.numeric(flips), length(flips) == 1, flips >= 0,
    is.numeric(prob), length(prob) == 2
  )
  results <- sample(
    x = c("H", "T"), size = flips, replace = TRUE, prob = prob
  )
  # Summing the logical mask counts the heads without needing a pipe.
  heads <- sum(results == "H")
  data.frame(
    outcome = c("Heads", "Tails"),
    tally = c(heads, flips - heads)
  )
}
# Run one experiment and chart how many heads and tails came up.
resplot <- coinflip()
# geom_col() takes bar heights straight from `tally`; it is the direct
# spelling of geom_bar(stat = "identity").
ggplot(data = resplot, aes(x = outcome, y = tally, fill = outcome)) +
  geom_col()
Now, if we repeated the experiment a few more times, let’s have a look at what the results would be:
# Bar chart of the heads/tails tallies from one run; `tally_df` is a data
# frame with the columns `outcome` and `tally`, as returned by coinflip().
plot_tally <- function(tally_df) {
  ggplot(data = tally_df, aes(x = outcome, y = tally, fill = outcome)) +
    geom_col()
}

# Four independent runs, one panel each. coinflip() is called in the same
# order as before, and `resplot` ends up holding the last run.
resplot <- coinflip()
p1 <- plot_tally(resplot)
resplot <- coinflip()
p2 <- plot_tally(resplot)
resplot <- coinflip()
p3 <- plot_tally(resplot)
resplot <- coinflip()
p4 <- plot_tally(resplot)

# Lay the four panels out in a 2 x 2 grid.
grid.arrange(p1, p2, p3, p4, nrow = 2)
These results should hopefully evoke some curiosity, so let’s now repeat this 20 times and look at the distribution of the number of heads:
# Repeat the experiment m times and record the number of heads in each run.
m <- 20
# Flips per run; it never changes, so it is set once outside the loop.
n <- 40
# Integer slots up front: every entry is a count.
resvec <- integer(m)
for (i in seq_len(m)) {
  results <- sample(
    x = c("H", "T"), size = n, replace = TRUE, prob = c(0.49, 0.51)
  )
  hno <- sum(results == "H")
  # One console line per run.
  print(hno)
  resvec[i] <- hno
}
## [1] 22
## [1] 20
## [1] 20
## [1] 21
## [1] 16
## [1] 18
## [1] 16
## [1] 18
## [1] 20
## [1] 20
## [1] 23
## [1] 15
## [1] 19
## [1] 15
## [1] 24
## [1] 24
## [1] 21
## [1] 19
## [1] 13
## [1] 25
# Show the recorded heads counts, one entry per run.
print(resvec)
## [1] 22 20 20 21 16 18 16 18 20 20 23 15 19 15 24 24 21 19 13 25
# Put the counts into a one-column data frame and preview its first rows.
resvecDF <- data.frame(No.Of.Heads. = resvec)
head(resvecDF)
## No.Of.Heads.
## 1 22
## 2 20
## 3 20
## 4 21
## 5 16
## 6 18
# Base-graphics histogram of the heads counts.
hist(resvec)
# The same counts as a ggplot2 histogram, from a one-column data frame.
resvecDF <- data.frame(No.Of.Heads. = resvec)
ggplot(data = resvecDF, mapping = aes(x = No.Of.Heads.)) +
  geom_histogram(binwidth = 5) +
  theme_classic()
With that result we can start to suspect that the distribution of the number of heads (equivalently, of the mean of the 0/1 outcomes) will actually be approximately normal; let’s look at 100 repetitions of our experiment:
# Repeat the experiment m times and record the number of heads in each run.
m <- 100
# Flips per run; it never changes, so it is set once outside the loop.
n <- 40
# Integer slots up front: every entry is a count.
resvec <- integer(m)
for (i in seq_len(m)) {
  results <- sample(
    x = c("H", "T"), size = n, replace = TRUE, prob = c(0.49, 0.51)
  )
  hno <- sum(results == "H")
  # One console line per run.
  print(hno)
  resvec[i] <- hno
}
## [1] 12
## [1] 22
## [1] 19
## [1] 18
## [1] 21
## [1] 20
## [1] 18
## [1] 26
## [1] 19
## [1] 21
## [1] 28
## [1] 17
## [1] 24
## [1] 22
## [1] 21
## [1] 23
## [1] 22
## [1] 22
## [1] 19
## [1] 24
## [1] 17
## [1] 19
## [1] 25
## [1] 21
## [1] 22
## [1] 22
## [1] 23
## [1] 19
## [1] 20
## [1] 13
## [1] 24
## [1] 21
## [1] 19
## [1] 22
## [1] 23
## [1] 18
## [1] 13
## [1] 17
## [1] 26
## [1] 23
## [1] 14
## [1] 22
## [1] 16
## [1] 21
## [1] 25
## [1] 20
## [1] 15
## [1] 20
## [1] 19
## [1] 23
## [1] 16
## [1] 22
## [1] 19
## [1] 22
## [1] 24
## [1] 24
## [1] 18
## [1] 21
## [1] 22
## [1] 22
## [1] 19
## [1] 25
## [1] 17
## [1] 16
## [1] 20
## [1] 17
## [1] 17
## [1] 18
## [1] 20
## [1] 13
## [1] 22
## [1] 20
## [1] 16
## [1] 23
## [1] 20
## [1] 25
## [1] 21
## [1] 20
## [1] 20
## [1] 23
## [1] 18
## [1] 18
## [1] 18
## [1] 17
## [1] 20
## [1] 20
## [1] 19
## [1] 21
## [1] 20
## [1] 22
## [1] 28
## [1] 19
## [1] 19
## [1] 24
## [1] 24
## [1] 23
## [1] 16
## [1] 18
## [1] 23
## [1] 22
#print(resvec)
# Put the counts into a one-column data frame and preview its first rows.
resvecDF <- data.frame(No.Of.Heads. = resvec)
head(resvecDF)
## No.Of.Heads.
## 1 12
## 2 22
## 3 19
## 4 18
## 5 21
## 6 20
# Base-graphics histogram of the heads counts.
hist(resvec)
# The same counts as a ggplot2 histogram, from a one-column data frame.
resvecDF <- data.frame(No.Of.Heads. = resvec)
ggplot(data = resvecDF, mapping = aes(x = No.Of.Heads.)) +
  geom_histogram(binwidth = 5) +
  theme_classic()
Now let’s look at a really large number of repetitions:
# Repeat the experiment m times and record the number of heads in each run.
m <- 10000
# Flips per run; it never changes, so it is set once outside the loop.
n <- 40
# Integer slots up front: every entry is a count.
resvec <- integer(m)
for (i in seq_len(m)) {
  results <- sample(
    x = c("H", "T"), size = n, replace = TRUE, prob = c(0.49, 0.51)
  )
  hno <- sum(results == "H")
  # Per-run printing is left off here; add print(hno) to see each count.
  resvec[i] <- hno
}
#print(resvec)
# Put the counts into a one-column data frame. It is built once here and
# reused for the ggplot2 histogram below.
resvecDF <- data.frame(No.Of.Heads. = resvec)
# head(resvecDF)  # preview left disabled
# Base-graphics histogram of the heads counts.
hist(resvec)
# The same counts as a ggplot2 histogram.
ggplot(data = resvecDF, mapping = aes(x = No.Of.Heads.)) +
  geom_histogram(binwidth = 5) +
  theme_classic()