Note: Students should always aim to produce
publication-worthy tables and figures. Unless otherwise stated,
tables should be rendered using stargazer::(), while
figures can be rendered using ggplot2::() or
plot(). Regardless, tables and figures should always be
presented with necessary formatting – e.g., (sub)title, axis (variable)
labels and titles, a clearly-identifiable legend and key, etc. Problem
sets must always be compiled using LaTex or
RMarkdown and include the full coding routine (with notes
explaining your implementation) used to complete each problem
(10pts).
cowplot::() or similar.
(2pts)
set.seed(4421) # Set Random Seed
a <- runif(1000, min = 0, max = 1) # Uniform Distribution
b <- rnorm(1000, mean = 5, sd = 2.5) # Standard Normal (Mean = 5, SD = 2.5)
plot_a <- ggplot(data.frame(x = a), aes(x)) +
geom_histogram(bins = 30, fill = "deepskyblue3", color = "black") +
labs(x = "\nUniform (0,1)",
y = 'Volume\n') +
geom_vline(xintercept = 0.5, linetype = 2, linewidth = 1.25) +
theme_minimal() +
theme(panel.border = element_rect(linewidth = 1, colour = 'black', fill = NA),
axis.text = element_text(size = 12, colour = 'black'),
axis.title = element_text(size = 14, colour = 'black'))# Plot A
plot_b <- ggplot(data.frame(x = b), aes(x)) +
geom_histogram(bins = 30, fill = "coral4", color = "black") +
labs(x = "\nNormal (μ=5, σ=2.5)",
y = 'Volume\n') +
geom_vline(xintercept = 5, linetype = 2, linewidth = 1.25) +
theme_minimal() +
theme(panel.border = element_rect(linewidth = 1, colour = 'black', fill = NA),
axis.text = element_text(size = 12, colour = 'black'),
axis.title = element_text(size = 14, colour = 'black')) # Plot B
cowplot::plot_grid(plot_a, plot_b) # Combine to Single Plot Grd
set.seed(4421) # Set Random Seed
probs <- c(rep(1/7, 5), 2/7) # 6 = 2*p, so 1-5 = 1/7 & 6 = 2/7
rolls <- as.data.frame(table(sample(1:6, size = 1000, replace = TRUE, prob = probs))) %>% # Simulate Rolls (& Convert to DF)
setNames(c("side", "count")) %>% # Assign Column Names
mutate(proportion = count/sum(count),
prob = probs) # Recover Proportion
rolls %>%
mutate(proportion = count/sum(count)) # Proportion & Probs
## side count proportion prob
## 1 1 143 0.143 0.1428571
## 2 2 160 0.160 0.1428571
## 3 3 142 0.142 0.1428571
## 4 4 137 0.137 0.1428571
## 5 5 141 0.141 0.1428571
## 6 6 277 0.277 0.2857143
rolls %>%
ggplot(aes(x = factor(side), y = proportion)) +
geom_bar(stat = "identity", fill = "skyblue2", colour = 'black') +
geom_point(aes(y = prob), color = "black", fill = 'white', shape = 23, size = 3) +
geom_hline(yintercept = 0) +
labs(x = "\nDie Face",
y = "Proportion of Rolls\n",
title = 'Loaded Dice Simulation') +
scale_y_continuous(limits = c(0, max(rolls$proportion) + 0.05)) +
theme_minimal() +
theme(panel.border = element_rect(linewidth = 1, colour = 'black', fill = NA),
axis.text = element_text(size = 12, colour = 'black'),
axis.title = element_text(size = 14, colour = 'black'))
set.seed(4421) # Set Random Seed
play_game <- function() {
player_1 <- 50 # Starting $0.50
player_2 <- 50 # Starting $0.50
while (player_1 > 0 && player_2 > 0) {
outcome <- sample(c(-1, 0, 1), size = 1, prob = c(0.45, 0.10, 0.45)) # Sim Single RPS Winner: 1 = P1, -1 = P2, 0 = Tie
if (outcome == 1) { # If Player 1 Wins
player_1 <- player_1 + 1 # Add 1 cent to P1
player_2 <- player_2 - 1
} else if (outcome == -1) { # If Player 2 Wins
player_1 <- player_1 - 1
player_2 <- player_2 + 1 # Add 1 cent to P2
} # (Move on if Tie)
}
if (player_1 == 100) return("Player 1") # If P1 gets $1 -- Return their Name
if (player_2 == 100) return("Player 2") # If P2 gets $1 -- Reutrn their Name
} # Function to Simulate Rock-Paper-Scissors Until 1 Player has $1
n_sim <- 1000 # Number of Simulations to Run (1k)
results <- c() # Empty List to Store Output (Could Also Use replicate(1000, play_game()))
for (i in 1:n_sim){
temp_winner <- play_game()
results <- c(results, temp_winner)
}
data.frame(table(results)) %>%
setNames(c('player', 'frequency')) %>%
ggplot(aes(x = player, y = frequency)) +
geom_col(aes(fill = player), colour = 'black') +
scale_fill_manual(values = c('skyblue4', 'coral4')) +
geom_hline(yintercept = 0) +
geom_label(aes(label = frequency), vjust = -0.5, size = 4) +
scale_y_continuous(limits = c(0, max(data.frame(table(results))$Freq) + 25)) +
labs(x = '\n ',
y = 'Number of Wins\n') +
theme_minimal() +
theme(panel.border = element_rect(linewidth = 1, colour = 'black', fill = NA),
axis.text = element_text(size = 12, colour = 'black'),
axis.title = element_text(size = 14, colour = 'black'),
legend.position = 'none')
for loop. Once
completed, plot the distribution of sample means from both groups.
(2pts)
set.seed(4421) # Set Random Seed
mu <- 50 # Mean
sigma <- 5 # SD
n_sim <- 1000 # Simulations Count
means_combined <- data.frame() # Empty DF to Store Output
for (i in seq(10, 40, 10)){
temp_mean <- replicate(n_sim, mean(rnorm(i, mean = mu, sd = sigma))) # Temp Run (i Samples)
temp_mean <- data.frame(mean = temp_mean,
samples = i) # Combine to Temp DF
means_combined <- bind_rows(means_combined, temp_mean) # Export to DF
} # Run Individual Sample Sizes -- Export to means_combined
means_combined %>%
mutate(samples = as.factor(samples)) %>%
ggplot(aes(x = mean, group = samples)) +
geom_density(aes(fill = samples), alpha = 1/3) +
geom_hline(yintercept = 0) +
geom_vline(xintercept = 50, linetype = 2) +
labs(x = '\nSample Mean',
y = 'Density\n',
fill = 'Sample Size') +
theme_minimal() +
theme(panel.border = element_rect(linewidth = 1, colour = 'black', fill = NA),
axis.text = element_text(size = 12, colour = 'black'),
axis.title = element_text(size = 14, colour = 'black'),
legend.position = 'right',
legend.title = element_text(size = 10, colour = 'black', vjust = 0.5),
legend.background = element_rect(linewidth = 1, colour = 'black', fill = NA))
set.seed(4421) # Random Seed Generation
mu <- 5 # 5 Min Mean
calls <- data.frame(rexp(1000, rate = 1/mu)) %>%
setNames('arrival_time') # 1000 Sims (Rate = 1/Avg. Time)
mean(calls <= 3) # Prob <=3 Min
## [1] 0.437
calls %>%
ggplot(aes(x = arrival_time)) +
geom_histogram(aes(y = after_stat(density)), bins = 30,
fill = 'skyblue4', colour = 'black') +
stat_function(fun = dexp, args = list(rate = 1/mu), colour = "red", linewidth = 1.2) + # Plot Theoretical Distribution Too
geom_hline(yintercept = 0) +
labs(x = "\nArrival Time (Minutes)",
y = "Density\n") +
theme_minimal() +
scale_x_continuous(breaks = seq(5, 30, 5)) +
theme(panel.border = element_rect(linewidth = 1, colour = 'black', fill = NA),
axis.text = element_text(size = 12, colour = 'black'),
axis.title = element_text(size = 14, colour = 'black'),
legend.position = 'right',
legend.title = element_text(size = 10, colour = 'black', vjust = 0.5),
legend.background = element_rect(linewidth = 1, colour = 'black', fill = NA))