Note: Students should always aim to produce publication-worthy tables and figures. Unless otherwise stated, tables should be rendered using stargazer::(), while figures can be rendered using ggplot2::() or plot(). Regardless, tables and figures should always be presented with necessary formatting – e.g., (sub)title, axis (variable) labels and titles, a clearly-identifiable legend and key, etc. Problem sets must always be compiled using LaTex or RMarkdown and include the full coding routine (with notes explaining your implementation) used to complete each problem (10pts).


  1. Generate 1000 random numbers from a uniform distribution between zero and one. Do the same from a standard normal distribution where \(\mu\) = 5 and \(\sigma = 2.5\). Plot the results separately but in the same plot window using cowplot::() or similar. (2pts)

  2. set.seed(4421) # Set Random Seed
    
    a <- runif(1000, min = 0, max = 1)  # Uniform Distribution
    b <- rnorm(1000, mean = 5, sd = 2.5) # Standard Normal (Mean = 5, SD = 2.5)
    
    plot_a <- ggplot(data.frame(x = a), aes(x)) +
      geom_histogram(bins = 30, fill = "deepskyblue3", color = "black") +
      labs(x = "\nUniform (0,1)",
           y = 'Volume\n') +
      geom_vline(xintercept = 0.5, linetype = 2, linewidth = 1.25) + 
      theme_minimal() + 
      theme(panel.border = element_rect(linewidth = 1, colour = 'black', fill = NA), 
            axis.text = element_text(size = 12, colour = 'black'), 
            axis.title = element_text(size = 14, colour = 'black'))# Plot A
    
    plot_b <- ggplot(data.frame(x = b), aes(x)) +
      geom_histogram(bins = 30, fill = "coral4", color = "black") +
      labs(x = "\nNormal (μ=5, σ=2.5)",  
           y = 'Volume\n') +
      geom_vline(xintercept = 5, linetype = 2, linewidth = 1.25) + 
      theme_minimal() + 
      theme(panel.border = element_rect(linewidth = 1, colour = 'black', fill = NA), 
            axis.text = element_text(size = 12, colour = 'black'), 
            axis.title = element_text(size = 14, colour = 'black')) # Plot B
    
    
    cowplot::plot_grid(plot_a, plot_b) # Combine to Single Plot Grd


  3. The operator of a table game is using a biased, six-sided die. As a result, the probability of rolling a six is twice as likely as rolling any other number. Setting the random seed generator using , simulate rolling this ``loaded’’ die 1,000 times. Plot the simulated distribution and the theoretical probabilities – What proportion of the rolls are sixes? (2pts)

  4. set.seed(4421) # Set Random Seed
    
    probs <- c(rep(1/7, 5), 2/7) # 6 = 2*p, so 1-5 = 1/7 & 6 = 2/7
    rolls <- as.data.frame(table(sample(1:6, size = 1000, replace = TRUE, prob = probs))) %>% # Simulate Rolls (& Convert to DF)
       setNames(c("side", "count")) %>% # Assign Column Names
      mutate(proportion = count/sum(count), 
             prob = probs) # Recover Proportion
    
    rolls %>%
      mutate(proportion = count/sum(count)) # Proportion & Probs
    ##   side count proportion      prob
    ## 1    1   143      0.143 0.1428571
    ## 2    2   160      0.160 0.1428571
    ## 3    3   142      0.142 0.1428571
    ## 4    4   137      0.137 0.1428571
    ## 5    5   141      0.141 0.1428571
    ## 6    6   277      0.277 0.2857143
    rolls %>%
      ggplot(aes(x = factor(side), y = proportion)) + 
      geom_bar(stat = "identity", fill = "skyblue2", colour = 'black') +
      geom_point(aes(y = prob), color = "black", fill = 'white', shape = 23, size = 3) +
      geom_hline(yintercept = 0) + 
      labs(x = "\nDie Face", 
           y = "Proportion of Rolls\n", 
           title = 'Loaded Dice Simulation') +
      scale_y_continuous(limits = c(0, max(rolls$proportion) + 0.05)) + 
      theme_minimal() + 
      theme(panel.border = element_rect(linewidth = 1, colour = 'black', fill = NA), 
            axis.text = element_text(size = 12, colour = 'black'), 
            axis.title = element_text(size = 14, colour = 'black')) 


  5. Imagine two children are playing a rock-paper-scissors game for pennies. At the start of the game, both Player 1 and Player 2 have 50¢. Each time they play, the winner receives 1 cent from the loser – they will continue playing until one of them has $1. Simulate the progression of this game 1,000 times, such that either Player 1 or Player 2 will conclude a game with the dollar. Plot the distribution of times Player 1 wins versus Player 2. (2pts)

  6. set.seed(4421) # Set Random Seed
    
    play_game <- function() {
      player_1 <- 50 # Starting $0.50
      player_2 <- 50 # Starting $0.50
      while (player_1 > 0 && player_2 > 0) {
        outcome <- sample(c(-1, 0, 1), size = 1, prob = c(0.45, 0.10, 0.45)) # Sim Single RPS Winner: 1 = P1, -1 = P2, 0 = Tie
        if (outcome == 1) { # If Player 1 Wins
          player_1 <- player_1 + 1 # Add 1 cent to P1
          player_2 <- player_2 - 1 
        } else if (outcome == -1) { # If Player 2 Wins
          player_1 <- player_1 - 1 
          player_2 <- player_2 + 1 # Add 1 cent to P2
        } # (Move on if Tie)
      }
      if (player_1 == 100) return("Player 1") # If P1 gets $1 -- Return their Name
      if (player_2 == 100) return("Player 2") # If P2 gets $1 -- Reutrn their Name
    
      
      } # Function to Simulate Rock-Paper-Scissors Until 1 Player has $1
    
    n_sim <- 1000 # Number of Simulations to Run (1k)
    results <- c() # Empty List to Store Output (Could Also Use replicate(1000, play_game()))
    
    for (i in 1:n_sim){
      temp_winner <- play_game()
      results <- c(results, temp_winner)
    }
    
    data.frame(table(results)) %>%
      setNames(c('player', 'frequency')) %>%
      ggplot(aes(x = player, y = frequency)) + 
      geom_col(aes(fill = player), colour = 'black') +
      scale_fill_manual(values = c('skyblue4', 'coral4')) + 
      geom_hline(yintercept = 0) + 
      geom_label(aes(label = frequency), vjust = -0.5, size = 4) + 
      scale_y_continuous(limits = c(0, max(data.frame(table(results))$Freq) + 25)) +
      labs(x = '\n ', 
          y = 'Number of Wins\n') + 
      theme_minimal() + 
      theme(panel.border = element_rect(linewidth = 1, colour = 'black', fill = NA), 
            axis.text = element_text(size = 12, colour = 'black'), 
            axis.title = element_text(size = 14, colour = 'black'), 
            legend.position = 'none') 


  7. You are studying the average height of plants in a greenhouse. Heights are normally distributed with \(\mu\) = 50 and \(\sigma\) = 5. You want to see how sample means vary for different sample sizes. Simulate 1,000 sample means, each computed from a random sample of 10 plants drawn from the population. Store those means and redo the simulation using a random samples of 20, 30, 40, and 50 plants – Use a for loop. Once completed, plot the distribution of sample means from both groups. (2pts)

  8. set.seed(4421) # Set Random Seed
    
    mu <- 50 # Mean
    sigma <- 5 # SD
    n_sim <- 1000 # Simulations Count
    means_combined <- data.frame() # Empty DF to Store Output
    
    for (i in seq(10, 40, 10)){
      temp_mean <- replicate(n_sim, mean(rnorm(i, mean = mu, sd = sigma))) # Temp Run (i Samples)
      temp_mean <- data.frame(mean = temp_mean, 
                              samples = i) # Combine to Temp DF
      means_combined <- bind_rows(means_combined, temp_mean) # Export to DF
    } # Run Individual Sample Sizes -- Export to means_combined
    
    means_combined %>%
      mutate(samples = as.factor(samples)) %>%
      ggplot(aes(x = mean, group = samples)) + 
      geom_density(aes(fill = samples), alpha = 1/3) +
      geom_hline(yintercept = 0) + 
      geom_vline(xintercept = 50, linetype = 2) + 
      labs(x = '\nSample Mean',
           y = 'Density\n', 
           fill = 'Sample Size') + 
      theme_minimal() + 
      theme(panel.border = element_rect(linewidth = 1, colour = 'black', fill = NA), 
            axis.text = element_text(size = 12, colour = 'black'), 
            axis.title = element_text(size = 14, colour = 'black'), 
            legend.position = 'right', 
            legend.title = element_text(size = 10, colour = 'black', vjust = 0.5), 
            legend.background = element_rect(linewidth = 1, colour = 'black', fill = NA)) 


  9. A call center receives customer calls at a rate of one every 5 minutes, on average. The time between calls can be modeled using an exponential distribution with a mean of 5 minutes. Simulate 1,000 inter-arrival times (in minutes) for incoming calls. Plot the distributions of simulated times and overlay the theoretical exponential density curve. Estimate the probability that the next call arrives within 3 minutes. (2pts)

  10. set.seed(4421) # Random Seed Generation
    mu <- 5 # 5 Min Mean
    calls <- data.frame(rexp(1000, rate = 1/mu)) %>%
      setNames('arrival_time') # 1000 Sims (Rate = 1/Avg. Time)
    
    mean(calls <= 3) # Prob <=3 Min
    ## [1] 0.437
    calls %>%
      ggplot(aes(x = arrival_time)) + 
      geom_histogram(aes(y = after_stat(density)), bins = 30, 
                   fill = 'skyblue4', colour = 'black') + 
      stat_function(fun = dexp, args = list(rate = 1/mu), colour = "red", linewidth = 1.2) + # Plot Theoretical Distribution Too
      geom_hline(yintercept = 0) + 
      labs(x = "\nArrival Time (Minutes)", 
           y = "Density\n") +
        theme_minimal() + 
      scale_x_continuous(breaks = seq(5, 30, 5)) + 
      theme(panel.border = element_rect(linewidth = 1, colour = 'black', fill = NA), 
            axis.text = element_text(size = 12, colour = 'black'), 
            axis.title = element_text(size = 14, colour = 'black'), 
            legend.position = 'right', 
            legend.title = element_text(size = 10, colour = 'black', vjust = 0.5), 
            legend.background = element_rect(linewidth = 1, colour = 'black', fill = NA))