# CNNForecasting/Harris.R at master · yahoochen97/CNNForecasting · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
library(ggridges)
library(ggplot2)
library(grid)
library(dplyr)

# Script configuration ----
# Every relative path used later ("results/...", "models/...") is resolved
# against this directory.
setwd("/Users/yahoo/Documents/WashU/CSE515T/Code/Gaussian Process")

# Forecast horizons (days before the election). Stored as strings because
# they are spliced verbatim into the model/result file names.
horizons <- as.character(c(0, 7, 14, 21, 28, 42, 56))

# Model family tag that appears in the file names.
TYPE <- "GP"

# One selected hyper-parameter index per horizon, taken from the `opt_idx`
# column of the summary file; indexed in parallel with `horizons`.
best_cv_idx <- read.csv(paste0("results/", TYPE, "_opthyp.csv"))$opt_idx

# Race and candidate whose posterior vote share is plotted.
test_year <- 2016
STATE <- "Arizona"
CANDIDATE <- "McCain"

# Empty accumulator with the two output columns; one batch of posterior
# draws per horizon is appended to it further down.
COLNAMES <- c("horizon", "Posterior_Vote")
HARRIS <- setNames(
  data.frame(matrix(nrow = 0, ncol = length(COLNAMES))),
  COLNAMES
)

# Collect posterior draws for STATE / CANDIDATE at every horizon ----
#
# For each horizon (longest first) the loop:
#   1. loads the saved fit for that horizon and flattens it to a data frame
#      whose columns are parameter draws (presumably a Stan fit -- confirm);
#   2. reads the matching LOO csv and rebuilds the list of test-year races in
#      the same (cycle, state) order as before, keeping only normalised vote
#      shares per race;
#   3. locates STATE among the two-candidate races (index `i`) and CANDIDATE
#      within that race (index `j`);
#   4. extracts the draws of `test_y2[i,j]` and stores them, in percent,
#      together with the numeric horizon.
#
# After the loop, `vote` and `j` still hold the values from the last
# iteration (horizon "0"); the plotting code below relies on them.
#
# Unlike the earlier version, a missing state, candidate or parameter column
# now stops with an explicit error instead of silently falling back to the
# first race / first candidate.
horizon_draws <- vector("list", length(horizons))

for (a in rev(seq_along(horizons))) {
  fit <- readRDS(
    file = paste0(
      "models/", TYPE, "_", test_year, "day_", horizons[a], "_fit.rds"
    )
  )
  fit_params <- as.data.frame(fit)

  # Load the prior file selected for this horizon.
  input_file <- paste0(
    "results/LOO", TYPE, "_", test_year, "day", horizons[a], "_",
    best_cv_idx[a], ".csv"
  )
  data <- read.csv(input_file)
  print(input_file)

  # Drop specific candidates from crowded races (original note: races with
  # more than 4 candidates).
  # NOTE(review): "Flemsing" only filters anything if the csv uses this exact
  # spelling -- confirm against the data.
  data <- data[data$cycle != 2016 | data$state != "Louisiana" |
                 data$candidate != "Flemsing", ]
  data <- data[data$cycle != 2020 | data$state != "Georgia" |
                 data$candidate != "Loeffler", ]
  data <- data[data$cycle != 2020 | data$state != "Georgia" |
                 data$candidate != "Tarver", ]

  # Split into test-year rows and training rows (2018 and 2020 excluded).
  data_test <- data[data$cycle == test_year, ]
  data <- data[data$cycle != test_year & data$cycle != 2018 &
                 data$cycle != 2020, ]

  # Training states first, then any state seen only in the test year. The
  # resulting race order fixes the row index `i` used below, so it must not
  # be changed (presumably it mirrors the order used when fitting -- confirm).
  states <- union(unique(data$state), unique(data_test$state))

  # Enumerate test races: metadata c(cycle, state) and normalised votes.
  test_metadata <- list()
  test_y <- list()
  test_counter <- 0
  for (cycle in unique(data_test$cycle)) {
    for (state in states) {
      race_votes <- data_test[
        data_test$state == state & data_test$cycle == cycle, c("vote")
      ]
      if (length(race_votes) > 0) {
        test_counter <- test_counter + 1
        test_metadata[[test_counter]] <- c(cycle, state)
        test_y[[test_counter]] <- race_votes / sum(race_votes)
      }
    }
  }

  # Positions of the two-candidate races; `test_y2` is indexed over these.
  test_idx2 <- which(lengths(test_y) == 2)
  two_way_states <- vapply(
    test_metadata[test_idx2],
    function(meta) meta[2],
    character(1)
  )

  # Row of STATE among the two-candidate races (last match, as before).
  i <- which(two_way_states == STATE)
  if (length(i) == 0) {
    stop(
      "No two-candidate ", test_year, " race found for state '", STATE,
      "' in ", input_file,
      call. = FALSE
    )
  }
  i <- i[length(i)]
  vote <- test_y[[test_idx2[i]]]

  # Column of CANDIDATE within that race (last match, as before).
  candidates <- data_test[
    data_test$state == STATE & data_test$cycle == test_year, c("candidate")
  ]
  j <- which(candidates == CANDIDATE)
  if (length(j) == 0) {
    stop(
      "Candidate '", CANDIDATE, "' not found in the ", test_year, " ", STATE,
      " race in ", input_file,
      call. = FALSE
    )
  }
  j <- j[length(j)]

  # Posterior draws for that (race, candidate) cell.
  param_name <- paste0("test_y2[", i, ",", j, "]")
  pred <- fit_params[[param_name]]
  if (is.null(pred)) {
    stop(
      "Parameter '", param_name, "' is missing from the fit for horizon ",
      horizons[a],
      call. = FALSE
    )
  }

  horizon_draws[[a]] <- setNames(
    data.frame(as.numeric(horizons[a]), pred * 100),
    COLNAMES
  )
}

# Append all horizons at once, longest horizon first (same row order as
# appending inside the loop, without repeated copying).
HARRIS <- rbind(HARRIS, do.call(rbind, rev(horizon_draws)))

# Ridge plot of the posterior vote share per horizon ----
#
# `vote` and `j` are the values left by the last iteration of the horizon
# loop above: the normalised vote shares of the selected race and the
# position of the selected candidate. `v` is therefore that candidate's
# share in percent, drawn as the blue reference line and added as an extra
# x-axis break.
v <- round(100 * vote[j], 2)

# The y-axis factor is ordered by descending horizon (56, 42, ..., 0), so the
# labels are listed from election day outwards and then reversed to line up
# with the factor levels.
LABELS <- rev(c(
  "Election Day", "One week left", "Two weeks left", "Three weeks left",
  "Four weeks left", "Six weeks left", "Eight weeks left"
))

ggplot(
  HARRIS,
  aes(x = Posterior_Vote, y = reorder(horizon, desc(horizon)))
) +
  geom_density_ridges(alpha = 0.6) +
  scale_y_discrete(expand = c(0, 0), name = "Horizon", labels = LABELS) +
  scale_x_continuous(
    expand = c(0, 0),
    breaks = c(40, 50, 60, v, 70, 80),
    name = "Posterior Vote (%)"
  ) +
  geom_vline(xintercept = v, colour = "blue") +
  coord_cartesian(xlim = c(40, 80), clip = "on") +
  theme(
    panel.grid.major.x = element_line(color = "gray"),
    # NOTE(review): `size` is kept for compatibility with older ggplot2;
    # recent releases prefer `linewidth` and emit a deprecation warning.
    panel.grid.major.y = element_line(size = .2, color = "grey"),
    plot.title = element_text(hjust = 0.5),
    panel.background = element_rect(fill = "white", colour = "white")
  )