-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathlm.R
More file actions
50 lines (40 loc) · 1.91 KB
/
lm.R
File metadata and controls
50 lines (40 loc) · 1.91 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
library(dplyr)
library(dummies)
# Horizon cut-offs. The model loop further down keeps only rows with
# daysLeft <= -horizon, so each value yields one fit and one output file.
horizons <- c(120, 90, 42, 28, 14, 0)

# Raw input data; the columns used by this script are Candidateidentifier,
# Percentage_of_Vote_won_x, numberSupport, samplesize, pollsteridx, daysLeft,
# cycle, state, pvi, experienced and Republican.
df <- read.csv("data/CNNData1992to2018idx.csv")

# One row per candidate with the mean of Percentage_of_Vote_won_x (NAs
# ignored). Computed before the column is renamed to `vote` below. The model
# loop only uses the Candidateidentifier column of this table.
# Plain summarise() replaces summarise_at()/funs(): funs() has been deprecated
# since dplyr 0.8.0; the resulting columns are the same.
c2v <- df %>%
  group_by(Candidateidentifier) %>%
  summarise(
    Percentage_of_Vote_won_x = mean(Percentage_of_Vote_won_x, na.rm = TRUE)
  )
# df$Republican = df$Republican*2-1

# Response variable of the regression: supporters divided by sample size.
df$polls <- df$numberSupport / df$samplesize

# Shorter name for the vote column; the model loop reads it as `vote`.
colnames(df)[colnames(df) == "Percentage_of_Vote_won_x"] <- "vote"

# Store the pollster index as character rather than as a number.
df$pollsteridx <- as.character(df$pollsteridx)
# df <- dummy.data.frame(data=df, names='pollsteridx')
# For every horizon: fit one linear model with a separate intercept and a
# separate daysLeft slope per candidate (no global intercept), then write one
# CSV per horizon holding each candidate's intercept estimate and its
# standard error next to that candidate's metadata.
#
# Reads `horizons`, `df` and `c2v` from the enclosing script and writes
# results/lm1992-2018all<horizon>.csv (the results/ directory must exist).

# Candidate identifiers to report on; invariant across horizons.
candidate_ids <- as.character(c2v$Candidateidentifier)

# Header-only frame written when no candidate has a coefficient. rbind()
# ignores zero-row data frames, so this declared column order never applied
# to non-empty results; those use the order built per candidate below:
# cycle, state, vote, pvi, experienced, party, posteriormean, posteriorstd,
# candidate.
empty_output <- data.frame(matrix(ncol = 9, nrow = 0))
colnames(empty_output) <- c(
  "cycle", "state", "candidate", "posteriormean", "posteriorstd",
  "vote", "pvi", "party", "experienced"
)

# Metadata columns copied from the first data row of each candidate.
meta_cols <- c("cycle", "state", "vote", "pvi", "experienced", "Republican")

for (horizon in horizons) {
  # Only rows at or beyond the horizon cut-off take part in the fit.
  model <- lm(
    polls ~ Candidateidentifier + Candidateidentifier:daysLeft - 1,
    data = df[df$daysLeft <= -horizon, ]
  )
  # Coefficient table: column 1 is the estimate, column 2 its standard error.
  coefs <- summary(model)$coefficients

  # Collect one single-row data frame per candidate and bind them once at the
  # end instead of growing a data frame with rbind() on every iteration.
  rows <- vector("list", length(candidate_ids))
  for (i in seq_along(candidate_ids)) {
    cand <- candidate_ids[[i]]

    # Candidates without an intercept row in the table (for example because
    # they have no rows inside this horizon) are skipped. An explicit
    # membership test replaces the former tryCatch()/`<<-` flag, which hid
    # every kind of error and then repeated the same lookup.
    coef_name <- paste0("Candidateidentifier", cand)
    if (!(coef_name %in% rownames(coefs))) {
      next
    }
    # Slope for the same candidate, if ever needed:
    # slope <- coefs[paste0("Candidateidentifier", cand, ":daysLeft"), 1]
    # Debug plot of the fitted line against the raw polls:
    # x <- df[df$Candidateidentifier == cand, ]$daysLeft
    # y <- df[df$Candidateidentifier == cand, ]$polls
    # t <- min(x):0
    # plot(t, slope * t + coefs[coef_name, 1], type = "l")
    # points(x, y)

    # match() returns the first row of this candidate and, unlike logical
    # subsetting, cannot pick an all-NA row caused by a missing identifier.
    cand_row <- df[match(cand, df$Candidateidentifier), meta_cols]
    cand_row[["posteriormean"]] <- coefs[coef_name, 1]
    cand_row[["posteriorstd"]] <- coefs[coef_name, 2]
    # Recode the 0/1 Republican flag to -1/+1 and expose it as `party`.
    cand_row[["Republican"]] <- 2 * cand_row[["Republican"]] - 1
    # Drop the first six characters of the identifier
    # (presumably a fixed-width prefix; confirm against the data).
    cand_row[["candidate"]] <- substring(cand, 7)
    colnames(cand_row)[colnames(cand_row) == "Republican"] <- "party"
    rows[[i]] <- cand_row
  }

  rows <- Filter(Negate(is.null), rows)
  output <- if (length(rows) > 0) do.call(rbind, rows) else empty_output

  write.csv(
    output,
    paste0("results/lm1992-2018all", horizon, ".csv"),
    row.names = FALSE
  )
}