Skip to content

Commit 212599e

Browse files
author
Berj Chilingirian
committed
Add quick mode; refactor
1 parent 74d758e commit 212599e

File tree

14 files changed

+519
-319
lines changed

14 files changed

+519
-319
lines changed

aus_senate_audit/audit_info.py

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
# -*- coding: utf-8 -*-
2+
3+
""" Encapsulates Utilities for Recording Intermediate Audit Information. """
4+
5+
from json import load
6+
from json import dumps
7+
from os import makedirs
8+
from os.path import exists
9+
10+
from constants import AGGREGATE_BALLOTS_FILE_NAME
11+
from constants import AUDIT_DIR_NAME
12+
from constants import AUDIT_INFO_FILE_NAME
13+
from constants import COLUMN_HEADERS
14+
from constants import MATCH_HEADERS
15+
from constants import ROUND_DIR_NAME
16+
from constants import SELECTED_BALLOTS_FILE_NAME
17+
from constants import AUDIT_STAGE_KEY
18+
from constants import SAMPLE_SIZE_KEY
19+
20+
21+
class AuditInfo(object):
    """ Encapsulates utilities for recording intermediate audit information.

    Audit files are stored under the directory returned by
    :meth:`get_audit_dir_name`. The selected ballots file is addressed
    directly through ``SELECTED_BALLOTS_FILE_NAME``.

    :ivar str _state: The abbreviated name of the state to run the audit for.
    """

    def __init__(self, state):
        """ Initializes an :class:`AuditInfo` object.

        When the audit directory for the state does not exist yet, it is
        created together with the round sub-directory, the audit information
        is set to stage 0 with a sample size of 0, and the aggregate ballots
        CSV is created with its header rows. An existing audit directory is
        left untouched.

        :param str state: The abbreviated name of the state to run the audit for.
        """
        self._state = state
        if not exists(self.get_audit_dir_name()):
            makedirs('{}/{}'.format(self.get_audit_dir_name(), ROUND_DIR_NAME))
            self.set_audit_info(0, 0)
            self.initialize_aggregate_ballots_csv()

    def get_audit_dir_name(self):
        """ Returns the audit directory name for the given state.

        :returns: The audit directory name for the given state.
        :rtype: str
        """
        return AUDIT_DIR_NAME.format(self._state)

    def get_audit_results_file_path(self, file_name):
        """ Returns the file path for the given audit results file name.

        :param str file_name: The name of the file to return the full file path for.

        :returns: The file path for the given audit results file name.
        :rtype: str
        """
        return '{}/{}'.format(self.get_audit_dir_name(), file_name)

    def _get_round_file_path(self, audit_stage):
        """ Returns the file path of the round file for the given audit stage.

        :param int audit_stage: The stage of the audit the round file belongs to.

        :returns: The file path of the round file for the given audit stage.
        :rtype: str
        """
        return self.get_audit_results_file_path(
            '{}/round_{}.csv'.format(ROUND_DIR_NAME, audit_stage),
        )

    def initialize_aggregate_ballots_csv(self):
        """ Initializes the CSV file holding the aggregate ballots for the current sample.

        The file is truncated and then receives the column headers followed by
        a separator row of dashes.
        """
        separator_row = ','.join([
            '------------',
            '---------------------',
            '---------------------',
            '-------',
            '-------',
            '-----------',
        ])
        with open(self.get_audit_results_file_path(AGGREGATE_BALLOTS_FILE_NAME), 'w') as f:
            f.write('{}\n'.format(','.join(COLUMN_HEADERS)))
            f.write('{}\n'.format(separator_row))

    def add_new_ballots_to_aggregate(self):
        """ Appends the ballots in the selected ballots file to the aggregate ballots CSV.

        The first line of the selected ballots file (its header) is skipped;
        every remaining line is appended unchanged.
        """
        # Read the selection first so that both file handles are closed
        # deterministically and nothing is appended if the read fails.
        with open(SELECTED_BALLOTS_FILE_NAME, 'r') as selected_ballots_file:
            new_ballots = selected_ballots_file.readlines()[1:]
        with open(self.get_audit_results_file_path(AGGREGATE_BALLOTS_FILE_NAME), 'a') as f:
            f.writelines(new_ballots)

    def set_audit_info(self, audit_stage, sample_size):
        """ Sets information about the audit recorded thus far.

        The information is stored as a JSON object in the audit info file,
        replacing any previous content.

        :param int audit_stage: The new stage of the audit.
        :param int sample_size: The sample size of the audit.
        """
        audit_info = {AUDIT_STAGE_KEY: audit_stage, SAMPLE_SIZE_KEY: sample_size}
        with open(self.get_audit_results_file_path(AUDIT_INFO_FILE_NAME), 'w') as f:
            f.write(dumps(audit_info))

    def get_audit_info(self):
        """ Returns information about the audit recorded thus far.

        Example of the audit information (the actual keys are the values of
        ``AUDIT_STAGE_KEY`` and ``SAMPLE_SIZE_KEY``).

        .. code-block:: python

            {
                'audit_stage': 1,
                'sample_size': 1200,
            }

        :returns: A dictionary containing information about the audit recorded
            thus far.
        :rtype: dict
        """
        with open(self.get_audit_results_file_path(AUDIT_INFO_FILE_NAME), 'r') as f:
            return load(f)

    def write_audit_round_file(self, audit_stage, sample):
        """ Writes the round file for the given audit stage.

        The file starts with the column headers followed by the match headers;
        each ballot in the sample is then written on its own line.

        :param int audit_stage: The stage of the audit the round file belongs to.
        :param sample: An iterable of ballot records, one per output line.
        """
        with open(self._get_round_file_path(audit_stage), 'w') as f:
            f.write('{}\n'.format(','.join(COLUMN_HEADERS + MATCH_HEADERS)))
            for ballot in sample:
                f.write('{}\n'.format(ballot))

    def write_selected_ballots_file(self, sample, quick):
        """ Writes the selected ballots file for the given sample.

        :param sample: An iterable of ballot records, one per output line.
        :param bool quick: If true, each ballot record is written in full;
            otherwise only the part of the record before its first double
            quote is written.
        """
        with open(SELECTED_BALLOTS_FILE_NAME, 'w') as f:
            f.write('{}\n'.format(','.join(COLUMN_HEADERS)))
            for ballot in sample:
                record = ballot if quick else ballot.split('"')[0]
                f.write('{}\n'.format(record))

    def set_current_audit_round_file(self, ballots):
        """ Writes the given ballots to the round file of the current audit stage.

        :param ballots: An iterable of ballot records, one per output line.
        """
        self.write_audit_round_file(self.get_audit_info()[AUDIT_STAGE_KEY], ballots)

    def get_current_audit_round_file_name(self):
        """ Returns the file path of the round file of the current audit stage.

        :returns: The file path of the round file of the current audit stage.
        :rtype: str
        """
        return self._get_round_file_path(self.get_audit_info()[AUDIT_STAGE_KEY])
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
# -*- coding: utf-8 -*-
2+
3+
""" Validates Paper Preferences Against Electronic Preferences. """
4+
5+
from constants import AUDIT_STAGE_KEY
6+
from constants import SELECTED_BALLOTS_FILE_NAME
7+
8+
9+
class AuditValidator(object):
    """ Validates paper preferences against electronic preferences.

    :ivar :class:`AuditInfo` _audit_info: An object for interfacing with
        audit information recorded thus far.
    """

    def __init__(self, audit_info):
        """ Initializes a :class:`AuditValidator` object.

        :param :class:`AuditInfo` audit_info: An object for interfacing
            with audit information recorded thus far.
        """
        self._audit_info = audit_info

    @staticmethod
    def get_paper_preference_readings():
        """ Returns the paper preferences recorded in the selected ballots file.

        The header line is skipped. For every other line, the text between
        the first pair of double quotes is taken as the paper preferences; a
        line without a double quote raises :class:`IndexError`.

        :returns: The paper preferences, in file order.
        :rtype: list
        """
        with open(SELECTED_BALLOTS_FILE_NAME, 'r') as f:
            f.readline()  # Skip the header.
            return [line.rstrip().split('"')[1] for line in f]

    def get_electronic_ballots(self):
        """ Returns the ballot records in the round file of the current audit stage.

        The header line is skipped and trailing whitespace is stripped from
        every record.

        :returns: The electronic ballot records, in file order.
        :rtype: list
        """
        audit_round_file_name = self._audit_info.get_current_audit_round_file_name()
        with open(audit_round_file_name, 'r') as f:
            f.readline()  # Skip the header.
            return [line.rstrip() for line in f]

    def compare(self):
        """ Compares the paper preferences against the electronic preferences.

        Every electronic ballot record is extended with a match flag (1 if
        the quoted electronic preferences equal the paper preferences at the
        same position, 0 otherwise) and the quoted paper preferences. The
        extended records replace the current round file, and the selected
        ballots are then appended to the aggregate ballots file.

        :raises IndexError: If there are fewer paper preference readings than
            electronic ballots, or a record holds no quoted preferences.
        """
        paper_preferences = self.get_paper_preference_readings()
        electronic_ballots = self.get_electronic_ballots()
        match_records = []
        for i, electronic_ballot in enumerate(electronic_ballots):
            electronic_preferences = electronic_ballot.split('"')[1]
            match = int(electronic_preferences == paper_preferences[i])
            # No trailing newline here: the round file writer terminates each
            # record itself, so adding one would leave a blank line after
            # every record in the round file.
            match_records.append('{},{},"{}"'.format(
                electronic_ballot,
                match,
                paper_preferences[i],
            ))
        self._audit_info.set_current_audit_round_file(match_records)
        self._audit_info.add_new_ballots_to_aggregate()

aus_senate_audit/audits/bayesian_audit.py

Lines changed: 23 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -27,15 +27,15 @@ def get_new_ballot_weights(election, r):
2727
new_ballot_weights = {}
2828
total = 0
2929
for ballot in election.get_ballots():
30-
weight = election.get_weight_of_ballot(ballot)
30+
weight = election.get_ballot_weight(ballot)
3131
new_ballot_weights[ballot] = gammavariate(weight, 1) if weight else 0
3232
total += new_ballot_weights[ballot]
3333
for ballot in election.get_ballots():
3434
new_ballot_weights[ballot] = int(r * new_ballot_weights[ballot] / total)
3535
return new_ballot_weights
3636

3737

38-
def audit(election, seed, unpopular_freq_threshold, alpha=0.05, trials=100):
38+
def audit(election, seed, unpopular_freq_threshold, stage_counter=0, alpha=0.05, trials=100, quick=False):
3939
""" Runs a Bayesian audit on the given senate election.
4040
4141
:param :class:`SenateElection` election: The senate election to audit.
@@ -70,9 +70,6 @@ def audit(election, seed, unpopular_freq_threshold, alpha=0.05, trials=100):
7070
for cid in election.get_candidate_ids():
7171
election.add_ballot((cid,), 1)
7272

73-
# Current stage of the audit.
74-
stage_counter = 0
75-
7673
# Mapping from candidates to the set of ballots that elected them.
7774
candidate_to_ballots_map = {}
7875
candidate_outcomes = None
@@ -128,7 +125,7 @@ def audit(election, seed, unpopular_freq_threshold, alpha=0.05, trials=100):
128125
]),
129126
),
130127
)
131-
128+
done = False
132129
if freq >= trials * (1 - alpha):
133130
print(
134131
'Stopping because audit confirmed outcome:\n',
@@ -137,24 +134,31 @@ def audit(election, seed, unpopular_freq_threshold, alpha=0.05, trials=100):
137134
election.get_num_ballots_drawn(),
138135
),
139136
)
137+
done = True
140138
break
141139

142140
if election.get_num_ballots_drawn() >= election.get_num_cast_ballots():
143141
print('Audit has looked at all ballots. Done.')
142+
done = True
144143
break
145144

146-
for cid, cid_freq in sorted(
147-
candidate_outcomes.items(),
148-
key=lambda x: (x[1], x[0]),
149-
):
150-
if cid_freq / trials < unpopular_freq_threshold:
151-
print(
152-
' One set of ballots that elected low frequency '
153-
'candidate {} which occurred in {}% of outcomes\n'.format(
154-
str(cid),
155-
str(cid_freq),
156-
),
157-
' {}'.format(candidate_to_ballots_map[cid]),
158-
)
145+
if not quick:
146+
break
147+
148+
if candidate_outcomes is not None and done:
149+
for cid, cid_freq in sorted(
150+
candidate_outcomes.items(),
151+
key=lambda x: (x[1], x[0]),
152+
):
153+
if cid_freq / trials < unpopular_freq_threshold:
154+
print(
155+
' One set of ballots that elected low frequency '
156+
'candidate {} which occurred in {}% of outcomes\n'.format(
157+
str(cid),
158+
str(cid_freq),
159+
),
160+
' {}'.format(candidate_to_ballots_map[cid]),
161+
)
159162

160163
print('Elasped time: {} seconds.'.format(time() - start_time))
164+
return done

aus_senate_audit/cli.py

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
# -*- coding: utf-8 -*-
2+
3+
""" Encapsulates a Utility Function for Creating the CLI. """
4+
5+
from argparse import ArgumentParser
6+
7+
from constants import DEFAULT_SAMPLE_INCREMENT_SIZE
8+
from constants import DEFAULT_SIMULATED_SENATE_ELECTION_NUM_BALLOTS
9+
from constants import DEFAULT_SIMULATED_SENATE_ELECTION_NUM_CANDIDATES
10+
from constants import DEFAULT_UNPOPULAR_FREQUENCY_THRESHOLD
11+
from constants import QUICK_MODE
12+
from constants import REAL_MODE
13+
from constants import SIMULATION_MODE
14+
from constants import STATES
15+
16+
# -m simulation -s SEED --num-ballots 1000 --num-candidates 10
17+
# -m quick -s SEED --state TAS --max-ballots 1000 --config-file config.json
18+
# -m real -s SEED --state TAS --config-file config.json [--selected-ballots-file ...]
19+
20+
def parse_command_line_args():
    """ Parses the command line arguments for running an audit.

    The audit mode is a required positional argument; every other argument is
    optional. Parsing is delegated to :meth:`ArgumentParser.parse_args`, which
    reads the process arguments and exits with a usage message on invalid input.

    :returns: The parsed command line arguments.
    :rtype: :class:`argparse.Namespace`
    """
    parser = ArgumentParser()
    parser.add_argument(
        'mode',
        type=str,
        metavar='MODE',
        choices=[QUICK_MODE, REAL_MODE, SIMULATION_MODE],
        help='Mode in which to run the audit.',
    )
    parser.add_argument(
        '-s',
        '--seed',
        type=int,
        default=1,
        help='Starting value of the random number generator.',
    )
    parser.add_argument(
        '--num-ballots',
        type=int,
        default=DEFAULT_SIMULATED_SENATE_ELECTION_NUM_BALLOTS,
        help='Number of cast ballots for a simulated senate election.',
    )
    parser.add_argument(
        '--num-candidates',
        type=int,
        default=DEFAULT_SIMULATED_SENATE_ELECTION_NUM_CANDIDATES,
        help='Number of candidates for a simulated senate election.',
    )
    parser.add_argument(
        '--state',
        type=str,
        choices=STATES,
        help='Abbreviation of state name to run senate election audit for.',
    )
    # Multi-line help texts are built from adjacent string literals rather
    # than backslash continuations inside one literal, so that source
    # indentation can never leak into the text (or split the URL below).
    parser.add_argument(
        '-c',
        '--config-file',
        type=str,
        help=(
            'Path to Australian senate election configuration file (see '
            'https://github.com/grahame/dividebatur/blob/master/aec_data/fed2016/'
            'aec_fed2016.json as an example).'
        ),
    )
    parser.add_argument(
        '--sample',
        action='store_true',
        help='Whether to sample the remaining ballots or run a stage of the audit',
    )
    parser.add_argument(
        '--max-ballots',
        type=int,
        help=(
            'Maximum number of ballots to check for a real senate election '
            'audit.'
        ),
    )
    parser.add_argument(
        '-f',
        '--unpopular-frequency-threshold',
        type=float,
        default=DEFAULT_UNPOPULAR_FREQUENCY_THRESHOLD,
        help=(
            'Upper bound on the frequency of trials a candidate is elected '
            'in order for the candidate to be deemed unpopular.'
        ),
    )
    parser.add_argument(
        '--sample-increment-size',
        type=int,
        default=DEFAULT_SAMPLE_INCREMENT_SIZE,
        help='Number of ballots to add to growing sample.',
    )
    return parser.parse_args()

0 commit comments

Comments
 (0)