berjc
diff --git a/‎aus_senate_audit/audit_info.py‎
Lines changed: 131 additions & 0 deletions b/‎aus_senate_audit/audit_info.py‎
Lines changed: 131 additions & 0 deletions
diff --git a/‎aus_senate_audit/audit_validator.py‎
Lines changed: 55 additions & 0 deletions b/‎aus_senate_audit/audit_validator.py‎
Lines changed: 55 additions & 0 deletions
diff --git a/‎aus_senate_audit/audits/bayesian_audit.py‎
Lines changed: 23 additions & 19 deletions b/‎aus_senate_audit/audits/bayesian_audit.py‎
Lines changed: 23 additions & 19 deletions
diff --git a/‎aus_senate_audit/cli.py‎
Lines changed: 88 additions & 0 deletions b/‎aus_senate_audit/cli.py‎
Lines changed: 88 additions & 0 deletions
@@ -0,0 +1,131 @@
+# -*- coding: utf-8 -*-
+
+""" Encapsulates Utilities for Recording Intermediate Audit Information. """
+
+from json import load
+from json import dumps
+from os import makedirs
+from os.path import exists
+
+from constants import AGGREGATE_BALLOTS_FILE_NAME
+from constants import AUDIT_DIR_NAME
+from constants import AUDIT_INFO_FILE_NAME
+from constants import COLUMN_HEADERS
+from constants import MATCH_HEADERS
+from constants import ROUND_DIR_NAME
+from constants import SELECTED_BALLOTS_FILE_NAME
+from constants import AUDIT_STAGE_KEY
+from constants import SAMPLE_SIZE_KEY
+
+
+class AuditInfo(object):
+    """ Encapsulates utilities for recording intermediate audit information.
+
+    :ivar str _state: The abbreviated name of the state to run the audit for.
+    """
+    def __init__(self, state):
+        """ Initializes an :class:`AuditInfo` object.
+
+        :param str state: The abbreviated name of the state to run the audit for.
+        """
+        self._state = state
+        if not exists(self.get_audit_dir_name()):  # Only create the directories and seed files when the audit directory is missing.
+            makedirs('{}/{}'.format(self.get_audit_dir_name(), ROUND_DIR_NAME))
+            self.set_audit_info(0, 0)
+            self.initialize_aggregate_ballots_csv()
+
+    def get_audit_dir_name(self):
+        """ Returns the audit directory name for the given state.
+
+        :returns: The audit directory name for the given state.
+        :rtype: str
+        """
+        return AUDIT_DIR_NAME.format(self._state)
+
+    def get_audit_results_file_path(self, file_name):
+        """ Returns the file path for the given audit results file name.
+
+        :param str file_name: The name of the file to return the full file path for.
+
+        :returns: The file path for the given audit results file name.
+        :rtype: str
+        """
+        return '{}/{}'.format(
+            self.get_audit_dir_name(),
+            file_name,
+        )
+
+    def initialize_aggregate_ballots_csv(self):
+        """ Creates (or truncates) the aggregate ballots CSV and writes its header row and a dashed separator row. """
+        with open(self.get_audit_results_file_path(AGGREGATE_BALLOTS_FILE_NAME), 'w') as f:
+            f.write('{}\n'.format(','.join(COLUMN_HEADERS)))
+            f.write('{}\n'.format(
+                ','.join(['------------', '---------------------', '---------------------', '-------', '-------', '-----------'])
+            ))
+
+    def add_new_ballots_to_aggregate(self):
+        """ Appends every line of the selected ballots file except its first (header) line to the aggregate ballots CSV. """
+        with open(self.get_audit_results_file_path(AGGREGATE_BALLOTS_FILE_NAME), 'a') as f:
+            new_ballots = [line for line in open(SELECTED_BALLOTS_FILE_NAME, 'r')][1:]  # NOTE(review): this read handle is never explicitly closed.
+            for new_ballot in new_ballots:
+                f.write(new_ballot)
+
+    def set_audit_info(self, audit_stage, sample_size):
+        """ Sets information about the audit recorded thus far.
+
+        :param int audit_stage: The new stage of the audit.
+        :param int sample_size: The sample size of the audit.
+        """
+        open(self.get_audit_results_file_path(AUDIT_INFO_FILE_NAME), 'w').write(
+            dumps({AUDIT_STAGE_KEY: audit_stage, SAMPLE_SIZE_KEY: sample_size})
+        )  # NOTE(review): the file object is not explicitly closed here.
+
+    def get_audit_info(self):
+        """ Returns information about the audit recorded thus far.
+
+        Example of the audit information.
+
+        .. code-block:: python
+
+            {
+                'audit_stage': 1,
+                'sample_size': 1200,
+
+            }
+
+        :returns: A dictionary containing information about the audit recorded
+            thus far.
+        :rtype: dict
+        """
+        return load(open(self.get_audit_results_file_path(AUDIT_INFO_FILE_NAME), 'r'))  # NOTE(review): the file object is not explicitly closed here.
+
+    def write_audit_round_file(self, audit_stage, sample):
+        """ Writes a header row and then each item of ``sample`` on its own line to the round CSV for ``audit_stage``. """
+        with open(self.get_audit_results_file_path(
+            '{}/round_{}.csv'.format(ROUND_DIR_NAME, audit_stage)),
+             'w',
+        ) as f:
+            f.write('{}\n'.format(','.join(COLUMN_HEADERS + MATCH_HEADERS)))
+            for ballot in sample:
+                f.write('{}\n'.format(ballot))
+
+    def write_selected_ballots_file(self, sample, quick):
+        """ Writes ``sample`` to the selected ballots file; unless ``quick``, only the text before the first double quote of each ballot is kept. """
+        with open(SELECTED_BALLOTS_FILE_NAME, 'w') as f:
+            f.write('{}\n'.format(','.join(COLUMN_HEADERS)))
+            for ballot in sample:
+                if quick:
+                    f.write('{}\n'.format(ballot))
+                else:
+                    f.write('{}\n'.format(ballot.split('"')[0]))
+
+    def set_current_audit_round_file(self, ballots):
+        """ Writes ``ballots`` to the round file of the audit stage currently stored in the audit info file. """
+        self.write_audit_round_file(self.get_audit_info()[AUDIT_STAGE_KEY], ballots)
+
+    def get_current_audit_round_file_name(self):
+        """ Returns the path of the round file for the audit stage currently stored in the audit info file. """
+        return self.get_audit_results_file_path('{}/round_{}.csv'.format(
+            ROUND_DIR_NAME,
+            self.get_audit_info()[AUDIT_STAGE_KEY]),
+        )
@@ -0,0 +1,55 @@
+# -*- coding: utf-8 -*-
+
+""" Validates Paper Preferences Against Electronic Preferences. """
+
+from constants import AUDIT_STAGE_KEY
+from constants import SELECTED_BALLOTS_FILE_NAME
+
+
+class AuditValidator(object):
+    """ Validates paper preferences against electronic preferences.
+
+    :ivar :class:`AuditInfo` _audit_info: An object for interfacing with
+        audit information recorded thus far.
+    """
+
+    def __init__(self, audit_info):
+        """ Initializes a :class:`AuditValidator` object.
+
+        :param :class:`AuditInfo` audit_info: An object for interfacing
+            with audit information recorded thus far.
+        """
+        self._audit_info = audit_info
+
+    @staticmethod
+    def get_paper_preference_readings():
+        """ Returns, for each non-header line of the selected ballots file, the text between its first two double quotes. """
+        with open(SELECTED_BALLOTS_FILE_NAME, 'r') as f:
+            f.readline()  # Skip the header.
+            paper_preferences = []
+            for line in f:
+                paper_preferences.append(line.rstrip().split('"')[1])
+        return paper_preferences
+
+    def get_electronic_ballots(self):
+        """ Returns the right-stripped, non-header lines of the current audit round file as a list. """
+        audit_round_file_name = self._audit_info.get_current_audit_round_file_name()
+        with open(audit_round_file_name, 'r') as f:
+            f.readline()  # Skip the header.
+            ballots = [line.rstrip() for line in f]
+        return ballots
+
+    def compare(self):
+        """ Appends a 0/1 match flag and the paper reading to each electronic ballot, rewrites the round file, then updates the aggregate. """
+        paper_preferences = self.get_paper_preference_readings()
+        electronic_ballots = self.get_electronic_ballots()
+        match_records = []
+        for i in range(len(electronic_ballots)):
+            electronic_preferences = electronic_ballots[i].split('"')[1]
+            match = int(electronic_preferences == paper_preferences[i])
+            match_records.append(electronic_ballots[i] + ',{},"{}"\n'.format(
+                match,
+                paper_preferences[i],
+            ))  # NOTE(review): each record already ends in a newline and write_audit_round_file adds another - confirm the blank rows are intended.
+        self._audit_info.set_current_audit_round_file(match_records)
+        self._audit_info.add_new_ballots_to_aggregate()
@@ -27,15 +27,15 @@ def get_new_ballot_weights(election, r):
     new_ballot_weights = {}
     total = 0
     for ballot in election.get_ballots():
-        weight = election.get_weight_of_ballot(ballot)
+        weight = election.get_ballot_weight(ballot)
         new_ballot_weights[ballot] = gammavariate(weight, 1) if weight else 0
         total += new_ballot_weights[ballot]
     for ballot in election.get_ballots():
         new_ballot_weights[ballot] = int(r * new_ballot_weights[ballot] / total)
     return new_ballot_weights
 
 
-def audit(election, seed, unpopular_freq_threshold, alpha=0.05, trials=100):
+def audit(election, seed, unpopular_freq_threshold, stage_counter=0, alpha=0.05, trials=100, quick=False):
     """ Runs a Bayesian audit on the given senate election.
 
     :param :class:`SenateElection` election: The senate election to audit.
@@ -70,9 +70,6 @@ def audit(election, seed, unpopular_freq_threshold, alpha=0.05, trials=100):
     for cid in election.get_candidate_ids():
         election.add_ballot((cid,), 1)
 
-    # Current stage of the audit.
-    stage_counter = 0
-
     # Mapping from candidates to the set of ballots that elected them.
     candidate_to_ballots_map = {}
     candidate_outcomes = None
@@ -128,7 +125,7 @@ def audit(election, seed, unpopular_freq_threshold, alpha=0.05, trials=100):
                 ]),
             ),
         )
-
+        done = False
         if freq >= trials * (1 - alpha):
             print(
                 'Stopping because audit confirmed outcome:\n',
@@ -137,24 +134,31 @@ def audit(election, seed, unpopular_freq_threshold, alpha=0.05, trials=100):
                     election.get_num_ballots_drawn(),
                 ),
             )
+            done = True
             break
 
         if election.get_num_ballots_drawn() >= election.get_num_cast_ballots():
             print('Audit has looked at all ballots. Done.')
+            done = True
             break
 
-    for cid, cid_freq in sorted(
-            candidate_outcomes.items(),
-            key=lambda x: (x[1], x[0]),
-        ):
-        if cid_freq / trials < unpopular_freq_threshold:
-            print(
-                '  One set of ballots that elected low frequency '
-                'candidate {} which occurred in {}% of outcomes\n'.format(
-                        str(cid),
-                        str(cid_freq),
-                ),
-                '  {}'.format(candidate_to_ballots_map[cid]),
-            )
+        if not quick:
+            break
+
+    if candidate_outcomes is not None and done:
+        for cid, cid_freq in sorted(
+                candidate_outcomes.items(),
+                key=lambda x: (x[1], x[0]),
+            ):
+            if cid_freq / trials < unpopular_freq_threshold:
+                print(
+                    '  One set of ballots that elected low frequency '
+                    'candidate {} which occurred in {}% of outcomes\n'.format(
+                            str(cid),
+                            str(cid_freq),
+                    ),
+                    '  {}'.format(candidate_to_ballots_map[cid]),
+                )
 
     print('Elasped time: {} seconds.'.format(time() - start_time))
+    return done
@@ -0,0 +1,88 @@
+# -*- coding: utf-8 -*-
+
+""" Encapsulates a Utility Function for Creating the CLI. """
+
+from argparse import ArgumentParser
+
+from constants import DEFAULT_SAMPLE_INCREMENT_SIZE
+from constants import DEFAULT_SIMULATED_SENATE_ELECTION_NUM_BALLOTS
+from constants import DEFAULT_SIMULATED_SENATE_ELECTION_NUM_CANDIDATES
+from constants import DEFAULT_UNPOPULAR_FREQUENCY_THRESHOLD
+from constants import QUICK_MODE
+from constants import REAL_MODE
+from constants import SIMULATION_MODE
+from constants import STATES
+
+# -m simulation -s SEED --num-ballots 1000 --num-candidates 10
+# -m quick -s SEED --state TAS --max-ballots 1000 --config-file config.json
+# -m real -s SEED --state TAS --config-file config.json [--selected-ballots-file ...]
+
+def parse_command_line_args():
+    """ Builds the audit argument parser, parses the command line, and returns the resulting argparse namespace. """
+    parser = ArgumentParser()
+    parser.add_argument(
+        'mode',
+        type=str,
+        metavar='MODE',
+        choices=[QUICK_MODE, REAL_MODE, SIMULATION_MODE],
+        help='Mode in which to run the audit.',
+    )
+    parser.add_argument(
+        '-s',
+        '--seed',
+        type=int,
+        default=1,
+        help='Starting value of the random number generator.'
+    )
+    parser.add_argument(
+        '--num-ballots',
+        type=int,
+        default=DEFAULT_SIMULATED_SENATE_ELECTION_NUM_BALLOTS,
+        help='Number of cast ballots for a simulated senate election.',
+    )
+    parser.add_argument(
+        '--num-candidates',
+        type=int,
+        default=DEFAULT_SIMULATED_SENATE_ELECTION_NUM_CANDIDATES,
+        help='Number of candidates for a simulated senate election.',
+    )
+    parser.add_argument(
+        '--state',
+        type=str,
+        choices=STATES,
+        help='Abbreviation of state name to run senate election audit for.',
+    )
+    parser.add_argument(
+        '-c',
+        '--config-file',
+        type=str,
+        help='Path to Australian senate election configuration file (see \
+        https://github.com/grahame/dividebatur/blob/master/aec_data/fed2016/\
+        aec_fed2016.json as an example).',
+    )  # NOTE(review): the backslash continuations keep the next line's leading spaces inside the help string, which splits the URL - confirm.
+    parser.add_argument(
+        '--sample',
+        action='store_true',
+        help='Whether to sample the remaining ballots or run a stage of the audit',
+    )
+    parser.add_argument(
+        '--max-ballots',
+        type=int,
+        help='Maximum number of ballots to check for a real senate election \
+        audit.',
+    )
+    parser.add_argument(
+        '-f',
+        '--unpopular-frequency-threshold',
+        type=float,
+        default=DEFAULT_UNPOPULAR_FREQUENCY_THRESHOLD,
+        help='Upper bound on the frequency of trials a candidate is elected \
+        in order for the candidate to be deemed unpopular.',
+    )
+    parser.add_argument(
+        '--sample-increment-size',
+        type=int,
+        default=DEFAULT_SAMPLE_INCREMENT_SIZE,
+        help='Number of ballots to add to growing sample.'
+    )
+    return parser.parse_args()