Skip to content

Commit 60d7949

Browse files
committed
Working versions of pso_evolve.py, lookup_evolve.py, and ann_evolve.py
1 parent 789f0a8 commit 60d7949

File tree

3 files changed

+43
-100
lines changed

3 files changed

+43
-100
lines changed

ann_evolve.py

Lines changed: 17 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
"""
2-
Training ANN strategies.
2+
Training ANN strategies with an evolutionary algorithm.
33
44
Original code by Martin Jones @mojones:
55
https://gist.github.com/mojones/b809ba565c93feb8d44becc7b93e37c6
@@ -13,7 +13,7 @@
1313
-h --help show this
1414
-g GENERATIONS how many generations to run the program for [default: 100]
1515
-u MUTATION_RATE mutation rate i.e. probability that a given value will flip [default: 0.1]
16-
-d MUTATION_DISTANCE amount of change a mutation will cause [default: 0.1]
16+
-d MUTATION_DISTANCE amount of change a mutation will cause [default: 0.5]
1717
-b BOTTLENECK number of individuals to keep from each generation [default: 10]
1818
-i PROCESSORS number of processors to use [default: 1]
1919
-o OUTPUT_FILE file to write statistics to [default: ann_out.csv]
@@ -38,7 +38,6 @@
3838
def get_random_weights(number):
3939
return [random.uniform(-1, 1) for _ in range(number)]
4040

41-
4241
def score_single(my_strategy_factory, other_strategy_factory, length=200):
4342
if other_strategy_factory().classifier['stochastic']:
4443
repetitions = 10
@@ -57,12 +56,12 @@ def score_single(my_strategy_factory, other_strategy_factory, length=200):
5756
iteration_score = sum([g.score(pair)[0] for pair in
5857
zip(me.history, other.history)]) / length
5958
all_scores.append(iteration_score)
60-
return sum(all_scores)
59+
return sum(all_scores) / repetitions
6160

6261
def score_for(my_strategy_factory, other_strategies, iterations=200):
63-
my_scores = map(
62+
my_scores = list(map(
6463
lambda x: score_single(my_strategy_factory, x, iterations),
65-
other_strategies)
64+
other_strategies))
6665
my_average_score = sum(my_scores) / len(my_scores)
6766
return my_average_score
6867

@@ -73,35 +72,33 @@ def score_weights(weights, strategies, input_values=17, hidden_layer_size=10):
7372
from itertools import repeat
7473

7574
def score_all_weights(population, strategies):
76-
# args = (population, strategies)
7775
results = pool.starmap(score_weights, zip(population, repeat(strategies)))
78-
return sorted(results, reverse=True)
79-
# return sorted(pool.map(score_weights, *args), reverse=True)
76+
return list(sorted(results, reverse=True))
8077

8178
def evolve(starting_weights, mutation_rate, mutation_distance, generations,
8279
bottleneck, strategies, output_file):
8380

84-
current_bests = starting_weights
81+
# Append scores
82+
current_bests = [[0, x] for x in starting_weights]
8583

8684
for generation in range(generations):
87-
8885
with open(output_file, "a") as output:
89-
90-
# weights_to_copy = [x[1] for x in current_bests]
91-
weights_to_copy = current_bests[0:3]
86+
weights_to_copy = [x[1] for x in current_bests]
9287
copies = []
88+
# Crossover
9389
for w1 in weights_to_copy:
9490
for w2 in weights_to_copy:
9591
crossover = random.randrange(len(w1))
9692
new_weights = copy.deepcopy(
9793
w1[0:crossover]) + copy.deepcopy(w2[crossover:])
9894
copies.append(new_weights)
9995

100-
for c in copies:
96+
# Mutate
97+
for _, c in copies:
10198
for i in range(len(c)):
10299
if random.random() < mutation_rate:
103-
c[i] = c[i] * (
104-
1 + (random.uniform(-1, 1) * mutation_distance))
100+
r = 1 + random.uniform(-1, 1) * mutation_distance
101+
c[i] = c[i] * r
105102

106103
population = copies + weights_to_copy
107104

@@ -119,7 +116,7 @@ def evolve(starting_weights, mutation_rate, mutation_distance, generations,
119116
mutation_distance]:
120117
output.write(str(value) + "\t")
121118
output.write("\n")
122-
119+
print("Generation", generation, "| Best Score:", scores[0])
123120
mutation_rate *= 0.99
124121
mutation_distance *= 0.99
125122

@@ -138,9 +135,9 @@ def evolve(starting_weights, mutation_rate, mutation_distance, generations,
138135
starting_population = int(arguments['-k'])
139136
output_file = arguments['-o']
140137

141-
starting_weights = [get_random_weights(190) for _ in range(starting_population)]
138+
starting_weights = [[0, get_random_weights(190)] for _ in range(starting_population)]
142139

143-
strategies = [s for s in axl.all_strategies
140+
strategies = [s for s in axl.strategies
144141
if not s().classifier['long_run_time']]
145142

146143
evolve(starting_weights, mutation_rate, mutation_distance, generations,

axelrod_utils.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,8 @@ def score_for(my_strategy_factory, iterations=200):
4646
run 100 repetitions and take the average to get a good estimate.
4747
"""
4848
scores_for_all_opponents = []
49-
for opponent in axl.ordinary_strategies:
49+
opponents = [s for s in axl.strategies if not s().classifier['long_run_time']]
50+
for opponent in opponents:
5051
if opponent().classifier['stochastic']:
5152
repetitions = 100
5253
else:

pso_evolve.py

Lines changed: 24 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -6,74 +6,13 @@
66
Based on Martin Jones @mojones original LookerUp code
77
"""
88

9-
import axelrod
9+
from itertools import product
1010

1111
from pyswarm import pso
1212

13-
14-
class Gambler(Player):
15-
16-
name = 'Gambler'
17-
classifier = {
18-
'memory_depth': float('inf'),
19-
'stochastic': True,
20-
'makes_use_of': set(),
21-
'inspects_source': False,
22-
'manipulates_source': False,
23-
'manipulates_state': False
24-
}
25-
26-
@init_args
27-
def __init__(self, lookup_table=None):
28-
"""
29-
If no lookup table is provided to the constructor, then use the TFT one.
30-
"""
31-
Player.__init__(self)
32-
33-
if not lookup_table:
34-
lookup_table = {
35-
('', 'C', 'D') : 0,
36-
('', 'D', 'D') : 0,
37-
('', 'C', 'C') : 1,
38-
('', 'D', 'C') : 1,
39-
}
40-
41-
self.lookup_table = lookup_table
42-
# Rather than pass the number of previous turns (m) to consider in as a
43-
# separate variable, figure it out. The number of turns is the length
44-
# of the second element of any given key in the dict.
45-
self.plays = len(list(self.lookup_table.keys())[0][1])
46-
# The number of opponent starting actions is the length of the first
47-
# element of any given key in the dict.
48-
self.opponent_start_plays = len(list(self.lookup_table.keys())[0][0])
49-
# If the table dictates to ignore the opening actions of the opponent
50-
# then the memory classification is adjusted
51-
if self.opponent_start_plays == 0:
52-
self.classifier['memory_depth'] = self.plays
53-
54-
# Ensure that table is well-formed
55-
for k, v in lookup_table.items():
56-
if (len(k[1]) != self.plays) or (len(k[0]) != self.opponent_start_plays):
57-
raise ValueError("All table elements must have the same size")
58-
59-
60-
def strategy(self, opponent):
61-
# If there isn't enough history to lookup an action, cooperate.
62-
if len(self.history) < max(self.plays, self.opponent_start_plays):
63-
return C
64-
# Count backward m turns to get my own recent history.
65-
history_start = -1 * self.plays
66-
my_history = ''.join(self.history[history_start:])
67-
# Do the same for the opponent.
68-
opponent_history = ''.join(opponent.history[history_start:])
69-
# Get the opponents first n actions.
70-
opponent_start = ''.join(opponent.history[:self.opponent_start_plays])
71-
# Put these three strings together in a tuple.
72-
key = (opponent_start, my_history, opponent_history)
73-
# Look up the action associated with that tuple in the lookup table.
74-
action = float(self.lookup_table[key])
75-
return random_choice(action)
76-
13+
import axelrod as axl
14+
from axelrod import Gambler, init_args
15+
from axelrod_utils import score_single
7716

7817

7918
class TestGambler(Gambler):
@@ -83,7 +22,8 @@ class TestGambler(Gambler):
8322

8423
name = "TestGambler"
8524

86-
def __init__(self,pattern):
25+
@init_args
26+
def __init__(self, pattern):
8727
plays = 2
8828
opponent_start_plays = 2
8929

@@ -102,7 +42,7 @@ def __init__(self,pattern):
10242
Gambler.__init__(self, lookup_table=lookup_table)
10343

10444

105-
def score_for_pattern(my_strategy_factory,pattern, iterations=200):
45+
def score_for_pattern(my_strategy_factory, pattern, iterations=200):
10646
"""
10747
Given a function that will return a strategy,
10848
calculate the average score per turn
@@ -112,29 +52,31 @@ def score_for_pattern(my_strategy_factory,pattern, iterations=200):
11252
a good estimate.
11353
"""
11454
scores_for_all_opponents = []
115-
for opponent in axelrod.ordinary_strategies:
55+
strategies = [s for s in axl.strategies
56+
if not s().classifier['long_run_time']]
57+
for opponent in strategies:
11658

117-
# decide whether we need to sample or not
118-
if opponent.classifier['stochastic']:
59+
# Decide whether we need to sample or not
60+
if opponent().classifier['stochastic']:
11961
repetitions = 100
12062
else:
12163
repetitions = 1
12264
scores_for_this_opponent = []
12365

124-
# calculate an average for this opponent
66+
# Calculate an average for this opponent
12567
for _ in range(repetitions):
12668
me = my_strategy_factory(pattern)
12769
other = opponent()
128-
# make sure that both players know what length the match will be
129-
me.set_tournament_attributes(length=iterations)
130-
other.set_tournament_attributes(length=iterations)
70+
# Make sure that both players know what length the match will be
71+
me.set_match_attributes(length=iterations)
72+
other.set_match_attributes(length=iterations)
13173

13274
scores_for_this_opponent.append(score_single(me, other, iterations))
13375

13476
mean_vs_opponent = sum(scores_for_this_opponent) / len(scores_for_this_opponent)
13577
scores_for_all_opponents.append(mean_vs_opponent)
13678

137-
# calculate the average for all opponents
79+
# Calculate the average for all opponents
13880
overall_average_score = sum(scores_for_all_opponents) / len(scores_for_all_opponents)
13981
return overall_average_score
14082

@@ -152,7 +94,10 @@ def optimizepso(x):
15294
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
15395
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
15496

155-
# The parameters of phip, phig and omega will lead to slower convergence
156-
xopt, fopt = pso(optimizepso, lb, ub, swarmsize=100, maxiter=20, processes=60,
157-
debug=True,
158-
phip=0.8, phig=0.8, omega=0.8)
97+
# There is a multiprocessing version (0.7) of pyswarm available at
98+
# https://github.com/tisimst/pyswarm
99+
# Pip installs version 0.6
100+
xopt, fopt = pso(optimizepso, lb, ub, swarmsize=100, maxiter=20,
101+
debug=True, phip=0.8, phig=0.8, omega=0.8)
102+
print(xopt)
103+
print(fopt)

0 commit comments

Comments (0)