11from __future__ import print_function
22import numpy as np
3+ import scipy .stats as st
34import multiprocessing as mp
5+ from collections .abc import Iterable
46
57np .random .seed (0 )
68
def worker_process(arg):
    """Pool helper: unpack a (reward_function, weights) pair and evaluate it.

    Must be a module-level function so it can be pickled by multiprocessing.
    """
    reward_fn, candidate = arg
    return reward_fn(candidate)
1113
class WeightUpdateStrategy:
    """Base class for weight-update rules used by EvolutionStrategy.

    Parameters
    ----------
    dim : int
        Number of weight layers; stateless rules ignore it, stateful ones
        (e.g. Adam) use it to size their per-layer moment buffers.
    learning_rate : float
        Step size applied to the estimated gradient.
    """
    __slots__ = ("learning_rate",)

    def __init__(self, dim, learning_rate):
        self.learning_rate = learning_rate


class strategies:
    """Namespace collecting the available update strategies."""

    class GD(WeightUpdateStrategy):
        """Plain gradient step: step = learning_rate * g."""

        def update(self, i, g):
            return self.learning_rate * g

    class Adam(WeightUpdateStrategy):
        """Adam optimizer (Kingma & Ba, 2015) with state per weight layer.

        Fixes to the previous revision:
        - the denominator used np.sqrt(np.sqrt(v)), i.e. v ** 0.25, instead
          of the standard sqrt(v);
        - bias correction was frozen at the t=1 approximation
          sqrt(1 - beta2) / (1 - beta1) instead of tracking the step count;
        - moments were held in np.zeros(dim), so a per-layer gradient that
          is itself an array could not be assigned into a scalar slot.
          Plain lists let each entry match the layer's shape.
        """
        __slots__ = ("eps", "beta1", "beta2", "m", "v", "t")

        def __init__(self, dim, learning_rate, eps=1e-8, beta1=0.9, beta2=0.999):
            super().__init__(dim, learning_rate)
            self.eps = eps
            self.beta1 = beta1
            self.beta2 = beta2
            self.m = [0.0] * dim  # first-moment estimate, one slot per layer
            self.v = [0.0] * dim  # second-moment estimate, one slot per layer
            self.t = [0] * dim    # per-layer update count for bias correction

        def update(self, i, g):
            """Return the Adam step for layer ``i`` given gradient ``g``
            (scalar or array of the layer's shape)."""
            self.t[i] += 1
            self.m[i] = self.beta1 * self.m[i] + (1 - self.beta1) * g
            self.v[i] = self.beta2 * self.v[i] + (1 - self.beta2) * (g ** 2)
            # Bias-corrected moment estimates, then the canonical Adam step.
            m_hat = self.m[i] / (1 - self.beta1 ** self.t[i])
            v_hat = self.v[i] / (1 - self.beta2 ** self.t[i])
            return self.learning_rate * m_hat / (np.sqrt(v_hat) + self.eps)
40+
1241
1342class EvolutionStrategy (object ):
1443 def __init__ (self , weights , get_reward_func , population_size = 50 , sigma = 0.1 , learning_rate = 0.03 , decay = 0.999 ,
15- num_threads = 1 ):
16-
44+ num_threads = 1 , limits = None , printer = None , distributions = None , strategy = None ):
45+ if limits is None :
46+ limits = (np .inf , - np .inf )
1747 self .weights = weights
48+ self .limits = limits
1849 self .get_reward = get_reward_func
1950 self .POPULATION_SIZE = population_size
20- self .SIGMA = sigma
51+ if distributions is None :
52+ distributions = st .norm (loc = 0. , scale = sigma )
53+ if isinstance (distributions , Iterable ):
54+ distributions = list (distributions )
55+ self .SIGMA = np .array ([d .std () for d in distributions ])
56+ else :
57+ self .SIGMA = distributions .std ()
58+
59+ self .distributions = distributions
2160 self .learning_rate = learning_rate
2261 self .decay = decay
2362 self .num_threads = mp .cpu_count () if num_threads == - 1 else num_threads
63+ if printer is None :
64+ printer = print
65+ self .printer = printer
66+ if strategy is None :
67+ strategy = strategies .GD
68+ self .strategy = strategy (len (weights ), self .learning_rate )
2469
2570 def _get_weights_try (self , w , p ):
2671 weights_try = []
2772 for index , i in enumerate (p ):
28- jittered = self .SIGMA * i
29- weights_try .append (w [index ] + jittered )
73+ weights_try .append (w [index ] + i )
3074 return weights_try
3175
3276 def get_weights (self ):
@@ -36,8 +80,13 @@ def _get_population(self):
3680 population = []
3781 for i in range (self .POPULATION_SIZE ):
3882 x = []
39- for w in self .weights :
40- x .append (np .random .randn (* w .shape ))
83+ if isinstance (self .distributions , Iterable ):
84+ for j , w in enumerate (self .weights ):
85+ x .append (self .distributions [j ].rvs (* w .shape ))
86+ else :
87+ for w in self .weights :
88+ x .append (self .distributions .rvs (* w .shape ))
89+
4190 population .append (x )
4291 return population
4392
@@ -59,10 +108,17 @@ def _update_weights(self, rewards, population):
59108 if std == 0 :
60109 return
61110 rewards = (rewards - rewards .mean ()) / std
111+ grad_factor = 1. / (self .POPULATION_SIZE * (self .SIGMA ** 2 ))
112+
62113 for index , w in enumerate (self .weights ):
63114 layer_population = np .array ([p [index ] for p in population ])
64- update_factor = self .learning_rate / (self .POPULATION_SIZE * self .SIGMA )
65- self .weights [index ] = w + update_factor * np .dot (layer_population .T , rewards ).T
115+ corr = np .dot (layer_population .T , rewards ).T
116+
117+ if not isinstance (grad_factor , np .ndarray ):
118+ g = grad_factor * corr
119+ else :
120+ g = grad_factor [index ] * corr
121+ self .weights [index ] = w + self .strategy .update (index , g )
66122 self .learning_rate *= self .decay
67123
68124 def run (self , iterations , print_step = 10 ):
@@ -75,7 +131,7 @@ def run(self, iterations, print_step=10):
75131 self ._update_weights (rewards , population )
76132
77133 if (iteration + 1 ) % print_step == 0 :
78- print ('iter %d. reward: %f' % (iteration + 1 , self .get_reward (self .weights )))
134+ self . printer ('iter %d. reward: %f' % (iteration + 1 , self .get_reward (self .weights )), ( self . weights if self . weights . shape [ 0 ] <= 10 else None ) )
79135 if pool is not None :
80136 pool .close ()
81137 pool .join ()
0 commit comments