Skip to content

Commit 5ca3880

Browse files
committed
Update evolutionary algorithms example
1 parent 4cd5400 commit 5ca3880

File tree

4 files changed

+26
-12
lines changed

4 files changed

+26
-12
lines changed

examples/cart-pole-vectorized/main.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ def run(time_limit: float, num_envs: int, seed: int) -> None:
8282
},
8383
ext_type=GymnasiumVectorized,
8484
ext_params={'env_id': 'CartPole-v1', 'num_envs': num_envs},
85-
logger_types=[CsvLogger],
85+
logger_types=CsvLogger,
8686
logger_params={'csv_path': f'cartpole-ppo-{num_envs}-envs-{seed}.csv'}
8787
)
8888

examples/pendulum-evo/main.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,14 @@
33
import evosax.algorithms
44
import gymnasium as gym
55
import numpy as np
6+
import optax
67
from chex import Array
78
from flax import linen as nn
89

910
from reinforced_lib import RLib
1011
from reinforced_lib.agents.neuro import Evosax
1112
from reinforced_lib.exts import GymnasiumVectorized
12-
from reinforced_lib.logs import CsvLogger, StdoutLogger
13+
from reinforced_lib.logs import CsvLogger
1314

1415

1516
class Network(nn.Module):
@@ -40,17 +41,23 @@ def run(evo_alg: type, num_epochs: int, population_size: int, seed: int) -> None
4041
Integer used as the random key.
4142
"""
4243

44+
if issubclass(evo_alg, evosax.algorithms.SimpleES):
45+
evo_kwargs = {'optimizer': optax.adam(0.03)}
46+
else:
47+
evo_kwargs = {}
48+
4349
rl = RLib(
4450
agent_type=Evosax,
4551
agent_params={
4652
'network': Network(),
4753
'evo_strategy': evo_alg,
48-
'evo_strategy_default_params': {'std_init': 0.1},
54+
'evo_strategy_kwargs': evo_kwargs,
55+
'evo_strategy_default_params': {'std_init': 0.05},
4956
'population_size': population_size
5057
},
5158
ext_type=GymnasiumVectorized,
5259
ext_params={'env_id': 'Pendulum-v1', 'num_envs': population_size},
53-
logger_types=[CsvLogger, StdoutLogger],
60+
logger_types=CsvLogger,
5461
logger_params={'csv_path': f'pendulum-{evo_alg.__name__}-evo-{seed}.csv'}
5562
)
5663

@@ -78,7 +85,7 @@ def make_env():
7885
args = ArgumentParser()
7986

8087
args.add_argument('--evo_alg', type=str, required=True)
81-
args.add_argument('--num_epochs', default=300, type=int)
88+
args.add_argument('--num_epochs', default=500, type=int)
8289
args.add_argument('--population_size', default=64, type=int)
8390
args.add_argument('--seed', default=42, type=int)
8491

examples/pendulum-evo/run_all.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
#!/bin/bash
22

3-
evo_algs=("CMA_ES" "PGPE" "SimpleGA")
3+
evo_algs=("CMA_ES" "PGPE" "SimpleES")
44
seeds=(1 2 3 4 5 6 7 8 9 10)
55

66
for alg in "${evo_algs[@]}"; do
77
for s in "${seeds[@]}"; do
8-
echo "Running with $n environments and seed $s"
8+
echo "Running with algorithm $alg and seed $s"
99
python main.py --evo_alg $alg --seed $s
1010
done
1111
done

reinforced_lib/agents/neuro/evosax.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -44,15 +44,22 @@ class EvosaxState(AgentState):
4444

4545
class Evosax(BaseAgent):
4646
r"""
47-
Evolution strategies (ES)-based agent using the evosax library [12]_. This implementation maintains a population
47+
Evolution strategies (ES)-based agent using the ``evosax`` library [12]_. This implementation maintains a population
4848
of candidate solutions (parameter vectors), evaluates them in parallel across environments, and updates the
4949
population by applying an evolutionary algorithm. Unlike gradient-based RL methods, this agent does not rely
5050
on backpropagation through the value or policy network. Instead, the network parameters are evolved using
5151
black-box optimization. This agent is suitable for environments with both discrete and continuous action spaces.
52-
The user is responsible for providing appropriate network output in the correct format (e.g., discrete actions
53-
should be sampled from logits with ``jax.random.categorical`` inside the network definition). Note that
54-
this agent does not discount future rewards, therefore, the fitness is computed as a simple sum of rewards
55-
obtained during the evaluation phase.
52+
53+
**Note!** The user is responsible for providing appropriate network output in the correct format (e.g., discrete
54+
actions should be sampled from logits with ``jax.random.categorical`` inside the network definition).
55+
56+
**Note!** This agent does not discount future rewards, therefore, the fitness is computed as a simple sum of
57+
rewards obtained during the evaluation phase.
58+
59+
**Note!** This agent is compatible only with distribution-based evolution strategies from the ``evosax`` library
60+
(see `this list <https://github.com/RobertTLange/evosax/tree/main/evosax/algorithms/distribution_based>`_ for
61+
available algorithms). Population-based methods (`listed here <https://github.com/RobertTLange/evosax/tree/main/evosax/algorithms/population_based>`_)
62+
will be supported in future releases.
5663
5764
Parameters
5865
----------

0 commit comments

Comments
 (0)