
Commit 04b0638

RobAltenatreo authored and committed
add a3c sample for cartpole.
Signed-off-by: Robert Altena <[email protected]>
1 parent 1b4a225 commit 04b0638

File tree

2 files changed: +92 -8 lines changed

rl4j-cartpole-examples/src/main/java/A3CCartpole.java

Lines changed: 81 additions & 0 deletions

@@ -0,0 +1,81 @@
/* *****************************************************************************
 * Copyright (c) 2015-2019 Skymind, Inc.
 *
 * This program and the accompanying materials are made available under the
 * terms of the Apache License, Version 2.0 which is available at
 * https://www.apache.org/licenses/LICENSE-2.0.
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 *
 * SPDX-License-Identifier: Apache-2.0
 ******************************************************************************/

import org.deeplearning4j.rl4j.learning.async.a3c.discrete.A3CDiscrete;
import org.deeplearning4j.rl4j.learning.async.a3c.discrete.A3CDiscreteDense;
import org.deeplearning4j.rl4j.mdp.gym.GymEnv;
import org.deeplearning4j.rl4j.network.ac.ActorCriticFactorySeparateStdDense;
import org.deeplearning4j.rl4j.policy.ACPolicy;
import org.deeplearning4j.rl4j.space.Box;
import org.deeplearning4j.rl4j.space.DiscreteSpace;
import org.deeplearning4j.rl4j.space.Encodable;
import org.nd4j.linalg.learning.config.Adam;

import java.io.IOException;

/**
 * @author rubenfiszel ([email protected]) on 8/18/16.
 *
 * A3C on CartPole.
 * This example shows the rl4j classes that implement the algorithm from
 * "Asynchronous Methods for Deep Reinforcement Learning" (Mnih et al.):
 * https://arxiv.org/abs/1602.01783
 */
public class A3CCartpole {

    public static void main(String[] args) throws IOException {
        a3cCartPole();
    }

    private static void a3cCartPole() throws IOException {

        //define the mdp from gym (name, render)
        String envID = "CartPole-v1";
        GymEnv<Encodable, Integer, DiscreteSpace> mdp = new GymEnv<Encodable, Integer, DiscreteSpace>(envID, false, false);

        A3CDiscrete.A3CConfiguration CARTPOLE_A3C =
                new A3CDiscrete.A3CConfiguration(
                        123,     //random seed
                        200,     //max steps per epoch
                        500000,  //max total steps
                        8,       //number of threads
                        20,      //t_max: steps between gradient updates
                        10,      //number of no-op warmup steps
                        0.01,    //reward scaling
                        0.99,    //gamma (discount factor)
                        1.0      //td-error clipping
                );

        ActorCriticFactorySeparateStdDense.Configuration CARTPOLE_NET_A3C = ActorCriticFactorySeparateStdDense.Configuration
                .builder().updater(new Adam(1e-2)).l2(0).numHiddenNodes(16).numLayer(3).build();

        //define the training
        A3CDiscreteDense<Encodable> a3c = new A3CDiscreteDense<Encodable>(mdp, CARTPOLE_NET_A3C, CARTPOLE_A3C);

        a3c.train(); //start the training
        mdp.close();

        ACPolicy<Encodable> pol = a3c.getPolicy();

        pol.save("/tmp/val1/", "/tmp/pol1");

        //reload the policy; it will be equal to "pol", but without the randomness
        ACPolicy<Box> pol2 = ACPolicy.load("/tmp/val1/", "/tmp/pol1");
        Cartpole.loadCartpole(pol2, envID);
        System.out.println("sample finished.");
    }
}
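For reference, a minimal sketch of reloading and watching the saved policy outside this class. The GymEnv constructor, ACPolicy.load, and the /tmp paths are taken from the sample above; the class name PlaySavedA3C and the use of the policy's play method to run a single rendered episode are assumptions, not part of this commit.

import org.deeplearning4j.rl4j.mdp.gym.GymEnv;
import org.deeplearning4j.rl4j.policy.ACPolicy;
import org.deeplearning4j.rl4j.space.ActionSpace;
import org.deeplearning4j.rl4j.space.Box;

import java.io.IOException;

//hypothetical helper, not part of this commit
public class PlaySavedA3C {
    public static void main(String[] args) throws IOException {
        //reload the policy saved by A3CCartpole (paths taken from the sample above)
        ACPolicy<Box> pol = ACPolicy.load("/tmp/val1/", "/tmp/pol1");

        //same environment, but rendered this time so the agent can be watched
        GymEnv<Box, Integer, ActionSpace<Integer>> mdp =
                new GymEnv<Box, Integer, ActionSpace<Integer>>("CartPole-v1", true, false);

        double reward = pol.play(mdp); //play one episode, returning its cumulative reward
        System.out.println("episode reward: " + reward);
        mdp.close();
    }
}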

rl4j-cartpole-examples/src/main/java/Cartpole.java

Lines changed: 11 additions & 8 deletions
@@ -19,6 +19,7 @@
 import org.deeplearning4j.rl4j.mdp.gym.GymEnv;
 import org.deeplearning4j.rl4j.network.dqn.DQNFactoryStdDense;
 import org.deeplearning4j.rl4j.policy.DQNPolicy;
+import org.deeplearning4j.rl4j.policy.Policy;
 import org.deeplearning4j.rl4j.space.ActionSpace;
 import org.deeplearning4j.rl4j.space.Box;
 import org.deeplearning4j.rl4j.space.DiscreteSpace;
@@ -29,15 +30,17 @@
 /**
  * @author rubenfiszel ([email protected]) on 8/11/16.
  *
- * Main example for Cartpole DQN
+ * Cartpole DQN
+ * This example shows the basic rl4j classes implementing the 2013 DeepMind article by Mnih et al.:
+ * https://arxiv.org/pdf/1312.5602.pdf
  */
 public class Cartpole
 {
-    private static String envUD = "CartPole-v1";
+    private static String envID = "CartPole-v1";
 
     public static void main(String[] args) {
         DQNPolicy<Box> pol = cartPole(); //get a trained agent to play the game.
-        loadCartpole(pol); //show off the trained agent.
+        loadCartpole(pol, envID); //show off the trained agent.
     }
 
     private static DQNPolicy<Box> cartPole() {
@@ -71,10 +74,9 @@ private static DQNPolicy<Box> cartPole() {
                 .build();
 
         //Create the gym environment. We include these through the rl4j-gym dependency.
-        GymEnv<Box, Integer, DiscreteSpace> mdp = new GymEnv<Box, Integer, DiscreteSpace>(envUD, false, false);
+        GymEnv<Box, Integer, DiscreteSpace> mdp = new GymEnv<Box, Integer, DiscreteSpace>(envID, false, false);
 
-        //Create the solver. This class implements the 2013 article by Mnih et al. from deepmind.
-        // https://arxiv.org/pdf/1312.5602.pdf
+        //Create the solver.
         QLearningDiscreteDense<Box> dql = new QLearningDiscreteDense<Box>(mdp, CARTPOLE_NET, CARTPOLE_QL);
 
         dql.train();
@@ -83,11 +85,12 @@ private static DQNPolicy<Box> cartPole() {
         return dql.getPolicy(); //return the trained agent.
     }
 
-    private static void loadCartpole(DQNPolicy<Box> pol) {
+    //pass in a generic policy and envID to allow access from other samples in this package.
+    static void loadCartpole(Policy<Box, Integer> pol, String envID) {
         //use the trained agent on a new similar mdp (but render it this time)
 
         //define the mdp from gym (name, render)
-        GymEnv<Box, Integer, ActionSpace<Integer>> mdp2 = new GymEnv<Box, Integer, ActionSpace<Integer>>(envUD, true, false);
+        GymEnv<Box, Integer, ActionSpace<Integer>> mdp2 = new GymEnv<Box, Integer, ActionSpace<Integer>>(envID, true, false);
 
         //evaluate the agent
         double rewards = 0;
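The hunk ends just as the evaluation begins; the remainder of loadCartpole is unchanged by this commit and not shown. As a hedged sketch only, here is how such an evaluation loop could continue, written as a helper method that could sit inside the Cartpole class; the method name evaluate, the episode count, the logging calls (assuming java.util.logging.Logger is imported), and the use of the policy's play method are assumptions, not taken from this diff.

//hypothetical helper illustrating the evaluation that follows in loadCartpole
static void evaluate(Policy<Box, Integer> pol,
                     GymEnv<Box, Integer, ActionSpace<Integer>> mdp2) {
    double rewards = 0;
    int episodes = 10; //episode count is an assumption
    for (int i = 0; i < episodes; i++) {
        mdp2.reset();                   //start a fresh episode
        double reward = pol.play(mdp2); //let the trained policy play it out
        rewards += reward;
        Logger.getAnonymousLogger().info("Reward: " + reward);
    }
    Logger.getAnonymousLogger().info("average reward: " + rewards / episodes);
}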
