Merge pull request #5 from Bam4d/cartpole_hyperparams_2

treo · web-flow · commit 115cb275e8e0 · 2020-03-19T13:00:22.000+01:00
These hyperparameters work significantly better than the previous ones.
diff --git a/rl4j-examples/src/main/java/org/deeplearning4j/examples/rl4j/Cartpole.java b/rl4j-examples/src/main/java/org/deeplearning4j/examples/rl4j/Cartpole.java
@@ -46,9 +46,9 @@ public class Cartpole
                     150000, //Max step
                     150000, //Max size of experience replay
                     32,     //size of batches
-                    500,    //target update (hard)
+                    100,    //target update (hard)
                     10,     //num step noop warmup
-                    0.01,   //reward scaling
+                    0.1,   //reward scaling
                     0.99,   //gamma
                     1.0,    //td-error clipping
                     0.1f,   //min epsilon
@@ -58,7 +58,7 @@ public class Cartpole
 
     public static DQNFactoryStdDense.Configuration CARTPOLE_NET =
         DQNFactoryStdDense.Configuration.builder()
-            .l2(0.001).updater(new Adam(0.0005)).numHiddenNodes(16).numLayer(3).build();
+            .updater(new Adam(0.001)).numHiddenNodes(16).numLayer(3).build();
 
     public static void main(String[] args) throws IOException {
         cartPole();