@@ -17,6 +17,7 @@
 package org.deeplearning4j.examples.rl4j;
 
 import java.io.IOException;
+
 import org.deeplearning4j.rl4j.learning.HistoryProcessor;
 import org.deeplearning4j.rl4j.learning.sync.qlearning.QLearning;
 import org.deeplearning4j.rl4j.learning.sync.qlearning.discrete.QLearningDiscreteConv;
@@ -26,47 +27,46 @@
 
 /**
  * @author saudet
- *
+ * <p>
  * Main example for DQN with The Arcade Learning Environment (ALE)
- *
  */
 public class ALE {
 
     public static HistoryProcessor.Configuration ALE_HP =
-        new HistoryProcessor.Configuration(
-            4,   //History length
-            84,  //resize width
-            110, //resize height
-            84,  //crop width
-            84,  //crop height
-            0,   //cropping x offset
-            0,   //cropping y offset
-            4    //skip mod (one frame is picked every x)
-        );
+            new HistoryProcessor.Configuration(
+                    4,   //History length
+                    84,  //resize width
+                    84,  //resize height
+                    160, //crop width
+                    194, //crop height
+                    0,   //cropping x offset
+                    32,  //cropping y offset
+                    4    //skip mod (one frame is picked every x)
+            );
 
     public static QLearning.QLConfiguration ALE_QL =
-        new QLearning.QLConfiguration(
-            123,     //Random seed
-            10000,   //Max step by epoch
-            8000000, //Max step
-            1000000, //Max size of experience replay
-            32,      //size of batches
-            10000,   //target update (hard)
-            500,     //num step noop warmup
-            0.1,     //reward scaling
-            0.99,    //gamma
-            100.0,   //td-error clipping
-            0.1f,    //min epsilon
-            100000,  //num step for eps greedy anneal
-            true     //double-dqn
-        );
+            new QLearning.QLConfiguration(
+                    123,     //Random seed
+                    10000,   //Max step by epoch
+                    8000000, //Max step
+                    1000000, //Max size of experience replay
+                    32,      //size of batches
+                    10000,   //target update (hard)
+                    500,     //num step noop warmup
+                    0.1,     //reward scaling
+                    0.99,    //gamma
+                    100.0,   //td-error clipping
+                    0.1f,    //min epsilon
+                    100000,  //num step for eps greedy anneal
+                    true     //double-dqn
+            );
 
     public static DQNFactoryStdConv.Configuration ALE_NET_QL =
-        new DQNFactoryStdConv.Configuration(
-            0.00025, //learning rate
-            0.000,   //l2 regularization
-            null, null
-        );
+            new DQNFactoryStdConv.Configuration(
+                    0.00025, //learning rate
+                    0.000,   //l2 regularization
+                    null, null
+            );
 
     public static void main(String[] args) throws IOException {
 
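The diff is truncated at the opening of main. For orientation, below is a minimal sketch of how the three configuration objects above are typically wired together in RL4J of this vintage. The ALEMDP and DataManager classes, the "pong.bin" ROM name, the saved model name, and the exact constructor argument order are assumptions for illustration, not part of this commit.

    // Hypothetical continuation of ALE.main(); not part of this commit.
    // Assumes RL4J classes of the same era:
    //   import org.deeplearning4j.rl4j.mdp.ale.ALEMDP;
    //   import org.deeplearning4j.rl4j.util.DataManager;
    public static void main(String[] args) throws IOException {
        // Record training data and checkpoints under a fresh rl4j-data folder
        DataManager manager = new DataManager(true);

        // Emulator environment; needs an ALE ROM file on disk (name assumed here)
        ALEMDP mdp = new ALEMDP("pong.bin");

        // The trainer ties together the environment, the conv-net factory config
        // (ALE_NET_QL), frame preprocessing (ALE_HP), and the Q-learning
        // hyperparameters (ALE_QL) defined above
        QLearningDiscreteConv<ALEMDP.GameScreen> dql =
                new QLearningDiscreteConv<>(mdp, ALE_NET_QL, ALE_HP, ALE_QL, manager);

        dql.train();                           // run up to the 8M-step budget
        dql.getPolicy().save("ale-dql.model"); // persist the learned policy
        mdp.close();                           // shut down the emulator
    }

When run this way, training statistics land in the DataManager's data directory, and the saved policy can be reloaded later (for example with DQNPolicy.load) to watch the trained agent play.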