adds player.

RobAltena · treo · commit 8c5b8a7ce5b1 · 2020-06-16T11:01:28.000+02:00
Signed-off-by: Robert Altena &lt;Rob@Ra-ai.com&gt;
diff --git a/rl4j-ale-examples/pom.xml b/rl4j-ale-examples/pom.xml
@@ -69,9 +69,9 @@
             <scope>test</scope>
         </dependency>
         <dependency>
-            <groupId>ch.qos.logback</groupId>
-            <artifactId>logback-classic</artifactId>
-            <version>${logback.version}</version>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-nop</artifactId>
+            <version>1.7.26</version>
         </dependency>
     </dependencies>
 
diff --git a/rl4j-ale-examples/src/main/java/ALE.java b/rl4j-ale-examples/src/main/java/ALE.java
@@ -32,15 +32,15 @@ public class ALE {
     public static void main(String[] args) throws IOException {
 
         HistoryProcessor.Configuration ALE_HP = new HistoryProcessor.Configuration(
-                        4,       //History length
-                        84,      //resize width
-                        110,     //resize height
-                        84,      //crop width
-                        84,      //crop height
-                        0,       //cropping x offset
-                        0,       //cropping y offset
-                        4        //skip mod (one frame is picked every x
-                );
+                4,       //History length
+                84,      //resize width
+                110,     //resize height
+                84,      //crop width
+                84,      //crop height
+                0,       //cropping x offset
+                0,       //cropping y offset
+                4        //skip mod (one frame is picked every x frames)
+        );
 
         QLearning.QLConfiguration ALE_QL =
                 new QLearning.QLConfiguration(
@@ -69,7 +69,7 @@ public static void main(String[] args) throws IOException {
         //setup the emulation environment through ALE, you will need a ROM file
         // set render to true to see the agent play (poorly). You can also see how slowly the data is generated and
         // understand why training would take a long time.
-        ALEMDP mdp = new ALEMDP("E:\\projects\\ArcadeLearningEnvironment\\pong.bin", false);
+        ALEMDP mdp = new ALEMDP("pong.bin", true);
 
         //setup the training
         QLearningDiscreteConv<ALEMDP.GameScreen> dql = new QLearningDiscreteConv<ALEMDP.GameScreen>(mdp, ALE_NET_QL, ALE_HP, ALE_QL);
diff --git a/rl4j-ale-examples/src/main/java/PlayALE.java b/rl4j-ale-examples/src/main/java/PlayALE.java
@@ -1,26 +1,50 @@
+/* *****************************************************************************
+ * Copyright (c) 2020 Konduit, Inc.
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Apache License, Version 2.0 which is available at
+ * https://www.apache.org/licenses/LICENSE-2.0.
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ ******************************************************************************/
+
+import org.deeplearning4j.rl4j.learning.HistoryProcessor;
 import org.deeplearning4j.rl4j.mdp.ale.ALEMDP;
 import org.deeplearning4j.rl4j.policy.ACPolicy;
 
 import java.io.IOException;
-import java.util.logging.Logger;
 
+/**
+ *  @author robaltena
+ *
+ *  This sample shows how to play an ALE game with a trained model.
+ */
 public class PlayALE {
     public static void main(String[] args) throws IOException {
-        ALEMDP mdp = new ALEMDP("E:\\projects\\ArcadeLearningEnvironment\\pong.bin");
+        ALEMDP mdp = new ALEMDP("pong.bin", true);
 
-        //load the previous agent
+        //load the trained agent
         ACPolicy<ALEMDP.GameScreen> pol2 = ACPolicy.load("ale-a3c.model");
 
-        //evaluate the agent
-        double rewards = 0;
-        for (int i = 0; i < 10; i++) {
-            mdp.reset();
-            double reward = pol2.play(mdp);
-            rewards += reward;
-            Logger.getAnonymousLogger().info("Reward: " + reward);
-        }
-
-        Logger.getAnonymousLogger().info("average: " + rewards/1000);
+        //The history processor configuration used in training; it defines the data preprocessing steps.
+        HistoryProcessor.Configuration ALE_HP = new HistoryProcessor.Configuration(
+                4,       //History length
+                84,      //resize width
+                110,     //resize height
+                84,      //crop width
+                84,      //crop height
+                0,       //cropping x offset
+                0,       //cropping y offset
+                4        //skip mod (one frame is picked every x frames)
+        );
 
+        pol2.play(mdp, ALE_HP);
+        mdp.close();
     }
 }
diff --git a/rl4j-ale-examples/src/test/java/AleTest.java b/rl4j-ale-examples/src/test/java/AleTest.java