Commit 1b023ab

RobAltenatreo authored and committed
first commit
Signed-off-by: Paul Dubs <[email protected]>
1 parent 76d4760 commit 1b023ab

File tree

5 files changed: 272 additions, 0 deletions

rl4j-ale-examples/.gitignore

Lines changed: 5 additions & 0 deletions
/.idea
/target
*.iml
pong.bin
ale-a3c.model

rl4j-ale-examples/pom.xml

Lines changed: 78 additions & 0 deletions
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>ArcadeLearningEnvironment</groupId>
    <artifactId>ArcadeLearningEnvironment</artifactId>
    <version>1.0-SNAPSHOT</version>
    <properties>
        <nd4j.version>1.0.0-SNAPSHOT</nd4j.version>
        <rl4j.version>1.0.0-SNAPSHOT</rl4j.version>
        <logback.version>1.1.7</logback.version>

        <nd4j.backend>nd4j-native-platform</nd4j.backend>
    </properties>

    <repositories>
        <repository>
            <id>snapshots-repo</id>
            <url>https://oss.sonatype.org/content/repositories/snapshots</url>
            <releases>
                <enabled>false</enabled>
            </releases>
            <snapshots>
                <enabled>true</enabled>
                <updatePolicy>daily</updatePolicy> <!-- Optional, update daily -->
            </snapshots>
        </repository>
    </repositories>
    <dependencies>
        <!-- ND4J backend. You need one in every DL4J project. Normally define artifactId as either
             nd4j-native-platform or nd4j-cuda-X.X-platform to use CUDA GPUs (check the parent pom
             for supported CUDA versions). -->
        <dependency>
            <groupId>org.nd4j</groupId>
            <artifactId>${nd4j.backend}</artifactId>
            <version>${nd4j.version}</version>
        </dependency>
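        <!-- As a sketch of that swap: to train on CUDA GPUs you would point the nd4j.backend
             property above at a CUDA platform artifact instead. The exact artifact version below
             is an assumption for illustration; check the ND4J releases for the CUDA versions
             that match your toolkit.
        <nd4j.backend>nd4j-cuda-10.0-platform</nd4j.backend>
        -->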
        <dependency>
            <groupId>org.deeplearning4j</groupId>
            <artifactId>rl4j-core</artifactId>
            <version>${rl4j.version}</version>
        </dependency>
        <dependency>
            <groupId>org.deeplearning4j</groupId>
            <artifactId>rl4j-gym</artifactId>
            <version>${rl4j.version}</version>
        </dependency>
        <dependency>
            <groupId>org.deeplearning4j</groupId>
            <artifactId>rl4j-ale</artifactId>
            <version>${rl4j.version}</version>
        </dependency>
        <!-- The Arcade Learning Environment (ALE) is under the GPL license, so we cannot use it as a dependency of RL4J. -->
        <dependency>
            <groupId>org.bytedeco</groupId>
            <artifactId>ale-platform</artifactId>
            <version>0.6.0-1.5.2</version>
        </dependency>
        <dependency>
            <groupId>org.deeplearning4j</groupId>
            <artifactId>rl4j-malmo</artifactId>
            <version>${rl4j.version}</version>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>3.8.1</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>ch.qos.logback</groupId>
            <artifactId>logback-classic</artifactId>
            <version>${logback.version}</version>
        </dependency>
    </dependencies>

</project>

rl4j-ale-examples/src/main/java/A3CALE.java

Lines changed: 82 additions & 0 deletions
/* *****************************************************************************
 * Copyright (c) 2015-2019 Skymind, Inc.
 *
 * This program and the accompanying materials are made available under the
 * terms of the Apache License, Version 2.0 which is available at
 * https://www.apache.org/licenses/LICENSE-2.0.
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 *
 * SPDX-License-Identifier: Apache-2.0
 ******************************************************************************/

import org.deeplearning4j.rl4j.learning.HistoryProcessor;
import org.deeplearning4j.rl4j.learning.async.a3c.discrete.A3CDiscrete;
import org.deeplearning4j.rl4j.learning.async.a3c.discrete.A3CDiscreteConv;
import org.deeplearning4j.rl4j.mdp.ale.ALEMDP;
import org.deeplearning4j.rl4j.network.ac.ActorCriticFactoryCompGraphStdConv;
import org.nd4j.linalg.learning.config.Adam;

import java.io.IOException;

/**
 * @author saudet
 *
 * Main example for A3C with The Arcade Learning Environment (ALE)
 */
public class A3CALE {

    public static void main(String[] args) throws IOException {
        HistoryProcessor.Configuration ALE_HP = new HistoryProcessor.Configuration(
                4,       //history length
                84,      //resize width
                110,     //resize height
                84,      //crop width
                84,      //crop height
                0,       //cropping x offset
                0,       //cropping y offset
                4        //skip mod (one frame is picked every x frames)
        );

        A3CDiscrete.A3CConfiguration ALE_A3C = new A3CDiscrete.A3CConfiguration(
                123,     //random seed
                10000,   //max step by epoch
                8000000, //max step
                8,       //number of threads
                32,      //t_max
                500,     //num step noop warmup
                0.1,     //reward scaling
                0.99,    //gamma
                10.0     //td-error clipping
        );

        final ActorCriticFactoryCompGraphStdConv.Configuration ALE_NET_A3C =
                new ActorCriticFactoryCompGraphStdConv.Configuration(
                        0.000,             //l2 regularization
                        new Adam(0.00025), //learning rate
                        null, false
                );

        //set up the emulation environment through ALE; you will need a ROM file
        ALEMDP mdp = new ALEMDP("pong.bin");

        //set up the training
        A3CDiscreteConv<ALEMDP.GameScreen> a3c = new A3CDiscreteConv<ALEMDP.GameScreen>(mdp, ALE_NET_A3C, ALE_HP, ALE_A3C);

        //start the training
        a3c.train();

        //save the model at the end
        a3c.getPolicy().save("ale-a3c.model");

        //close the ALE environment
        mdp.close();
    }
}

rl4j-ale-examples/src/main/java/ALE.java

Lines changed: 81 additions & 0 deletions
/* *****************************************************************************
 * Copyright (c) 2015-2019 Skymind, Inc.
 *
 * This program and the accompanying materials are made available under the
 * terms of the Apache License, Version 2.0 which is available at
 * https://www.apache.org/licenses/LICENSE-2.0.
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 *
 * SPDX-License-Identifier: Apache-2.0
 ******************************************************************************/

import java.io.IOException;
import org.deeplearning4j.rl4j.learning.HistoryProcessor;
import org.deeplearning4j.rl4j.learning.sync.qlearning.QLearning;
import org.deeplearning4j.rl4j.learning.sync.qlearning.discrete.QLearningDiscreteConv;
import org.deeplearning4j.rl4j.mdp.ale.ALEMDP;
import org.deeplearning4j.rl4j.network.dqn.DQNFactoryStdConv;

/**
 * @author saudet
 *
 * Main example for DQN with The Arcade Learning Environment (ALE).
 * This sample shows how to set up a simple ALE for training. This setup will take a long time to master the game.
 */
public class ALE {

    public static void main(String[] args) throws IOException {

        HistoryProcessor.Configuration ALE_HP = new HistoryProcessor.Configuration(
                4,       //history length
                84,      //resize width
                110,     //resize height
                84,      //crop width
                84,      //crop height
                0,       //cropping x offset
                0,       //cropping y offset
                4        //skip mod (one frame is picked every x frames)
        );

        QLearning.QLConfiguration ALE_QL =
                new QLearning.QLConfiguration(
                        123,     //random seed
                        10000,   //max step by epoch
                        8000000, //max step
                        1000000, //max size of experience replay
                        32,      //size of batches
                        10000,   //target update (hard)
                        500,     //num step noop warmup
                        0.1,     //reward scaling
                        0.99,    //gamma
                        100.0,   //td-error clipping
                        0.1f,    //min epsilon
                        100000,  //num step for eps greedy anneal
                        true     //double-dqn
                );

        DQNFactoryStdConv.Configuration ALE_NET_QL =
                new DQNFactoryStdConv.Configuration(
                        0.00025, //learning rate
                        0.000,   //l2 regularization
                        null, null
                );

        //set up the emulation environment through ALE; you will need a ROM file.
        //Set render to true to see the agent play (poorly). You can also see how slowly the data is generated and
        //understand why training would take a long time.
        ALEMDP mdp = new ALEMDP("E:\\projects\\ArcadeLearningEnvironment\\pong.bin", false);
        //set up the training
        QLearningDiscreteConv<ALEMDP.GameScreen> dql = new QLearningDiscreteConv<ALEMDP.GameScreen>(mdp, ALE_NET_QL, ALE_HP, ALE_QL);

        dql.train();                           //start the training
        dql.getPolicy().save("ale-dql.model"); //save the model at the end
        mdp.close();
    }
}

rl4j-ale-examples/src/main/java/PlayALE.java

Lines changed: 26 additions & 0 deletions
import org.deeplearning4j.rl4j.mdp.ale.ALEMDP;
import org.deeplearning4j.rl4j.policy.ACPolicy;

import java.io.IOException;
import java.util.logging.Logger;

public class PlayALE {
    public static void main(String[] args) throws IOException {
        ALEMDP mdp = new ALEMDP("pong.bin");

        //load the previously trained agent
        ACPolicy<ALEMDP.GameScreen> pol2 = ACPolicy.load("ale-a3c.model");

        //evaluate the agent over 10 episodes
        double rewards = 0;
        for (int i = 0; i < 10; i++) {
            mdp.reset();
            double reward = pol2.play(mdp);
            rewards += reward;
            Logger.getAnonymousLogger().info("Reward: " + reward);
        }

        //average over the 10 episodes played above
        Logger.getAnonymousLogger().info("average: " + rewards / 10);
    }
}
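
PlayALE above evaluates only the saved A3C model. A matching sketch for the DQN model that ALE.java saves ("ale-dql.model") could look like the following; the DQNPolicy.load call is assumed to mirror ACPolicy.load, and everything else follows PlayALE.

import org.deeplearning4j.rl4j.mdp.ale.ALEMDP;
import org.deeplearning4j.rl4j.policy.DQNPolicy;

import java.io.IOException;
import java.util.logging.Logger;

public class PlayALEDQN {
    public static void main(String[] args) throws IOException {
        ALEMDP mdp = new ALEMDP("pong.bin");

        //load the DQN policy saved by ALE.java (assumes DQNPolicy.load, mirroring ACPolicy.load)
        DQNPolicy<ALEMDP.GameScreen> pol = DQNPolicy.load("ale-dql.model");

        //evaluate over 10 episodes, same loop as PlayALE
        double rewards = 0;
        for (int i = 0; i < 10; i++) {
            mdp.reset();
            double reward = pol.play(mdp);
            rewards += reward;
            Logger.getAnonymousLogger().info("Reward: " + reward);
        }

        Logger.getAnonymousLogger().info("average: " + rewards / 10);
    }
}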
