Skip to content

Commit 8c5b8a7

Browse files
RobAltenatreo
authored and committed
adds player.
Signed-off-by: Robert Altena <[email protected]>
1 parent 7b0a63c commit 8c5b8a7

File tree

4 files changed

+50
-99
lines changed

4 files changed

+50
-99
lines changed

rl4j-ale-examples/pom.xml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -69,9 +69,9 @@
6969
<scope>test</scope>
7070
</dependency>
7171
<dependency>
72-
<groupId>ch.qos.logback</groupId>
73-
<artifactId>logback-classic</artifactId>
74-
<version>${logback.version}</version>
72+
<groupId>org.slf4j</groupId>
73+
<artifactId>slf4j-nop</artifactId>
74+
<version>1.7.26</version>
7575
</dependency>
7676
</dependencies>
7777

rl4j-ale-examples/src/main/java/ALE.java

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -32,15 +32,15 @@ public class ALE {
3232
public static void main(String[] args) throws IOException {
3333

3434
HistoryProcessor.Configuration ALE_HP = new HistoryProcessor.Configuration(
35-
4, //History length
36-
84, //resize width
37-
110, //resize height
38-
84, //crop width
39-
84, //crop height
40-
0, //cropping x offset
41-
0, //cropping y offset
42-
4 //skip mod (one frame is picked every x
43-
);
35+
4, //History length
36+
84, //resize width
37+
110, //resize height
38+
84, //crop width
39+
84, //crop height
40+
0, //cropping x offset
41+
0, //cropping y offset
42+
4 //skip mod (one frame is picked every x frames)
43+
);
4444

4545
QLearning.QLConfiguration ALE_QL =
4646
new QLearning.QLConfiguration(
@@ -69,7 +69,7 @@ public static void main(String[] args) throws IOException {
6969
//setup the emulation environment through ALE, you will need a ROM file
7070
// set render to true to see the agent play (poorly). You can also see how slowly the data is generated and
7171
// understand why training would take a long time.
72-
ALEMDP mdp = new ALEMDP("E:\\projects\\ArcadeLearningEnvironment\\pong.bin", false);
72+
ALEMDP mdp = new ALEMDP("pong.bin", true);
7373

7474
//setup the training
7575
QLearningDiscreteConv<ALEMDP.GameScreen> dql = new QLearningDiscreteConv<ALEMDP.GameScreen>(mdp, ALE_NET_QL, ALE_HP, ALE_QL);
Lines changed: 37 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,50 @@
1+
/* *****************************************************************************
2+
* Copyright (c) 2020 Konduit, Inc.
3+
*
4+
* This program and the accompanying materials are made available under the
5+
* terms of the Apache License, Version 2.0 which is available at
6+
* https://www.apache.org/licenses/LICENSE-2.0.
7+
*
8+
* Unless required by applicable law or agreed to in writing, software
9+
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
10+
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
11+
* License for the specific language governing permissions and limitations
12+
* under the License.
13+
*
14+
* SPDX-License-Identifier: Apache-2.0
15+
******************************************************************************/
16+
17+
import org.deeplearning4j.rl4j.learning.HistoryProcessor;
118
import org.deeplearning4j.rl4j.mdp.ale.ALEMDP;
219
import org.deeplearning4j.rl4j.policy.ACPolicy;
320

421
import java.io.IOException;
5-
import java.util.logging.Logger;
622

23+
/**
24+
* @author robaltena
25+
*
26+
* This sample shows how to play an ALE game with a trained model.
27+
*/
728
public class PlayALE {
829
public static void main(String[] args) throws IOException {
9-
ALEMDP mdp = new ALEMDP("E:\\projects\\ArcadeLearningEnvironment\\pong.bin");
30+
ALEMDP mdp = new ALEMDP("pong.bin", true);
1031

11-
//load the previous agent
32+
//load the trained agent
1233
ACPolicy<ALEMDP.GameScreen> pol2 = ACPolicy.load("ale-a3c.model");
1334

14-
//evaluate the agent
15-
double rewards = 0;
16-
for (int i = 0; i < 10; i++) {
17-
mdp.reset();
18-
double reward = pol2.play(mdp);
19-
rewards += reward;
20-
Logger.getAnonymousLogger().info("Reward: " + reward);
21-
}
22-
23-
Logger.getAnonymousLogger().info("average: " + rewards/1000);
35+
//The history processor configuration used in training, needed here for the data pre-processing steps.
36+
HistoryProcessor.Configuration ALE_HP = new HistoryProcessor.Configuration(
37+
4, //History length
38+
84, //resize width
39+
110, //resize height
40+
84, //crop width
41+
84, //crop height
42+
0, //cropping x offset
43+
0, //cropping y offset
44+
4 //skip mod (one frame is picked every x frames)
45+
);
2446

47+
pol2.play(mdp, ALE_HP);
48+
mdp.close();
2549
}
2650
}

rl4j-ale-examples/src/test/java/AleTest.java

Lines changed: 0 additions & 73 deletions
This file was deleted.

0 commit comments

Comments
 (0)