@@ -17,6 +17,7 @@
 package org.deeplearning4j.examples.rl4j;
 
 import java.io.IOException;
+
 import org.deeplearning4j.rl4j.learning.HistoryProcessor;
 import org.deeplearning4j.rl4j.learning.sync.qlearning.QLearning;
 import org.deeplearning4j.rl4j.learning.sync.qlearning.discrete.QLearningDiscreteConv;
@@ -26,47 +27,46 @@
 
 /**
  * @author saudet
- *
+ * <p>
  * Main example for DQN with The Arcade Learning Environment (ALE)
- *
  */
 public class ALE {
 
     public static HistoryProcessor.Configuration ALE_HP =
-        new HistoryProcessor.Configuration(
-            4,   //History length
-            84,  //resize width
-            110, //resize height
-            84,  //crop width
-            84,  //crop height
-            0,   //cropping x offset
-            0,   //cropping y offset
-            4    //skip mod (one frame is picked every x)
-        );
+            new HistoryProcessor.Configuration(
+                    4,   //History length
+                    84,  //resize width
+                    84,  //resize height
+                    160, //crop width
+                    194, //crop height
+                    0,   //cropping x offset
+                    32,  //cropping y offset
+                    4    //skip mod (one frame is picked every x)
+            );
 
     public static QLearning.QLConfiguration ALE_QL =
-        new QLearning.QLConfiguration(
-            123,     //Random seed
-            10000,   //Max step by epoch
-            8000000, //Max step
-            1000000, //Max size of experience replay
-            32,      //size of batches
-            10000,   //target update (hard)
-            500,     //num step noop warmup
-            0.1,     //reward scaling
-            0.99,    //gamma
-            100.0,   //td-error clipping
-            0.1f,    //min epsilon
-            100000,  //num step for eps greedy anneal
-            true     //double-dqn
-        );
+            new QLearning.QLConfiguration(
+                    123,     //Random seed
+                    10000,   //Max step by epoch
+                    8000000, //Max step
+                    1000000, //Max size of experience replay
+                    32,      //size of batches
+                    10000,   //target update (hard)
+                    500,     //num step noop warmup
+                    0.1,     //reward scaling
+                    0.99,    //gamma
+                    100.0,   //td-error clipping
+                    0.1f,    //min epsilon
+                    100000,  //num step for eps greedy anneal
+                    true     //double-dqn
+            );
 
     public static DQNFactoryStdConv.Configuration ALE_NET_QL =
-        new DQNFactoryStdConv.Configuration(
-            0.00025, //learning rate
-            0.000,   //l2 regularization
-            null, null
-        );
+            new DQNFactoryStdConv.Configuration(
+                    0.00025, //learning rate
+                    0.000,   //l2 regularization
+                    null, null
+            );
 
     public static void main(String[] args) throws IOException {
 
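The diff is truncated at the opening of main. For orientation, below is a minimal sketch of how the three configuration objects above are typically wired together in RL4J of this vintage. The ALEMDP and DataManager classes, the "pong.bin" ROM name, the saved model name, and the exact constructor argument order are assumptions for illustration, not part of this commit.

    // Hypothetical continuation of ALE.main(); not part of this commit.
    // Assumes RL4J classes of the same era:
    //   import org.deeplearning4j.rl4j.mdp.ale.ALEMDP;
    //   import org.deeplearning4j.rl4j.util.DataManager;
    public static void main(String[] args) throws IOException {
        // Record training data and checkpoints under a fresh rl4j-data folder
        DataManager manager = new DataManager(true);

        // Emulator environment; needs an ALE ROM file on disk (name assumed here)
        ALEMDP mdp = new ALEMDP("pong.bin");

        // The trainer ties together the environment, the conv-net factory config
        // (ALE_NET_QL), frame preprocessing (ALE_HP), and the Q-learning
        // hyperparameters (ALE_QL) defined above
        QLearningDiscreteConv<ALEMDP.GameScreen> dql =
                new QLearningDiscreteConv<>(mdp, ALE_NET_QL, ALE_HP, ALE_QL, manager);

        dql.train();                           // run up to the 8M-step budget
        dql.getPolicy().save("ale-dql.model"); // persist the learned policy
        mdp.close();                           // shut down the emulator
    }

When run this way, training statistics land in the DataManager's data directory, and the saved policy can be reloaded later (for example with DQNPolicy.load) to watch the trained agent play.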