|
/* *****************************************************************************
 * Copyright (c) 2015-2019 Skymind, Inc.
 *
 * This program and the accompanying materials are made available under the
 * terms of the Apache License, Version 2.0 which is available at
 * https://www.apache.org/licenses/LICENSE-2.0.
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 *
 * SPDX-License-Identifier: Apache-2.0
 ******************************************************************************/
import org.deeplearning4j.rl4j.learning.async.a3c.discrete.A3CDiscrete;
import org.deeplearning4j.rl4j.learning.async.a3c.discrete.A3CDiscreteDense;
import org.deeplearning4j.rl4j.mdp.gym.GymEnv;
import org.deeplearning4j.rl4j.network.ac.ActorCriticFactorySeparateStdDense;
import org.deeplearning4j.rl4j.policy.ACPolicy;
import org.deeplearning4j.rl4j.space.Box;
import org.deeplearning4j.rl4j.space.DiscreteSpace;
import org.deeplearning4j.rl4j.space.Encodable;
import org.nd4j.linalg.learning.config.Adam;

import java.io.IOException;
/**
 * A3C on cartpole.
 *
 * This example shows the rl4j classes that implement the paper
 * "Asynchronous Methods for Deep Reinforcement Learning" (Mnih et al.),
 * available at https://arxiv.org/abs/1602.01783
 *
 * @author rubenfiszel ([email protected]) on 8/18/16.
 */
| 37 | +public class A3CCartpole { |
| 38 | + |
| 39 | + public static void main(String[] args) throws IOException { |
| 40 | + A3CcartPole(); |
| 41 | + } |
| 42 | + |
| 43 | + private static void A3CcartPole() throws IOException { |
| 44 | + |
| 45 | + //define the mdp from gym (name, render) |
| 46 | + String envUD = "CartPole-v1"; |
| 47 | + GymEnv<Encodable, Integer, DiscreteSpace> mdp = new GymEnv<Encodable, Integer, DiscreteSpace>(envUD, false, false); |
| 48 | + |
| 49 | + A3CDiscrete.A3CConfiguration CARTPOLE_A3C = |
| 50 | + new A3CDiscrete.A3CConfiguration( |
| 51 | + 123, //Random seed |
| 52 | + 200, //Max step By epoch |
| 53 | + 500000, //Max step |
| 54 | + 8, //Number of threads |
| 55 | + 20, //t_max |
| 56 | + 10, //num step noop warmup |
| 57 | + 0.01, //reward scaling |
| 58 | + 0.99, //gamma |
| 59 | + 1.0 //td-error clipping |
| 60 | + ); |
| 61 | + |
| 62 | + ActorCriticFactorySeparateStdDense.Configuration CARTPOLE_NET_A3C = ActorCriticFactorySeparateStdDense.Configuration |
| 63 | + .builder().updater(new Adam(1e-2)).l2(0).numHiddenNodes(16).numLayer(3).build(); |
| 64 | + |
| 65 | + //define the training |
| 66 | + A3CDiscreteDense<Encodable> a3c = new A3CDiscreteDense<Encodable>(mdp, CARTPOLE_NET_A3C, CARTPOLE_A3C); |
| 67 | + |
| 68 | + a3c.train(); //start the training |
| 69 | + mdp.close(); |
| 70 | + |
| 71 | + ACPolicy<org.deeplearning4j.rl4j.space.Encodable> pol = a3c.getPolicy(); |
| 72 | + |
| 73 | + pol.save("/tmp/val1/", "/tmp/pol1"); |
| 74 | + |
| 75 | + //reload the policy, will be equal to "pol", but without the randomness |
| 76 | + ACPolicy<Box> pol2 = ACPolicy.load("/tmp/val1/", "/tmp/pol1"); |
| 77 | + Cartpole.loadCartpole(pol2, envUD); |
| 78 | + System.out.println("sample finished."); |
| 79 | + } |
| 80 | + |
| 81 | +} |
0 commit comments