@@ -11,26 +11,28 @@ RainbowAgent.observation_shape = %gym_lib.CARTPOLE_OBSERVATION_SHAPE
 RainbowAgent.observation_dtype = %gym_lib.CARTPOLE_OBSERVATION_DTYPE
 RainbowAgent.stack_size = %gym_lib.CARTPOLE_STACK_SIZE
 RainbowAgent.network = @gym_lib.cartpole_rainbow_network
-RainbowAgent.num_atoms = 51
-RainbowAgent.vmax = 10.
+RainbowAgent.num_atoms = 201
+RainbowAgent.vmax = 100.
 RainbowAgent.gamma = 0.99
+RainbowAgent.epsilon_eval = 0.
+RainbowAgent.epsilon_train = 0.01
 RainbowAgent.update_horizon = 1
 RainbowAgent.min_replay_history = 500
-RainbowAgent.update_period = 4
-RainbowAgent.target_update_period = 100
+RainbowAgent.update_period = 1
+RainbowAgent.target_update_period = 1
 RainbowAgent.epsilon_fn = @dqn_agent.identity_epsilon
 RainbowAgent.replay_scheme = 'uniform'
 RainbowAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version
 RainbowAgent.optimizer = @tf.train.AdamOptimizer()
 
-tf.train.AdamOptimizer.learning_rate = 0.001
-tf.train.AdamOptimizer.epsilon = 0.0003125
+tf.train.AdamOptimizer.learning_rate = 0.00001
+tf.train.AdamOptimizer.epsilon = 0.00000390625
 
 create_gym_environment.environment_name = 'CartPole'
 create_gym_environment.version = 'v0'
 create_agent.agent_name = 'rainbow'
 Runner.create_environment_fn = @gym_lib.create_gym_environment
-Runner.num_iterations = 500
+Runner.num_iterations = 400
 Runner.training_steps = 1000
 Runner.evaluation_steps = 1000
 Runner.max_steps_per_episode = 200 # Default max episode length.
0 commit comments