File tree Expand file tree Collapse file tree 2 files changed +52
-0
lines changed
Expand file tree Collapse file tree 2 files changed +52
-0
lines changed Original file line number Diff line number Diff line change 11import numpy as np
22from rl .util import log_self
33from rl .policy .base_policy import Policy
4+ from rl .policy .epsilon_greedy import EpsilonGreedyPolicy
45
56
67class NoNoisePolicy (Policy ):
@@ -63,6 +64,26 @@ def update(self, sys_vars):
6364 self .n_step = sys_vars ['epi' ]
6465
6566
class EpsilonGreedyNoisePolicy(EpsilonGreedyPolicy, NoNoisePolicy):

    '''
    Noise policy gated by an epsilon-style random test:
    sample() yields either a uniform random draw or no noise at all.
    self.e is not set in this class; presumably it is the (decaying)
    epsilon maintained by EpsilonGreedyPolicy -- confirm there.
    '''

    def sample(self):
        '''
        Draw the noise term.
        When self.e exceeds a fresh np.random.rand() draw, return a
        uniform sample between half of env_spec['action_bound_low']
        and half of env_spec['action_bound_high']; otherwise return 0.
        '''
        explore = self.e > np.random.rand()
        if not explore:
            # no exploration this time: contribute zero noise
            return 0
        # the sampling range is the action bounds scaled by one half
        half_low = 0.5 * self.env_spec['action_bound_low']
        half_high = 0.5 * self.env_spec['action_bound_high']
        return np.random.uniform(half_low, half_high)

    def select_action(self, state):
        '''
        Delegate explicitly to NoNoisePolicy.select_action so that the
        first base class (EpsilonGreedyPolicy) is bypassed for this call.
        '''
        return NoNoisePolicy.select_action(self, state)
85+
86+
6687class AnnealedGaussianPolicy (LinearNoisePolicy ):
6788
6889 '''
Original file line number Diff line number Diff line change 736736 ]
737737 }
738738 },
739+ "pendulum_ddpg_epsilonnoise" : {
740+ "problem" : " Pendulum-v0" ,
741+ "Agent" : " DDPG" ,
742+ "HyperOptimizer" : " GridSearch" ,
743+ "Memory" : " LinearMemoryWithForgetting" ,
744+ "Optimizer" : " AdamOptimizer" ,
745+ "Policy" : " EpsilonGreedyNoisePolicy" ,
746+ "PreProcessor" : " NoPreProcessor" ,
747+ "param" : {
748+ "batch_size" : 64 ,
749+ "n_epoch" : 1 ,
750+ "tau" : 0.005 ,
751+ "lr" : 0.001 ,
752+ "critic_lr" : 0.001 ,
753+ "exploration_anneal_episodes" : 50 ,
754+ "gamma" : 0.97 ,
755+ "hidden_layers" : [400 , 300 ],
756+ "hidden_layers_activation" : " relu" ,
757+ "output_layer_activation" : " tanh"
758+ },
759+ "param_range" : {
760+ "lr" : [0.0001 , 0.0005 , 0.001 ],
761+ "critic_lr" : [0.001 , 0.005 ],
762+ "gamma" : [0.95 , 0.97 , 0.99 ],
763+ "hidden_layers" : [
764+ [400 , 300 ],
765+ [800 , 400 , 200 ],
766+ [800 , 600 , 400 , 200 ]
767+ ]
768+ }
769+ },
739770 "pendulum_ddpg_linearnoise" : {
740771 "problem" : " Pendulum-v0" ,
741772 "Agent" : " DDPG" ,
You can’t perform that action at this time.
0 commit comments