Skip to content

Commit 9d4ccd3

Browse files
committed
add epsilonnoise policy
1 parent 93dcb2d commit 9d4ccd3

File tree

2 files changed

+52
-0
lines changed

2 files changed

+52
-0
lines changed

rl/policy/noise.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import numpy as np
22
from rl.util import log_self
33
from rl.policy.base_policy import Policy
4+
from rl.policy.epsilon_greedy import EpsilonGreedyPolicy
45

56

67
class NoNoisePolicy(Policy):
@@ -63,6 +64,26 @@ def update(self, sys_vars):
6364
self.n_step = sys_vars['epi']
6465

6566

67+
class EpsilonGreedyNoisePolicy(EpsilonGreedyPolicy, NoNoisePolicy):

    '''
    Epsilon-gated uniform noise policy.

    Resembles epsilon greedy decay, except that the random branch
    yields a noise sample rather than a random action:
    when self.e exceeds a fresh uniform draw from [0, 1),
    sample() returns uniform noise spanning half of the action bounds
    found in self.env_spec; otherwise the noise is 0.

    NOTE(review): self.e is presumably maintained by EpsilonGreedyPolicy,
    and the noise presumably consumed by NoNoisePolicy.select_action;
    neither is visible here - confirm against those classes.
    '''

    def sample(self):
        '''
        Return the noise value for the current step:
        a uniform sample within half of the action bounds
        when the epsilon check passes, else 0.
        '''
        # guard clause: the epsilon check failed, so no noise
        if not (self.e > np.random.rand()):
            return 0
        # noise range is half of the action range on each side
        half_low = 0.5 * self.env_spec['action_bound_low']
        half_high = 0.5 * self.env_spec['action_bound_high']
        return np.random.uniform(half_low, half_high)

    def select_action(self, state):
        '''
        Select an action for state via NoNoisePolicy.select_action,
        called explicitly on that base rather than through the MRO,
        where EpsilonGreedyPolicy is listed first.
        '''
        return NoNoisePolicy.select_action(self, state)
85+
86+
6687
class AnnealedGaussianPolicy(LinearNoisePolicy):
6788

6889
'''

rl/spec/classic_experiment_specs.json

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -736,6 +736,37 @@
736736
]
737737
}
738738
},
739+
"pendulum_ddpg_epsilonnoise": {
740+
"problem": "Pendulum-v0",
741+
"Agent": "DDPG",
742+
"HyperOptimizer": "GridSearch",
743+
"Memory": "LinearMemoryWithForgetting",
744+
"Optimizer": "AdamOptimizer",
745+
"Policy": "EpsilonGreedyNoisePolicy",
746+
"PreProcessor": "NoPreProcessor",
747+
"param": {
748+
"batch_size": 64,
749+
"n_epoch": 1,
750+
"tau": 0.005,
751+
"lr": 0.001,
752+
"critic_lr": 0.001,
753+
"exploration_anneal_episodes": 50,
754+
"gamma": 0.97,
755+
"hidden_layers": [400, 300],
756+
"hidden_layers_activation": "relu",
757+
"output_layer_activation": "tanh"
758+
},
759+
"param_range": {
760+
"lr": [0.0001, 0.0005, 0.001],
761+
"critic_lr": [0.001, 0.005],
762+
"gamma": [0.95, 0.97, 0.99],
763+
"hidden_layers": [
764+
[400, 300],
765+
[800, 400, 200],
766+
[800, 600, 400, 200]
767+
]
768+
}
769+
},
739770
"pendulum_ddpg_linearnoise": {
740771
"problem": "Pendulum-v0",
741772
"Agent": "DDPG",

0 commit comments

Comments
 (0)