Guard continuous action range in policies: clip sampled actions to the env_spec action bounds

kengz · kengz · commit 193ab599c252 · 2017-04-26T21:51:16.000-04:00
diff --git a/rl/policy/actor_critic.py b/rl/policy/actor_critic.py
@@ -82,6 +82,9 @@ def select_action(self, state):
         a_mean = agent.actor.predict(state)[0]  # extract from batch predict
         action = a_mean + np.random.normal(
             loc=0.0, scale=self.variance, size=a_mean.shape)
+        action = np.clip(action,
+                         self.env_spec['action_bound_low'],
+                         self.env_spec['action_bound_high'])
         return action
 
     def update(self, sys_vars):
diff --git a/rl/policy/noise.py b/rl/policy/noise.py
@@ -25,6 +25,9 @@ def select_action(self, state):
         state = np.expand_dims(state, axis=0)
         if self.env_spec['actions'] == 'continuous':
             action = agent.actor.predict(state)[0] + self.sample()
+            action = np.clip(action,
+                             self.env_spec['action_bound_low'],
+                             self.env_spec['action_bound_high'])
         else:
             Q_state = agent.actor.predict(state)[0]
             assert Q_state.ndim == 1