Skip to content

Commit ee51548

Browse files
committed
feat(rl): add crash_penalty for simulation instability
Add a configurable `crash_penalty` (default -100.0) to AndesEnv that is added to the step reward when the time-domain simulation (TDS) terminates early due to a stability violation. This gives the RL agent a strong signal to avoid destabilizing actions.
1 parent 769b59e commit ee51548

File tree

1 file changed

+9
-2
lines changed

1 file changed

+9
-2
lines changed

andes/rl/env.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,9 @@ class AndesEnv(gymnasium.Env):
4545
disturbance_fn : callable or None
4646
``disturbance_fn(env) -> None``, called after each ``reinit``
4747
in ``reset()`` to inject episode-specific perturbations.
48+
crash_penalty : float
49+
Reward penalty applied when the simulation terminates due to
50+
instability (default ``-100.0``). Set to ``0.0`` to disable.
4851
action_low : float or array-like
4952
Lower bound for the action space (default unbounded).
5053
action_high : float or array-like
@@ -84,6 +87,7 @@ def __init__(
8487
dt=0.1,
8588
tf=20.0,
8689
disturbance_fn=None,
90+
crash_penalty=-100.0,
8791
action_low=-np.inf,
8892
action_high=np.inf,
8993
obs_low=-np.inf,
@@ -127,6 +131,7 @@ def __init__(
127131
self._tf = float(tf)
128132
self._reward_fn = reward_fn
129133
self._disturbance_fn = disturbance_fn
134+
self._crash_penalty = float(crash_penalty)
130135
self._step_count = 0
131136

132137
# --- resolve observations ---
@@ -199,11 +204,13 @@ def step(self, action):
199204

200205
obs = self._get_obs()
201206

202-
reward = float(self._reward_fn(obs, action, self))
203-
204207
truncated = (self._tf - float(self._ss.dae.t)) < 0.5 * self._dt
205208
terminated = (not success) and (not truncated)
206209

210+
reward = float(self._reward_fn(obs, action, self))
211+
if terminated:
212+
reward += self._crash_penalty
213+
207214
self._step_count += 1
208215
info = {'t': float(self._ss.dae.t), 'success': success,
209216
'step': self._step_count}

0 commit comments

Comments
 (0)