Skip to content

Commit 0dbb9dd

Browse files
Add env.evaluate for episode evaluation
1 parent 2d7d5a2 commit 0dbb9dd

File tree

1 file changed

+5
-0
lines changed

1 file changed

+5
-0
lines changed

src/agentlab/benchmarks/osworld.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,11 @@ def step(self, action: str):
8585
raw_obs, reward, done, info = self.env.step(env_action)
8686
self._step_count += 1
8787
truncated = info.get('fail', False) or self._step_count >= self.max_steps
88+
if done or truncated:
89+
try:
90+
reward = self.env.evaluate()
91+
except Exception as e:
92+
logger.warning(f"Failed to evaluate {self.task} task: {e}")
8893
obs = self.env_to_agentlab_observation(raw_obs)
8994
return obs, reward, done, truncated, info
9095

0 commit comments

Comments
 (0)