Skip to content

Commit 4de09a9

Browse files
authored
Merge pull request #392 from stratosphereips/ondra-fix-trajectories-endings
Fix trajectory ending handling by adding end reason to agent observations
2 parents 43b85eb + c006e06 commit 4de09a9

File tree

1 file changed

+4 -3 lines changed

1 file changed

+4 -3 lines changed

AIDojoCoordinator/coordinator.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -637,12 +637,13 @@ async def _process_game_action(self, agent_addr: tuple, action:Action)->None:
637 637   await self._episode_rewards_condition.wait()
638 638   # append step to the trajectory if needed
639 639
640     - async with self._agents_lock:
641     - self._add_step_to_trajectory(agent_addr, action, self._agent_rewards[agent_addr], new_state,end_reason=None)
642     - # add information to 'info' field if needed
    640 +
643 641   info = {}
644 642   if self._agent_status[agent_addr] not in [AgentStatus.Playing, AgentStatus.PlayingWithTimeout]:
645 643   info["end_reason"] = str(self._agent_status[agent_addr])
    644 + async with self._agents_lock:
    645 + self._add_step_to_trajectory(agent_addr, action, self._agent_rewards[agent_addr], new_state,end_reason=info.get("end_reason", ""))
    646 + # add information to 'info' field if needed
646 647   new_observation = Observation(self._agent_states[agent_addr], self._agent_rewards[agent_addr], self._episode_ends[agent_addr], info=info)
647 648   self._agent_observations[agent_addr] = new_observation
648 649   output_message_dict = {

0 commit comments

Comments
 (0)