diff --git a/AIDojoCoordinator/coordinator.py b/AIDojoCoordinator/coordinator.py index 9f0a2a8e..a2986c2a 100644 --- a/AIDojoCoordinator/coordinator.py +++ b/AIDojoCoordinator/coordinator.py @@ -637,12 +637,13 @@ async def _process_game_action(self, agent_addr: tuple, action:Action)->None: await self._episode_rewards_condition.wait() # append step to the trajectory if needed - async with self._agents_lock: - self._add_step_to_trajectory(agent_addr, action, self._agent_rewards[agent_addr], new_state,end_reason=None) - # add information to 'info' field if needed + info = {} if self._agent_status[agent_addr] not in [AgentStatus.Playing, AgentStatus.PlayingWithTimeout]: info["end_reason"] = str(self._agent_status[agent_addr]) + async with self._agents_lock: + self._add_step_to_trajectory(agent_addr, action, self._agent_rewards[agent_addr], new_state,end_reason=info.get("end_reason", "")) + # add information to 'info' field if needed new_observation = Observation(self._agent_states[agent_addr], self._agent_rewards[agent_addr], self._episode_ends[agent_addr], info=info) self._agent_observations[agent_addr] = new_observation output_message_dict = {