@@ -215,25 +215,23 @@ def construct_curr_info(self, next_info: BrainInfo) -> BrainInfo:
         prev_text_actions = []
         for agent_id in next_info.agents:
             agent_brain_info = self.training_buffer[agent_id].last_brain_info
+            agent_index = agent_brain_info.agents.index(agent_id)
             if agent_brain_info is None:
                 agent_brain_info = next_info
             for i in range(len(next_info.visual_observations)):
-                visual_observations[i].append(
-                    agent_brain_info.visual_observations[i][agent_brain_info.agents.index(agent_id)])
-            vector_observations.append(agent_brain_info.vector_observations[agent_brain_info.agents.index(agent_id)])
-            text_observations.append(agent_brain_info.text_observations[agent_brain_info.agents.index(agent_id)])
+                visual_observations[i].append(agent_brain_info.visual_observations[i][agent_index])
+            vector_observations.append(agent_brain_info.vector_observations[agent_index])
+            text_observations.append(agent_brain_info.text_observations[agent_index])
             if self.use_recurrent:
-                memories.append(agent_brain_info.memories[agent_brain_info.agents.index(agent_id)])
-            rewards.append(agent_brain_info.rewards[agent_brain_info.agents.index(agent_id)])
-            local_dones.append(agent_brain_info.local_done[agent_brain_info.agents.index(agent_id)])
-            max_reacheds.append(agent_brain_info.max_reached[agent_brain_info.agents.index(agent_id)])
-            agents.append(agent_brain_info.agents[agent_brain_info.agents.index(agent_id)])
-            prev_vector_actions.append(
-                agent_brain_info.previous_vector_actions[agent_brain_info.agents.index(agent_id)])
-            prev_text_actions.append(agent_brain_info.previous_text_actions[agent_brain_info.agents.index(agent_id)])
+                memories.append(agent_brain_info.memories[agent_index])
+            rewards.append(agent_brain_info.rewards[agent_index])
+            local_dones.append(agent_brain_info.local_done[agent_index])
+            max_reacheds.append(agent_brain_info.max_reached[agent_index])
+            agents.append(agent_brain_info.agents[agent_index])
+            prev_vector_actions.append(agent_brain_info.previous_vector_actions[agent_index])
+            prev_text_actions.append(agent_brain_info.previous_text_actions[agent_index])
         curr_info = BrainInfo(visual_observations, vector_observations, text_observations, memories, rewards,
-                              agents,
-                              local_dones, prev_vector_actions, prev_text_actions, max_reacheds)
+                              agents, local_dones, prev_vector_actions, prev_text_actions, max_reacheds)
         return curr_info
 
     def generate_intrinsic_rewards(self, curr_info, next_info):
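The hunk above is a straightforward hoist: the linear agent_brain_info.agents.index(agent_id) search that was previously repeated for every collected field is now computed once per agent and reused through the agent_index local. Below is a minimal, self-contained sketch of the same pattern, assuming nothing beyond the field names visible in the diff; MiniBrainInfo and gather are hypothetical stand-ins invented for illustration and are not part of the original codebase.

# Sketch of the refactor: look up the agent's row position once,
# then reuse it for every per-agent field instead of calling
# .index(agent_id) repeatedly.
from typing import List, NamedTuple


class MiniBrainInfo(NamedTuple):
    # Hypothetical, stripped-down stand-in for BrainInfo, only for this sketch.
    agents: List[str]
    rewards: List[float]
    local_done: List[bool]


def gather(agent_ids: List[str], info: MiniBrainInfo):
    rewards, local_dones = [], []
    for agent_id in agent_ids:
        agent_index = info.agents.index(agent_id)  # single linear search per agent
        rewards.append(info.rewards[agent_index])
        local_dones.append(info.local_done[agent_index])
    return rewards, local_dones


print(gather(["a", "b"], MiniBrainInfo(["b", "a"], [1.0, 2.0], [False, True])))
# -> ([2.0, 1.0], [True, False])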