@@ -39,7 +39,7 @@ def reset(self) -> ResetReturn:
39
39
observations = self ._environment .get_obs ()
40
40
41
41
observations = {
42
- agent : observations [i ].astype ("float32" ) for i , agent in enumerate (self .possible_agents )
42
+ agent : observations [i ].astype ("float32" ) for i , agent in enumerate (self .agents )
43
43
}
44
44
45
45
info = {"state" : self ._environment .get_state ()}
@@ -48,20 +48,20 @@ def reset(self) -> ResetReturn:
48
48
49
49
def step (self , actions : Dict [str , np .ndarray ]) -> StepReturn :
50
50
mujoco_actions = []
51
- for agent in self .possible_agents :
51
+ for agent in self .agents :
52
52
mujoco_actions .append (actions [agent ])
53
53
54
54
reward , done , info = self ._environment .step (mujoco_actions )
55
55
56
- terminals = {agent : done for agent in self .possible_agents }
57
- trunctations = {agent : False for agent in self .possible_agents }
56
+ terminals = {agent : done for agent in self .agents }
57
+ trunctations = {agent : False for agent in self .agents }
58
58
59
- rewards = {agent : reward for agent in self .possible_agents }
59
+ rewards = {agent : reward for agent in self .agents }
60
60
61
61
observations = self ._environment .get_obs ()
62
62
63
63
observations = {
64
- agent : observations [i ].astype ("float32" ) for i , agent in enumerate (self .possible_agents )
64
+ agent : observations [i ].astype ("float32" ) for i , agent in enumerate (self .agents )
65
65
}
66
66
67
67
info = {}
0 commit comments