class Scenario(BaseScenario):
    def init_params(self, **kwargs):
        # Scenario config
        self.viewer_size = kwargs.pop("viewer_size", (1200, 800))

        # Agents config
        self.n_blue_agents = kwargs.pop("n_blue_agents", 3)
        self.n_red_agents = kwargs.pop("n_red_agents", 3)
        # Which agents are learned policies and which are controlled by the heuristic (AI)
        self.ai_red_agents = kwargs.pop("ai_red_agents", True)
        self.ai_blue_agents = kwargs.pop("ai_blue_agents", False)

        # With 5 blue agents there is the option of introducing physical
        # differences through the following roles:
        # 1 goalkeeper -> slow and big
        # 2 defenders -> normal size and speed (agent_size, u_multiplier, max_speed)
        # 2 attackers -> small and fast
        self.physically_different = kwargs.pop("physically_different", False)

        # Agent spawning
        self.spawn_in_formation = kwargs.pop("spawn_in_formation", False)
        self.only_blue_formation = kwargs.pop(
            "only_blue_formation", True
        )  # Only spawn blue agents in formation
        self.formation_agents_per_column = kwargs.pop("formation_agents_per_column", 2)
        self.randomise_formation_indices = kwargs.pop(
            "randomise_formation_indices", False
        )  # If False, each agent will always be in the same formation spot
        self.formation_noise = kwargs.pop(
            "formation_noise", 0.2
        )  # Noise on formation positions
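        # Hedged example of how these options combine: with spawn_in_formation=True,
        # three blue agents and formation_agents_per_column=2, the first column
        # presumably takes two agents and the second takes one, with each spawn
        # point jittered by formation_noise.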

        # AI config
        self.n_traj_points = kwargs.pop(
            "n_traj_points", 0
        )  # Number of spline trajectory points to plot for heuristic (AI) agents
        self.ai_speed_strength = kwargs.pop(
            "ai_strength", 1.0
        )  # The speed of the AI, 0 <= x <= 1
        self.ai_decision_strength = kwargs.pop(
            "ai_decision_strength", 1.0
        )  # The decision strength of the AI, 0 <= x <= 1
        self.ai_precision_strength = kwargs.pop(
            "ai_precision_strength", 1.0
        )  # The precision strength of the AI, 0 <= x <= 1
        self.disable_ai_red = kwargs.pop("disable_ai_red", False)

        # Task sizes
        # ... (parameters omitted in this excerpt) ...
        self.u_multiplier = kwargs.pop("u_multiplier", 0.1)

        # Shooting actions
        self.enable_shooting = kwargs.pop(
            "enable_shooting", False
        )  # Whether to enable two extra actions (rotation and shooting). Only available for non-AI agents
        self.u_rot_multiplier = kwargs.pop("u_rot_multiplier", 0.0003)
        self.u_shoot_multiplier = kwargs.pop("u_shoot_multiplier", 0.6)
        self.shooting_radius = kwargs.pop("shooting_radius", 0.08)
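        # Hedged note: with enable_shooting=True each non-AI agent's action
        # presumably grows from the 2D movement force to 4 dimensions,
        # adding a rotation action and a shooting action.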
        # ... (parameters omitted in this excerpt) ...

        # Rewards
        self.dense_reward = kwargs.pop("dense_reward", True)
        self.pos_shaping_factor_ball_goal = kwargs.pop(
            "pos_shaping_factor_ball_goal", 10.0
        )  # Reward for moving the ball towards the opponents' goal. This can be annealed in a curriculum.
        self.pos_shaping_factor_agent_ball = kwargs.pop(
            "pos_shaping_factor_agent_ball", 0.1
        )  # Reward for moving the team's closest agent towards the ball.
        # This is useful for exploration and can be annealed in a curriculum.
        # It does not trigger when that agent is within distance_to_ball_trigger
        # of the ball or when the ball is moving.
        self.distance_to_ball_trigger = kwargs.pop("distance_to_ball_trigger", 0.4)
        self.scoring_reward = kwargs.pop(
            "scoring_reward", 100.0
        )  # Discrete reward for scoring
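        # A hedged sense of scale (assuming the shaping terms pay out
        # factor * per-step decrease in distance): moving the ball 0.01 closer
        # to the goal yields about 10.0 * 0.01 = 0.1 reward per step, against
        # a one-off scoring_reward of 100.0.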

        # Observations
        self.observe_teammates = kwargs.pop("observe_teammates", True)
        # ... (parameters omitted in this excerpt) ...
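        # check_kwargs_consumed flags any kwargs not consumed by the pops
        # above, surfacing configuration typos early.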
        ScenarioUtils.check_kwargs_consumed(kwargs)

    def make_world(self, batch_dim: int, device: torch.device, **kwargs):
        self.init_params(**kwargs)
        world = self.init_world(batch_dim, device)
        self.init_agents(world)
        self.init_ball(world)
        self.init_background(world)
        self.init_walls(world)
        self.init_goals(world)
        self.init_traj_pts(world)

        # Cached values
        self.left_goal_pos = torch.tensor(
            [-self.pitch_length / 2 - self.ball_size / 2, 0],
            device=device,
            dtype=torch.float,
        )
        self.right_goal_pos = -self.left_goal_pos
        self._done = torch.zeros(batch_dim, device=device, dtype=torch.bool)
        self._sparse_reward_blue = torch.zeros(
            batch_dim, device=device, dtype=torch.float32
        )
        self._sparse_reward_red = self._sparse_reward_blue.clone()
        self._render_field = True
        self.min_agent_dist_to_ball_blue = None
        self.min_agent_dist_to_ball_red = None

        self._reset_agent_range = torch.tensor(
            [self.pitch_length / 2, self.pitch_width],
            device=device,
        )
        self._reset_agent_offset_blue = torch.tensor(
            [-self.pitch_length / 2 + self.agent_size, -self.pitch_width / 2],
            device=device,
        )
        self._reset_agent_offset_red = torch.tensor(
            [-self.agent_size, -self.pitch_width / 2], device=device
        )
        self._agents_rel_pos_to_ball = None
        return world

    def reset_world_at(self, env_index: int = None):
        self.reset_agents(env_index)
        self.reset_ball(env_index)
        self.reset_walls(env_index)
        self.reset_goals(env_index)
        self.reset_controllers(env_index)
        if env_index is None:
            self._done[:] = False  # Reset the done flag for every vectorised env
        else:
            self._done[env_index] = False  # Reset only the queried env

    def init_world(self, batch_dim: int, device: torch.device):
        # Make world
        world = World(