instadeepai
diff --git a/jumanji/environments/commons/maze_utils/maze_generation.py b/jumanji/environments/commons/maze_utils/maze_generation.py
Lines changed: 8 additions & 11 deletions
diff --git a/jumanji/environments/commons/maze_utils/maze_generation_test.py b/jumanji/environments/commons/maze_utils/maze_generation_test.py
Lines changed: 5 additions & 6 deletions
diff --git a/jumanji/environments/commons/maze_utils/maze_rendering.py b/jumanji/environments/commons/maze_utils/maze_rendering.py
Lines changed: 6 additions & 5 deletions
diff --git a/jumanji/environments/routing/cleaner/env.py b/jumanji/environments/routing/cleaner/env.py
Lines changed: 8 additions & 16 deletions
diff --git a/jumanji/environments/routing/cleaner/env_test.py b/jumanji/environments/routing/cleaner/env_test.py
Lines changed: 20 additions & 12 deletions
@@ -41,7 +41,6 @@
 import chex
 import jax
 import jax.numpy as jnp
-from typing_extensions import TypeAlias
 
 from jumanji.environments.commons.maze_utils.stack import (
     Stack,
@@ -51,8 +50,6 @@
     stack_push,
 )
 
-Maze: TypeAlias = chex.Array
-
 EMPTY = 0
 WALL = 1
 
@@ -65,7 +62,7 @@ class MazeGenerationState(NamedTuple):
     - key: the Jax random generation key.
     """
 
-    maze: Maze
+    maze: chex.Array
     chambers: Stack
     key: chex.PRNGKey
 
@@ -79,7 +76,7 @@ def create_chambers_stack(maze_width: int, maze_height: int) -> Stack:
     return stack_push(chambers, jnp.array([0, 0, maze_width, maze_height]))
 
 
-def create_empty_maze(width: int, height: int) -> Maze:
+def create_empty_maze(width: int, height: int) -> chex.Array:
     """Create an empty maze."""
     return jnp.full((height, width), EMPTY, dtype=jnp.int8)
 
@@ -94,19 +91,19 @@ def random_odd(key: chex.PRNGKey, max_val: int) -> chex.Array:
     return jax.random.randint(key, (), 0, max_val // 2) * 2 + 1
 
 
-def draw_horizontal_wall(maze: Maze, x: int, y: int, width: int) -> Maze:
+def draw_horizontal_wall(maze: chex.Array, x: int, y: int, width: int) -> chex.Array:
     """Draw a horizontal wall on the maze starting from (x,y) with the specified width."""
 
-    def body_fun(i: int, maze: Maze) -> Maze:
+    def body_fun(i: int, maze: chex.Array) -> chex.Array:
         return maze.at[y, i].set(WALL)
 
     return jax.lax.fori_loop(x, x + width, body_fun, maze)
 
 
-def draw_vertical_wall(maze: Maze, x: int, y: int, height: int) -> Maze:
+def draw_vertical_wall(maze: chex.Array, x: int, y: int, height: int) -> chex.Array:
     """Draw a vertical wall on the maze starting from (x,y) with the specified height."""
 
-    def body_fun(i: int, maze: Maze) -> Maze:
+    def body_fun(i: int, maze: chex.Array) -> chex.Array:
         return maze.at[i, x].set(WALL)
 
     return jax.lax.fori_loop(y, y + height, body_fun, maze)
@@ -156,7 +153,7 @@ def split_vertically(
 
 def split_horizontally(
     state: MazeGenerationState, chamber: chex.Array
-) -> Tuple[Maze, Stack, chex.PRNGKey]:
+) -> Tuple[chex.Array, Stack, chex.PRNGKey]:
     """Split the chamber horizontally.
 
     Randomly draw a vertical wall to split the chamber horizontally. Randomly open a passage
@@ -202,7 +199,7 @@ def chambers_remaining(state: MazeGenerationState) -> int:
     return ~empty_stack(state.chambers)
 
 
-def generate_maze(width: int, height: int, key: chex.PRNGKey) -> Maze:
+def generate_maze(width: int, height: int, key: chex.PRNGKey) -> chex.Array:
     """Randomly generate a maze.
 
     Args:
 
@@ -21,7 +21,6 @@
 from jumanji.environments.commons.maze_utils.maze_generation import (
     EMPTY,
     WALL,
-    Maze,
     MazeGenerationState,
     create_chambers_stack,
     create_empty_maze,
@@ -34,7 +33,7 @@
 from jumanji.environments.commons.maze_utils.stack import Stack, stack_pop
 
 
-def no_more_chamber(maze: Maze) -> chex.Array:
+def no_more_chamber(maze: chex.Array) -> chex.Array:
     """Test if there is no chamber in the maze that can be divided anymore.
 
     A chamber can be divided if its width and height are greater or equal to two.
@@ -48,7 +47,7 @@ def no_more_chamber(maze: Maze) -> chex.Array:
     return jnp.all(convolved)
 
 
-def all_tiles_connected(maze: Maze) -> bool:
+def all_tiles_connected(maze: chex.Array) -> bool:
     """Test if all the tiles of the maze can be reached.
 
     The function scipy.ndimage.label can be used to count the number of connected components
@@ -68,7 +67,7 @@ class TestMazeGeneration:
     HEIGHT = 15
 
     @pytest.fixture
-    def maze(self) -> Maze:
+    def maze(self) -> chex.Array:
         return create_empty_maze(self.WIDTH, self.HEIGHT)
 
     @pytest.fixture
@@ -111,7 +110,7 @@ def test_random_odd(self, key: chex.PRNGKey) -> None:
             assert 0 <= i < max_val
 
     def test_split_vertically(
-        self, maze: Maze, chambers: Stack, key: chex.PRNGKey
+        self, maze: chex.Array, chambers: Stack, key: chex.PRNGKey
     ) -> None:
         """Test that a horizontal wall is drawn and that subchambers are added to stack."""
         chambers, chamber = stack_pop(chambers)
@@ -126,7 +125,7 @@ def test_split_vertically(
         assert chambers.insertion_index >= 1
 
     def test_split_horizontally(
-        self, maze: Maze, chambers: Stack, key: chex.PRNGKey
+        self, maze: chex.Array, chambers: Stack, key: chex.PRNGKey
     ) -> None:
         """Test that a vertical wall is drawn and that subchambers are added to stack."""
         chambers, chamber = stack_pop(chambers)
 
@@ -14,6 +14,7 @@
 
 from typing import Callable, Optional, Sequence, Tuple
 
+import chex
 import matplotlib.animation
 import matplotlib.cm
 import matplotlib.pyplot as plt
@@ -23,7 +24,7 @@
 from numpy.typing import NDArray
 
 import jumanji.environments
-from jumanji.environments.commons.maze_utils.maze_generation import EMPTY, WALL, Maze
+from jumanji.environments.commons.maze_utils.maze_generation import EMPTY, WALL
 from jumanji.viewer import Viewer
 
 
@@ -55,7 +56,7 @@ def __init__(self, name: str, render_mode: str = "human") -> None:
         else:
             raise ValueError(f"Invalid render mode: {render_mode}")
 
-    def render(self, maze: Maze) -> Optional[NDArray]:
+    def render(self, maze: chex.Array) -> Optional[NDArray]:
         """
         Render maze.
 
@@ -73,7 +74,7 @@ def render(self, maze: Maze) -> Optional[NDArray]:
 
     def animate(
         self,
-        mazes: Sequence[Maze],
+        mazes: Sequence[chex.Array],
         interval: int = 200,
         save_path: Optional[str] = None,
     ) -> matplotlib.animation.FuncAnimation:
@@ -124,12 +125,12 @@ def _get_fig_ax(self) -> Tuple[plt.Figure, plt.Axes]:
             ax = fig.get_axes()[0]
         return fig, ax
 
-    def _add_grid_image(self, maze: Maze, ax: Axes) -> image.AxesImage:
+    def _add_grid_image(self, maze: chex.Array, ax: Axes) -> image.AxesImage:
         img = self._create_grid_image(maze)
         ax.set_axis_off()
         return ax.imshow(img)
 
-    def _create_grid_image(self, maze: Maze) -> NDArray:
+    def _create_grid_image(self, maze: chex.Array) -> NDArray:
         img = np.zeros((*maze.shape, 3))
         for tile_value, color in self.COLORS.items():
             img[np.where(maze == tile_value)] = color
 
@@ -82,7 +82,6 @@ class Cleaner(Environment[State]):
 
     def __init__(
         self,
-        num_agents: int = 3,
         generator: Optional[Generator] = None,
         time_limit: Optional[int] = None,
         penalty_per_timestep: float = 0.5,
@@ -95,13 +94,15 @@ def __init__(
             time_limit: max number of steps in an episode. Defaults to `num_rows * num_cols`.
             generator: `Generator` whose `__call__` instantiates an environment instance.
                 Implemented options are [`RandomGenerator`]. Defaults to `RandomGenerator` with
-                `num_rows=10` and `num_cols=10`.
+                `num_rows=10`, `num_cols=10` and `num_agents=3`.
             viewer: `Viewer` used for rendering. Defaults to `CleanerViewer` with "human" render
                 mode.
             penalty_per_timestep: the penalty returned at each timestep in the reward.
         """
-        self.num_agents = num_agents
-        self.generator = generator or RandomGenerator(num_rows=10, num_cols=10)
+        self.generator = generator or RandomGenerator(
+            num_rows=10, num_cols=10, num_agents=3
+        )
+        self.num_agents = self.generator.num_agents
         self.num_rows = self.generator.num_rows
         self.num_cols = self.generator.num_cols
         self.grid_shape = (self.num_rows, self.num_cols)
@@ -177,22 +178,13 @@ def reset(self, key: chex.PRNGKey) -> Tuple[State, TimeStep[Observation]]:
             timestep: `TimeStep` object corresponding to the first timestep returned by the
                 environment after a reset.
         """
-        key, subkey = jax.random.split(key)
-
         # Agents start in upper left corner
         agents_locations = jnp.zeros((self.num_agents, 2), int)
 
-        grid = self.generator(subkey)
-        # Clean the tile in upper left corner
-        grid = self._clean_tiles_containing_agents(grid, agents_locations)
+        state = self.generator(key)
 
-        state = State(
-            grid=grid,
-            agents_locations=agents_locations,
-            action_mask=self._compute_action_mask(grid, agents_locations),
-            step_count=jnp.array(0, jnp.int32),
-            key=key,
-        )
+        # Create the action mask and update the state
+        state.action_mask = self._compute_action_mask(state.grid, agents_locations)
 
         observation = self._observation_from_state(state)
 
 
@@ -19,37 +19,45 @@
 
 from jumanji.environments.routing.cleaner.constants import CLEAN, DIRTY, WALL
 from jumanji.environments.routing.cleaner.env import Cleaner
-from jumanji.environments.routing.cleaner.generator import Generator, Maze
+from jumanji.environments.routing.cleaner.generator import Generator
 from jumanji.environments.routing.cleaner.types import Observation, State
 from jumanji.testing.env_not_smoke import check_env_does_not_smoke
 from jumanji.testing.pytrees import assert_is_jax_array_tree
 from jumanji.types import StepType, TimeStep
 
 SAMPLE_GRID = jnp.array(
     [
-        [DIRTY, DIRTY, WALL, DIRTY, DIRTY],
+        [CLEAN, DIRTY, WALL, DIRTY, DIRTY],
         [WALL, DIRTY, WALL, DIRTY, WALL],
         [DIRTY, DIRTY, DIRTY, DIRTY, WALL],
         [DIRTY, WALL, WALL, DIRTY, WALL],
         [DIRTY, WALL, DIRTY, DIRTY, DIRTY],
     ]
 )
-N_AGENT = 3
 
 
 class DummyGenerator(Generator):
-    def __init__(self) -> None:
-        super(DummyGenerator, self).__init__(num_rows=5, num_cols=5)
+    """Dummy generator, generate an instance of size 5x5 with 3 agents."""
 
-    def __call__(self, key: chex.PRNGKey) -> Maze:
-        return SAMPLE_GRID
+    def __init__(self) -> None:
+        super(DummyGenerator, self).__init__(num_rows=5, num_cols=5, num_agents=3)
+
+    def __call__(self, key: chex.PRNGKey) -> State:
+        agents_locations = jnp.zeros((self.num_agents, 2), int)
+        return State(
+            grid=SAMPLE_GRID,
+            agents_locations=agents_locations,
+            action_mask=None,
+            step_count=jnp.array(0, jnp.int32),
+            key=key,
+        )
 
 
 class TestCleaner:
     @pytest.fixture
     def cleaner(self) -> Cleaner:
         generator = DummyGenerator()
-        return Cleaner(num_agents=N_AGENT, generator=generator)
+        return Cleaner(generator=generator)
 
     @pytest.fixture
     def key(self) -> chex.PRNGKey:
@@ -74,7 +82,7 @@ def test_cleaner__reset(self, cleaner: Cleaner, key: chex.PRNGKey) -> None:
         assert isinstance(timestep, TimeStep)
         assert isinstance(state, State)
 
-        assert jnp.all(state.agents_locations == jnp.zeros((N_AGENT, 2)))
+        assert jnp.all(state.agents_locations == jnp.zeros((cleaner.num_agents, 2)))
         assert jnp.sum(state.grid == CLEAN) == 1  # Only the top-left tile is clean
         assert state.step_count == 0
 
@@ -101,7 +109,7 @@ def test_cleaner__step(self, cleaner: Cleaner, key: chex.PRNGKey) -> None:
         step_fn = jax.jit(cleaner.step)
 
         # First action: all agents move right
-        actions = jnp.array([1] * N_AGENT)
+        actions = jnp.array([1] * cleaner.num_agents)
         state, timestep = step_fn(initial_state, actions)
         # Assert only one tile changed, on the right of the initial pos
         assert jnp.sum(state.grid != initial_state.grid) == 1
@@ -148,7 +156,7 @@ def test_cleaner__initial_action_mask(
 
         # All agents can only move right in the initial state
         expected_action_mask = jnp.array(
-            [[False, True, False, False] for _ in range(N_AGENT)]
+            [[False, True, False, False] for _ in range(cleaner.num_agents)]
         )
 
         assert jnp.all(state.action_mask == expected_action_mask)
@@ -177,7 +185,7 @@ def select_action(
                     key, jnp.arange(4), p=agent_action_mask.flatten()
                 )
 
-            subkeys = jax.random.split(key, N_AGENT)
+            subkeys = jax.random.split(key, cleaner.num_agents)
             return select_action(subkeys, observation.action_mask)
 
         check_env_does_not_smoke(cleaner, select_actions)