instadeepai
diff --git a/jumanji/environments/logic/minesweeper/conftest.py b/jumanji/environments/logic/minesweeper/conftest.py
Lines changed: 5 additions & 2 deletions
diff --git a/jumanji/environments/logic/minesweeper/constants.py b/jumanji/environments/logic/minesweeper/constants.py
Lines changed: 1 addition & 3 deletions
diff --git a/jumanji/environments/logic/minesweeper/env.py b/jumanji/environments/logic/minesweeper/env.py
Lines changed: 53 additions & 171 deletions
@@ -18,13 +18,16 @@
 
 from jumanji.environments.logic.minesweeper.constants import UNEXPLORED_ID
 from jumanji.environments.logic.minesweeper.env import Minesweeper
+from jumanji.environments.logic.minesweeper.generator import UniformSamplingGenerator
 from jumanji.environments.logic.minesweeper.types import State
 
 
 @pytest.fixture
 def minesweeper_env() -> Minesweeper:
-    """Fixture for a default minesweeper env"""
-    return Minesweeper()
+    """Fixture for a default minesweeper environment with 10 rows, 10 columns, and 10 mines."""
+    return Minesweeper(
+        generator=UniformSamplingGenerator(num_rows=10, num_cols=10, num_mines=10)
+    )
 
 
 @pytest.fixture
 
@@ -15,9 +15,7 @@
 UNEXPLORED_ID: int = -1
 IS_MINE: int = 1
 PATCH_SIZE: int = 3
-REVEALED_EMPTY_SQUARE_REWARD: float = 1.0
-REVEALED_MINE_OR_INVALID_ACTION_REWARD: float = 0.0
-COLOUR_MAPPING: list = [
+DEFAULT_COLOR_MAPPING: list = [
     "orange",
     "blue",
     "green",
 
@@ -12,31 +12,28 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import List, Optional, Sequence, Tuple
+from typing import Optional, Sequence, Tuple
 
 import chex
 import jax
 import jax.numpy as jnp
 import matplotlib.animation
-import matplotlib.pyplot as plt
+from numpy.typing import NDArray
 
-import jumanji.environments
 from jumanji import specs
 from jumanji.env import Environment
-from jumanji.environments.logic.minesweeper.constants import (
-    COLOUR_MAPPING,
-    PATCH_SIZE,
-    UNEXPLORED_ID,
-)
+from jumanji.environments.logic.minesweeper.constants import PATCH_SIZE, UNEXPLORED_ID
 from jumanji.environments.logic.minesweeper.done import DefaultDoneFn, DoneFn
+from jumanji.environments.logic.minesweeper.generator import (
+    Generator,
+    UniformSamplingGenerator,
+)
 from jumanji.environments.logic.minesweeper.reward import DefaultRewardFn, RewardFn
 from jumanji.environments.logic.minesweeper.types import Observation, State
-from jumanji.environments.logic.minesweeper.utils import (
-    count_adjacent_mines,
-    create_flat_mine_locations,
-    explored_mine,
-)
+from jumanji.environments.logic.minesweeper.utils import count_adjacent_mines
+from jumanji.environments.logic.minesweeper.viewer import MinesweeperViewer
 from jumanji.types import TimeStep, restart, termination, transition
+from jumanji.viewer import Viewer
 
 
 class Minesweeper(Environment[State]):
@@ -53,7 +50,7 @@ class Minesweeper(Environment[State]):
             specifies how many timesteps have elapsed since environment reset.
 
     - action:
-        multi discrete array containing the square to explore (height and width).
+        multi discrete array containing the square to explore (row and col).
 
     - reward: jax array (float32):
         Configurable function of state and action. By default:
@@ -92,46 +89,47 @@ class Minesweeper(Environment[State]):
 
     def __init__(
         self,
-        num_rows: int = 10,
-        num_cols: int = 10,
-        num_mines: int = 10,
+        generator: Optional[Generator] = None,
         reward_function: Optional[RewardFn] = None,
         done_function: Optional[DoneFn] = None,
-        color_mapping: Optional[List[str]] = None,
+        viewer: Optional[Viewer[State]] = None,
     ):
         """Instantiate a `Minesweeper` environment.
 
         Args:
-            num_rows: number of rows, i.e. height of the board. Defaults to 10.
-            num_cols: number of columns, i.e. width of the board. Defaults to 10.
-            num_mines: number of mines on the board. Defaults to 10.
+            generator: `Generator` to generate problem instances on environment reset.
+                Implemented options are [`UniformSamplingGenerator`]. Defaults to
+                `UniformSamplingGenerator`. The generator will have attributes:
+                    - num_rows: number of rows, i.e. height of the board. Defaults to 10.
+                    - num_cols: number of columns, i.e. width of the board. Defaults to 10.
+                    - num_mines: number of mines generated. Defaults to 10.
             reward_function: `RewardFn` whose `__call__` method computes the reward of an
                 environment transition based on the given current state and selected action.
-                Implemented options are [`DefaultRewardFn`]. Defaults to `DefaultRewardFn`.
+                Implemented options are [`DefaultRewardFn`]. Defaults to `DefaultRewardFn`, giving
+                a reward of 1.0 for revealing an empty square, 0.0 for revealing a mine, and
+                0.0 for an invalid action (selecting an already revealed square).
             done_function: `DoneFn` whose `__call__` method computes the done signal given the
                 current state, action taken, and next state.
-                Implemented options are [`DefaultDoneFn`]. Defaults to `DefaultDoneFn`.
-            color_mapping: colour map used for rendering.
+                Implemented options are [`DefaultDoneFn`]. Defaults to `DefaultDoneFn`, ending the
+                episode on solving the board, revealing a mine, or picking an invalid action.
+            viewer: `Viewer` to support rendering and animation methods.
+                Implemented options are [`MinesweeperViewer`]. Defaults to `MinesweeperViewer`.
         """
-        if num_rows <= 1 or num_cols <= 1:
-            raise ValueError(
-                f"Should make a board of height and width greater than 1, "
-                f"got num_rows={num_rows}, num_cols={num_cols}"
-            )
-        if num_mines < 0 or num_mines >= num_rows * num_cols:
-            raise ValueError(
-                f"Number of mines should be constrained between 0 and the size of the board, "
-                f"got {num_mines}"
-            )
-        self.num_rows = num_rows
-        self.num_cols = num_cols
-        self.num_mines = num_mines
-        self.reward_function = reward_function or DefaultRewardFn()
+        self.reward_function = reward_function or DefaultRewardFn(
+            revealed_empty_square_reward=1.0,
+            revealed_mine_reward=0.0,
+            invalid_action_reward=0.0,
+        )
         self.done_function = done_function or DefaultDoneFn()
-
-        self.cmap = color_mapping if color_mapping else COLOUR_MAPPING
-        self.figure_name = f"{num_rows}x{num_cols} Minesweeper"
-        self.figure_size = (6.0, 6.0)
+        self.generator = generator or UniformSamplingGenerator(
+            num_rows=10, num_cols=10, num_mines=10
+        )
+        self.num_rows = self.generator.num_rows
+        self.num_cols = self.generator.num_cols
+        self.num_mines = self.generator.num_mines
+        self._viewer = viewer or MinesweeperViewer(
+            num_rows=self.num_rows, num_cols=self.num_cols
+        )
 
     def reset(self, key: chex.PRNGKey) -> Tuple[State, TimeStep[Observation]]:
         """Resets the environment.
@@ -144,25 +142,7 @@ def reset(self, key: chex.PRNGKey) -> Tuple[State, TimeStep[Observation]]:
             timestep: `TimeStep` corresponding to the first timestep returned by the
                 environment.
         """
-        key, sample_key = jax.random.split(key)
-        board = jnp.full(
-            shape=(self.num_rows, self.num_cols),
-            fill_value=UNEXPLORED_ID,
-            dtype=jnp.int32,
-        )
-        step_count = jnp.array(0, jnp.int32)
-        flat_mine_locations = create_flat_mine_locations(
-            key=sample_key,
-            num_rows=self.num_rows,
-            num_cols=self.num_cols,
-            num_mines=self.num_mines,
-        )
-        state = State(
-            board=board,
-            step_count=step_count,
-            key=key,
-            flat_mine_locations=flat_mine_locations,
-        )
+        state = self.generator(key)
         observation = self._state_to_observation(state=state)
         timestep = restart(observation=observation)
         return state, timestep
@@ -180,9 +160,7 @@ def step(
             next_state: `State` corresponding to the next state of the environment,
             next_timestep: `TimeStep` corresponding to the timestep returned by the environment.
         """
-        board = state.board
-        action_height, action_width = action
-        board = board.at[action_height, action_width].set(
+        board = state.board.at[tuple(action)].set(
             count_adjacent_mines(state=state, action=action)
         )
         step_count = state.step_count + 1
@@ -272,134 +250,38 @@ def _state_to_observation(self, state: State) -> Observation:
             step_count=state.step_count,
         )
 
-    def render(self, state: State) -> None:
-        """Render the given environment state using matplotlib.
+    def render(self, state: State) -> Optional[NDArray]:
+        """Renders the current state of the board.
 
         Args:
-            state: environment state to be rendered.
-
+            state: the current state to be rendered.
         """
-        self._clear_display()
-        fig, ax = self._get_fig_ax()
-        self._draw(ax, state)
-        self._update_display(fig)
+        return self._viewer.render(state=state)
 
     def animate(
         self,
         states: Sequence[State],
         interval: int = 200,
         save_path: Optional[str] = None,
     ) -> matplotlib.animation.FuncAnimation:
-        """Create an animation from a sequence of environment states.
+        """Creates an animated gif of the board based on the sequence of states.
 
         Args:
-            states: sequence of environment states corresponding to consecutive timesteps.
-            interval: delay between frames in milliseconds, default to 200.
+            states: a list of `State` objects representing the sequence of states.
+            interval: the delay between frames in milliseconds, default to 200.
             save_path: the path where the animation file should be saved. If it is None, the plot
                 will not be saved.
 
         Returns:
-            Animation object that can be saved as a GIF, MP4, or rendered with HTML.
+            animation.FuncAnimation: the animation object that was created.
         """
-        fig, ax = self._get_fig_ax()
-        plt.tight_layout()
-        plt.close(fig)
-
-        def make_frame(state_index: int) -> None:
-            state = states[state_index]
-            self._draw(ax, state)
-
-        # Create the animation object.
-        self._animation = matplotlib.animation.FuncAnimation(
-            fig,
-            make_frame,
-            frames=len(states),
-            interval=interval,
+        return self._viewer.animate(
+            states=states, interval=interval, save_path=save_path
         )
 
-        # Save the animation as a GIF.
-        if save_path:
-            self._animation.save(save_path)
-
-        return self._animation
-
     def close(self) -> None:
         """Perform any necessary cleanup.
-
         Environments will automatically :meth:`close()` themselves when
         garbage collected or when the program exits.
         """
-        plt.close(self.figure_name)
-
-    def _get_fig_ax(self) -> Tuple[plt.Figure, plt.Axes]:
-        exists = plt.fignum_exists(self.figure_name)
-        if exists:
-            fig = plt.figure(self.figure_name)
-            ax = fig.get_axes()[0]
-        else:
-            fig = plt.figure(self.figure_name, figsize=self.figure_size)
-            plt.suptitle(self.figure_name)
-            plt.tight_layout()
-            if not plt.isinteractive():
-                fig.show()
-            ax = fig.add_subplot()
-        return fig, ax
-
-    def _draw(self, ax: plt.Axes, state: State) -> None:
-        ax.clear()
-        ax.set_xticks(jnp.arange(-0.5, self.num_cols - 1, 1))
-        ax.set_yticks(jnp.arange(-0.5, self.num_rows - 1, 1))
-        ax.tick_params(
-            top=False,
-            bottom=False,
-            left=False,
-            right=False,
-            labelleft=False,
-            labelbottom=False,
-            labeltop=False,
-            labelright=False,
-        )
-        background = jnp.ones_like(state.board)
-        for i in range(self.num_rows):
-            for j in range(self.num_cols):
-                background = self._render_grid_square(
-                    state=state, ax=ax, i=i, j=j, background=background
-                )
-        ax.imshow(background, cmap="gray", vmin=0, vmax=1)
-        ax.grid(color="black", linestyle="-", linewidth=2)
-
-    def _render_grid_square(
-        self, state: State, ax: plt.Axes, i: int, j: int, background: chex.Array
-    ) -> chex.Array:
-        board_value = state.board[i, j]
-        if board_value != UNEXPLORED_ID:
-            if explored_mine(state=state, action=jnp.array([i, j], dtype=jnp.int32)):
-                background = background.at[i, j].set(0)
-            else:
-                ax.text(
-                    j,
-                    i,
-                    str(board_value),
-                    color=self.cmap[board_value],
-                    ha="center",
-                    va="center",
-                    fontsize="xx-large",
-                )
-        return background
-
-    def _update_display(self, fig: plt.Figure) -> None:
-        if plt.isinteractive():
-            # Required to update render when using Jupyter Notebook.
-            fig.canvas.draw()
-            if jumanji.environments.is_colab():
-                plt.show(self.figure_name)
-        else:
-            # Required to update render when not using Jupyter Notebook.
-            fig.canvas.draw_idle()
-            fig.canvas.flush_events()
-
-    def _clear_display(self) -> None:
-        if jumanji.environments.is_colab():
-            import IPython.display
-
-            IPython.display.clear_output(True)
+        self._viewer.close()