Merge pull request FLAIROx#138 from FLAIROx/hanabi-observation-bug-fix

amacrutherford · web-flow · commit 08bf97ffb22f · 2025-03-06T16:34:34.000Z
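Fixed issue FLAIROx#132 (Hanabi observation bug): per-agent observation spaces are now declared as `Box(low=0, high=1, shape=self.obs_size)` instead of `Discrete(self.obs_size)`, and the misspelled local `board_fats` in `get_obs` is renamed to `board_feats`.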
diff --git a/jaxmarl/environments/hanabi/hanabi.py b/jaxmarl/environments/hanabi/hanabi.py
@@ -9,7 +9,7 @@
 import chex
 from typing import Tuple, Dict
 from functools import partial
-from jaxmarl.environments.spaces import Discrete
+from jaxmarl.environments.spaces import Discrete, Box
 from .hanabi_game import HanabiGame, State
 
 
@@ -133,7 +133,7 @@ def __init__(
         if action_spaces is None:
             self.action_spaces = {i: Discrete(self.num_moves) for i in self.agents}
         if observation_spaces is None:
-            self.observation_spaces = {i: Discrete(self.obs_size) for i in self.agents}
+            self.observation_spaces = {i: Box(low=0, high=1, shape=self.obs_size) for i in self.agents}
 
     @partial(jax.jit, static_argnums=[0])
     def reset(self, key: chex.PRNGKey) -> Tuple[Dict, State]:
@@ -194,7 +194,7 @@ def get_obs(
         """Get all agents' observations."""
 
         # no agent-specific obs
-        board_fats = self.get_board_feats(new_state)
+        board_feats = self.get_board_feats(new_state)
         discard_feats = self._binarize_discard_pile(new_state.discard_pile)
 
         def _observe(aidx: int):
@@ -225,7 +225,7 @@ def _observe(aidx: int):
             return jnp.concatenate(
                 (
                     hands_feats,
-                    board_fats,
+                    board_feats,
                     discard_feats,
                     last_action_feats,
                     belief_v0_feats,
@@ -808,4 +808,4 @@ def get_card_knowledge_str(card_idx: int) -> str:
         legal_actions = [self.action_encoding[int(a)] for a in np.where(legal_moves)[0]]
         output += f"Legal Actions: {legal_actions}\n"
 
-        return output
+        return output