Release 2.7.0 (#304)

araffin · web-flow · commit 33889dbb215b · 2025-07-25T12:14:57.000+02:00
* Release 2.7.0

* Fix type checker and linter errors
diff --git a/docs/misc/changelog.rst b/docs/misc/changelog.rst
@@ -3,7 +3,7 @@
 Changelog
 ==========
 
-Release 2.7.0a0 (WIP)
+Release 2.7.0 (2025-07-25)
 --------------------------
 
 Breaking Changes:
diff --git a/sb3_contrib/common/maskable/buffers.py b/sb3_contrib/common/maskable/buffers.py
@@ -59,7 +59,7 @@ def __init__(
 
     def reset(self) -> None:
         if isinstance(self.action_space, spaces.Discrete):
-            mask_dims = self.action_space.n
+            mask_dims = int(self.action_space.n)
         elif isinstance(self.action_space, spaces.MultiDiscrete):
             mask_dims = sum(self.action_space.nvec)
         elif isinstance(self.action_space, spaces.MultiBinary):
@@ -165,7 +165,7 @@ def __init__(
 
     def reset(self) -> None:
         if isinstance(self.action_space, spaces.Discrete):
-            mask_dims = self.action_space.n
+            mask_dims = int(self.action_space.n)
         elif isinstance(self.action_space, spaces.MultiDiscrete):
             mask_dims = sum(self.action_space.nvec)
         elif isinstance(self.action_space, spaces.MultiBinary):
diff --git a/sb3_contrib/common/recurrent/buffers.py b/sb3_contrib/common/recurrent/buffers.py
@@ -231,7 +231,7 @@ def _get_samples(
         return RecurrentRolloutBufferSamples(
             # (batch_size, obs_dim) -> (n_seq, max_length, obs_dim) -> (n_seq * max_length, obs_dim)
             observations=self.pad(self.observations[batch_inds]).reshape((padded_batch_size, *self.obs_shape)),
-            actions=self.pad(self.actions[batch_inds]).reshape((padded_batch_size,) + self.actions.shape[1:]),
+            actions=self.pad(self.actions[batch_inds]).reshape((padded_batch_size, *self.actions.shape[1:])),
             old_values=self.pad_and_flatten(self.values[batch_inds]),
             old_log_prob=self.pad_and_flatten(self.log_probs[batch_inds]),
             advantages=self.pad_and_flatten(self.advantages[batch_inds]),
@@ -374,7 +374,7 @@ def _get_samples(
 
         return RecurrentDictRolloutBufferSamples(
             observations=observations,
-            actions=self.pad(self.actions[batch_inds]).reshape((padded_batch_size,) + self.actions.shape[1:]),
+            actions=self.pad(self.actions[batch_inds]).reshape((padded_batch_size, *self.actions.shape[1:])),
             old_values=self.pad_and_flatten(self.values[batch_inds]),
             old_log_prob=self.pad_and_flatten(self.log_probs[batch_inds]),
             advantages=self.pad_and_flatten(self.advantages[batch_inds]),
diff --git a/sb3_contrib/version.txt b/sb3_contrib/version.txt
@@ -1 +1 @@
-2.7.0a0
+2.7.0
diff --git a/setup.py b/setup.py
@@ -67,7 +67,7 @@
     packages=[package for package in find_packages() if package.startswith("sb3_contrib")],
     package_data={"sb3_contrib": ["py.typed", "version.txt"]},
     install_requires=[
-        "stable_baselines3>=2.7.0a0,<3.0",
+        "stable_baselines3>=2.7.0,<3.0",
     ],
     description="Contrib package of Stable Baselines3, experimental code.",
     author="Antonin Raffin",