Fix typo: change MaskablePPO tb_log_name from "PPO" to "MaskablePPO" (#307)

Copilot · araffin · web-flow · commit 38b9103cdba5 · 2025-09-25T10:41:36.000+02:00
* Initial plan

* Fix typo: change MaskablePPO tb_log_name from "PPO" to "MaskablePPO"

Co-authored-by: araffin <1973948+araffin@users.noreply.github.com>

* Fix linter issues

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: araffin <1973948+araffin@users.noreply.github.com>
Co-authored-by: Antonin RAFFIN <antonin.raffin@ensta.org>
diff --git a/docs/misc/changelog.rst b/docs/misc/changelog.rst
@@ -3,6 +3,28 @@
 Changelog
 ==========
 
+Release 2.7.1a3 (WIP)
+--------------------------
+
+Breaking Changes:
+^^^^^^^^^^^^^^^^^
+
+New Features:
+^^^^^^^^^^^^^
+
+Bug Fixes:
+^^^^^^^^^^
+- Fix tensorboard log name for ``MaskablePPO``
+
+Deprecations:
+^^^^^^^^^^^^^
+
+Others:
+^^^^^^^
+
+Documentation:
+^^^^^^^^^^^^^^
+
 Release 2.7.0 (2025-07-25)
 --------------------------
 
diff --git a/pyproject.toml b/pyproject.toml
@@ -35,7 +35,7 @@ exclude = """(?x)(
 
 [tool.pytest.ini_options]
 # Deterministic ordering for tests; useful for pytest-xdist.
-env = ["PYTHONHASHSEED=0"]
+# env = ["PYTHONHASHSEED=0"]
 
 filterwarnings = [
     # Tensorboard warnings
diff --git a/sb3_contrib/common/recurrent/policies.py b/sb3_contrib/common/recurrent/policies.py
@@ -296,7 +296,7 @@ def predict_values(
         features = super(ActorCriticPolicy, self).extract_features(obs, self.vf_features_extractor)
 
         if self.lstm_critic is not None:
-            latent_vf, lstm_states_vf = self._process_sequence(features, lstm_states, episode_starts, self.lstm_critic)
+            latent_vf, _ = self._process_sequence(features, lstm_states, episode_starts, self.lstm_critic)
         elif self.shared_lstm:
             # Use LSTM from the actor
             latent_pi, _ = self._process_sequence(features, lstm_states, episode_starts, self.lstm_actor)
diff --git a/sb3_contrib/ppo_mask/ppo_mask.py b/sb3_contrib/ppo_mask/ppo_mask.py
@@ -221,7 +221,7 @@ def collect_rollouts(
         while n_steps < n_rollout_steps:
             with th.no_grad():
                 # Convert to pytorch tensor or to TensorDict
-                obs_tensor = obs_as_tensor(self._last_obs, self.device)
+                obs_tensor = obs_as_tensor(self._last_obs, self.device)  # type: ignore[arg-type]
 
                 # This is the only change related to invalid action masking
                 if use_masking:
@@ -431,7 +431,7 @@ def learn(  # type: ignore[override]
         total_timesteps: int,
         callback: MaybeCallback = None,
         log_interval: int = 1,
-        tb_log_name: str = "PPO",
+        tb_log_name: str = "MaskablePPO",
         reset_num_timesteps: bool = True,
         use_masking: bool = True,
         progress_bar: bool = False,
diff --git a/sb3_contrib/version.txt b/sb3_contrib/version.txt
@@ -1 +1 @@
-2.7.0
+2.7.1a3