Skip to content

Commit 38b9103

Browse files
Copilot and araffin authored
Fix typo: change MaskablePPO tb_log_name from "PPO" to "MaskablePPO" (#307)
* Initial plan
* Fix typo: change MaskablePPO tb_log_name from "PPO" to "MaskablePPO"
  Co-authored-by: araffin <[email protected]>
* Fix linter issues
---------
Co-authored-by: copilot-swe-agent[bot] <[email protected]>
Co-authored-by: araffin <[email protected]>
Co-authored-by: Antonin RAFFIN <[email protected]>
1 parent 33889db commit 38b9103

File tree

5 files changed

+27
-5
lines changed

5 files changed

+27
-5
lines changed

docs/misc/changelog.rst

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,28 @@
33
Changelog
44
==========
55

6+
Release 2.7.1a3 (WIP)
7+
--------------------------
8+
9+
Breaking Changes:
10+
^^^^^^^^^^^^^^^^^
11+
12+
New Features:
13+
^^^^^^^^^^^^^
14+
15+
Bug Fixes:
16+
^^^^^^^^^^
17+
- Fix tensorboard log name for ``MaskablePPO``
18+
19+
Deprecations:
20+
^^^^^^^^^^^^^
21+
22+
Others:
23+
^^^^^^^
24+
25+
Documentation:
26+
^^^^^^^^^^^^^^
27+
628
Release 2.7.0 (2025-07-25)
729
--------------------------
830

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ exclude = """(?x)(
3535

3636
[tool.pytest.ini_options]
3737
# Deterministic ordering for tests; useful for pytest-xdist.
38-
env = ["PYTHONHASHSEED=0"]
38+
# env = ["PYTHONHASHSEED=0"]
3939

4040
filterwarnings = [
4141
# Tensorboard warnings

sb3_contrib/common/recurrent/policies.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -296,7 +296,7 @@ def predict_values(
296296
features = super(ActorCriticPolicy, self).extract_features(obs, self.vf_features_extractor)
297297

298298
if self.lstm_critic is not None:
299-
latent_vf, lstm_states_vf = self._process_sequence(features, lstm_states, episode_starts, self.lstm_critic)
299+
latent_vf, _ = self._process_sequence(features, lstm_states, episode_starts, self.lstm_critic)
300300
elif self.shared_lstm:
301301
# Use LSTM from the actor
302302
latent_pi, _ = self._process_sequence(features, lstm_states, episode_starts, self.lstm_actor)

sb3_contrib/ppo_mask/ppo_mask.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,7 @@ def collect_rollouts(
221221
while n_steps < n_rollout_steps:
222222
with th.no_grad():
223223
# Convert to pytorch tensor or to TensorDict
224-
obs_tensor = obs_as_tensor(self._last_obs, self.device)
224+
obs_tensor = obs_as_tensor(self._last_obs, self.device) # type: ignore[arg-type]
225225

226226
# This is the only change related to invalid action masking
227227
if use_masking:
@@ -431,7 +431,7 @@ def learn( # type: ignore[override]
431431
total_timesteps: int,
432432
callback: MaybeCallback = None,
433433
log_interval: int = 1,
434-
tb_log_name: str = "PPO",
434+
tb_log_name: str = "MaskablePPO",
435435
reset_num_timesteps: bool = True,
436436
use_masking: bool = True,
437437
progress_bar: bool = False,

sb3_contrib/version.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
2.7.0
1+
2.7.1a3

0 commit comments

Comments
 (0)