
Commit bec0038

Upgrade to python 3.7+ syntax (#69)
* Upgrade to python 3.7+ syntax
* Switch to PyTorch 1.11
1 parent 812648e commit bec0038
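Most of the diff below is the mechanical cleanup that a tool such as pyupgrade (run with its Python 3.7+ target) performs once Python 2 compatibility is dropped. A rough before/after sketch, using a hypothetical class that is not taken from this repository:

# Before: Python 2 compatible spellings (hypothetical example, not from this repo)
class RolloutWorker(object):
    def __init__(self, n_envs=1):
        super(RolloutWorker, self).__init__()
        self.n_envs = n_envs

# After pyupgrade: same behaviour, Python 3.7+ syntax
class RolloutWorker:
    def __init__(self, n_envs=1):
        super().__init__()  # bare super() resolves to the same class in the MRO
        self.n_envs = n_envs

# File reads also drop the redundant mode argument, since "r" is the default:
#     with open(version_file, "r") as f:   ->   with open(version_file) as f: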

File tree: 17 files changed (+39 / -40 lines)


.github/workflows/ci.yml

Lines changed: 1 addition & 1 deletion
@@ -28,7 +28,7 @@ jobs:
         run: |
           python -m pip install --upgrade pip
           # cpu version of pytorch
-          pip install torch==1.8.1+cpu -f https://download.pytorch.org/whl/torch_stable.html
+          pip install torch==1.11+cpu -f https://download.pytorch.org/whl/torch_stable.html
           # Install dependencies for docs and tests
           pip install stable_baselines3[extra,tests,docs]
           # Install master version

docs/conf.py

Lines changed: 1 addition & 2 deletions
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 #
 # Configuration file for the Sphinx documentation builder.
 #
@@ -46,7 +45,7 @@ def __getattr__(cls, name):

 # Read version from file
 version_file = os.path.join(os.path.dirname(__file__), "../sb3_contrib", "version.txt")
-with open(version_file, "r") as file_handler:
+with open(version_file) as file_handler:
     __version__ = file_handler.read().strip()

 # -- Project information -----------------------------------------------------

docs/misc/changelog.rst

Lines changed: 4 additions & 2 deletions
@@ -3,15 +3,17 @@
 Changelog
 ==========

-Release 1.5.1a1 (WIP)
+Release 1.5.1a5 (WIP)
 -------------------------------

 Breaking Changes:
 ^^^^^^^^^^^^^^^^^
-- Upgraded to Stable-Baselines3 >= 1.5.1a1
+- Upgraded to Stable-Baselines3 >= 1.5.1a5
 - Changed the way policy "aliases" are handled ("MlpPolicy", "CnnPolicy", ...), removing the former
   ``register_policy`` helper, ``policy_base`` parameter and using ``policy_aliases`` static attributes instead (@Gregwar)
 - Renamed ``rollout/exploration rate`` key to ``rollout/exploration_rate`` for QRDQN (to be consistent with SB3 DQN)
+- Upgraded to python 3.7+ syntax using ``pyupgrade``
+- SB3 now requires PyTorch >= 1.11

 New Features:
 ^^^^^^^^^^^^^

sb3_contrib/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -8,5 +8,5 @@

 # Read version from file
 version_file = os.path.join(os.path.dirname(__file__), "version.txt")
-with open(version_file, "r") as file_handler:
+with open(version_file) as file_handler:
     __version__ = file_handler.read().strip()

sb3_contrib/common/maskable/buffers.py

Lines changed: 3 additions & 5 deletions
@@ -145,9 +145,7 @@ def __init__(
         n_envs: int = 1,
     ):
         self.action_masks = None
-        super(MaskableDictRolloutBuffer, self).__init__(
-            buffer_size, observation_space, action_space, device, gae_lambda, gamma, n_envs=n_envs
-        )
+        super().__init__(buffer_size, observation_space, action_space, device, gae_lambda, gamma, n_envs=n_envs)

     def reset(self) -> None:
         if isinstance(self.action_space, spaces.Discrete):
@@ -162,7 +160,7 @@ def reset(self) -> None:
         self.mask_dims = mask_dims
         self.action_masks = np.ones((self.buffer_size, self.n_envs, self.mask_dims), dtype=np.float32)

-        super(MaskableDictRolloutBuffer, self).reset()
+        super().reset()

     def add(self, *args, action_masks: Optional[np.ndarray] = None, **kwargs) -> None:
         """
@@ -171,7 +169,7 @@ def add(self, *args, action_masks: Optional[np.ndarray] = None, **kwargs) -> None:
         if action_masks is not None:
             self.action_masks[self.pos] = action_masks.reshape((self.n_envs, self.mask_dims))

-        super(MaskableDictRolloutBuffer, self).add(*args, **kwargs)
+        super().add(*args, **kwargs)

     def get(self, batch_size: Optional[int] = None) -> Generator[MaskableDictRolloutBufferSamples, None, None]:
         assert self.full, ""

sb3_contrib/common/maskable/policies.py

Lines changed: 2 additions & 2 deletions
@@ -345,7 +345,7 @@ def __init__(
         optimizer_class: Type[th.optim.Optimizer] = th.optim.Adam,
         optimizer_kwargs: Optional[Dict[str, Any]] = None,
     ):
-        super(MaskableActorCriticCnnPolicy, self).__init__(
+        super().__init__(
             observation_space,
             action_space,
             lr_schedule,
@@ -396,7 +396,7 @@ def __init__(
         optimizer_class: Type[th.optim.Optimizer] = th.optim.Adam,
         optimizer_kwargs: Optional[Dict[str, Any]] = None,
     ):
-        super(MaskableMultiInputActorCriticPolicy, self).__init__(
+        super().__init__(
             observation_space,
             action_space,
             lr_schedule,

sb3_contrib/common/vec_env/async_eval.py

Lines changed: 1 addition & 1 deletion
@@ -72,7 +72,7 @@ def _worker(
             break


-class AsyncEval(object):
+class AsyncEval:
     """
     Helper class to do asynchronous evaluation of different policies with multiple processes.
     It is useful when implementing population based methods like Evolution Strategies (ES),
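For context on the class being touched here: AsyncEval spreads policy evaluation over worker processes, which ARS uses to evaluate candidate policies in parallel. A minimal usage sketch, assuming the ARS + AsyncEval interface documented in sb3-contrib (exact constructor arguments may differ between versions):

from sb3_contrib import ARS
from sb3_contrib.common.vec_env import AsyncEval
from stable_baselines3.common.env_util import make_vec_env

if __name__ == "__main__":
    env_id = "CartPole-v1"
    n_envs = 2  # number of evaluation worker processes

    model = ARS("MlpPolicy", env_id, verbose=1)
    # One env factory per worker; each process evaluates candidate policies asynchronously.
    async_eval = AsyncEval([lambda: make_vec_env(env_id) for _ in range(n_envs)], model.policy)
    model.learn(total_timesteps=20_000, async_eval=async_eval)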

sb3_contrib/common/wrappers/time_feature.py

Lines changed: 1 addition & 1 deletion
@@ -50,7 +50,7 @@ def __init__(self, env: gym.Env, max_steps: int = 1000, test_mode: bool = False)
         else:
             env.observation_space = gym.spaces.Box(low=low, high=high, dtype=self.dtype)

-        super(TimeFeatureWrapper, self).__init__(env)
+        super().__init__(env)

         # Try to infer the max number of steps per episode
         try:
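TimeFeatureWrapper, the class modernised above, appends a remaining-time feature to the observation so that time-limited episodes stay closer to Markovian. A minimal usage sketch, with the environment id picked only for illustration:

import gym

from sb3_contrib.common.wrappers import TimeFeatureWrapper

# The observation gains one extra entry in [0, 1] encoding the remaining episode time.
env = TimeFeatureWrapper(gym.make("Pendulum-v1"), max_steps=1000)
obs = env.reset()
print(obs.shape)  # original observation dimension + 1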

sb3_contrib/qrdqn/policies.py

Lines changed: 4 additions & 4 deletions
@@ -38,7 +38,7 @@ def __init__(
         activation_fn: Type[nn.Module] = nn.ReLU,
         normalize_images: bool = True,
     ):
-        super(QuantileNetwork, self).__init__(
+        super().__init__(
             observation_space,
             action_space,
             features_extractor=features_extractor,
@@ -125,7 +125,7 @@ def __init__(
         optimizer_kwargs: Optional[Dict[str, Any]] = None,
     ):

-        super(QRDQNPolicy, self).__init__(
+        super().__init__(
             observation_space,
             action_space,
             features_extractor_class,
@@ -246,7 +246,7 @@ def __init__(
         optimizer_class: Type[th.optim.Optimizer] = th.optim.Adam,
         optimizer_kwargs: Optional[Dict[str, Any]] = None,
     ):
-        super(CnnPolicy, self).__init__(
+        super().__init__(
             observation_space,
             action_space,
             lr_schedule,
@@ -294,7 +294,7 @@ def __init__(
         optimizer_class: Type[th.optim.Optimizer] = th.optim.Adam,
         optimizer_kwargs: Optional[Dict[str, Any]] = None,
     ):
-        super(MultiInputPolicy, self).__init__(
+        super().__init__(
             observation_space,
             action_space,
             lr_schedule,

sb3_contrib/qrdqn/qrdqn.py

Lines changed: 4 additions & 4 deletions
@@ -93,7 +93,7 @@ def __init__(
         _init_setup_model: bool = True,
     ):

-        super(QRDQN, self).__init__(
+        super().__init__(
             policy,
             env,
             learning_rate,
@@ -139,7 +139,7 @@ def __init__(
             self._setup_model()

     def _setup_model(self) -> None:
-        super(QRDQN, self)._setup_model()
+        super()._setup_model()
         self._create_aliases()
         self.exploration_schedule = get_linear_fn(
             self.exploration_initial_eps, self.exploration_final_eps, self.exploration_fraction
@@ -253,7 +253,7 @@ def learn(
         reset_num_timesteps: bool = True,
     ) -> OffPolicyAlgorithm:

-        return super(QRDQN, self).learn(
+        return super().learn(
             total_timesteps=total_timesteps,
             callback=callback,
             log_interval=log_interval,
@@ -266,7 +266,7 @@ def learn(
         )

     def _excluded_save_params(self) -> List[str]:
-        return super(QRDQN, self)._excluded_save_params() + ["quantile_net", "quantile_net_target"]
+        return super()._excluded_save_params() + ["quantile_net", "quantile_net_target"]

     def _get_torch_save_params(self) -> Tuple[List[str], List[str]]:
         state_dicts = ["policy", "policy.optimizer"]
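The QRDQN changes above are purely syntactic (bare super() calls); training behaviour is unchanged. For reference, a minimal QRDQN run, with hyperparameters chosen only for illustration:

from sb3_contrib import QRDQN

# exploration_fraction feeds the linear epsilon schedule built in _setup_model above
model = QRDQN("MlpPolicy", "CartPole-v1", exploration_fraction=0.1, verbose=1)
model.learn(total_timesteps=10_000, log_interval=4)
model.save("qrdqn_cartpole")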
