Fix test_vec_normalize.py, test_tensorboard.py and common/monitor.py type hint (#1194)

qgallouedec · araffin · web-flow · commit 92f7a6f23b90 · 2023-01-13T18:28:22.000+01:00
* Remove from mypy exclude

* type hint for metadata

* Union[float, int] -> float

* Remove useless __init__

* Type hint for model and logger in BaseCallback

* Type hint for metric_dict

* Update changelog

* fix test_tensorboard

* ignore gamma type checking

* Fix monitor type hint

* Update logger type hints

* Fix type annotation and bump version

* Fix circular import

Co-authored-by: Antonin RAFFIN <antonin.raffin@ensta.org>
diff --git a/docs/guide/tensorboard.rst b/docs/guide/tensorboard.rst
@@ -268,11 +268,9 @@ Here is an example of how to save hyperparameters in TensorBoard:
 
 
     class HParamCallback(BaseCallback):
-        def __init__(self):
-            """
-            Saves the hyperparameters and metrics at the start of the training, and logs them to TensorBoard.
-            """
-            super().__init__()
+        """
+        Saves the hyperparameters and metrics at the start of the training, and logs them to TensorBoard.
+        """
 
         def _on_training_start(self) -> None:
             hparam_dict = {
@@ -284,7 +282,7 @@ Here is an example of how to save hyperparameters in TensorBoard:
             # Tensorbaord will find & display metrics from the `SCALARS` tab
             metric_dict = {
                 "rollout/ep_len_mean": 0,
-                "train/value_loss": 0,
+                "train/value_loss": 0.0,
             }
             self.logger.record(
                 "hparams",
diff --git a/docs/misc/changelog.rst b/docs/misc/changelog.rst
@@ -4,7 +4,7 @@ Changelog
 ==========
 
 
-Release 1.8.0a0 (WIP)
+Release 1.8.0a1 (WIP)
 --------------------------
 
 
@@ -28,6 +28,9 @@ Deprecations:
 
 Others:
 ^^^^^^^
+- Fixed ``tests/test_tensorboard.py`` type hint
+- Fixed ``tests/test_vec_normalize.py`` type hint
+- Fixed ``stable_baselines3/common/monitor.py`` type hint
 
 Documentation:
 ^^^^^^^^^^^^^^
diff --git a/setup.cfg b/setup.cfg
@@ -39,7 +39,6 @@ exclude = (?x)(
     | stable_baselines3/common/envs/identity_env.py$
     | stable_baselines3/common/envs/multi_input_envs.py$
     | stable_baselines3/common/logger.py$
-    | stable_baselines3/common/monitor.py$
     | stable_baselines3/common/off_policy_algorithm.py$
     | stable_baselines3/common/on_policy_algorithm.py$
     | stable_baselines3/common/policies.py$
@@ -67,9 +66,7 @@ exclude = (?x)(
     | stable_baselines3/td3/policies.py$
     | stable_baselines3/td3/td3.py$
     | tests/test_logger.py$
-    | tests/test_tensorboard.py$
     | tests/test_train_eval_mode.py$
-    | tests/test_vec_normalize.py$
   )
 
 [flake8]
diff --git a/stable_baselines3/common/callbacks.py b/stable_baselines3/common/callbacks.py
@@ -6,6 +6,8 @@
 import gym
 import numpy as np
 
+from stable_baselines3.common.logger import Logger
+
 try:
     from tqdm import TqdmExperimentalWarning
 
@@ -29,10 +31,13 @@ class BaseCallback(ABC):
     :param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages
     """
 
+    # The RL model
+    # Type hint as string to avoid circular import
+    model: "base_class.BaseAlgorithm"
+    logger: Logger
+
     def __init__(self, verbose: int = 0):
         super().__init__()
-        # The RL model
-        self.model = None  # type: Optional[base_class.BaseAlgorithm]
         # An alias for self.model.get_env(), the environment used for training
         self.training_env = None  # type: Union[gym.Env, VecEnv, None]
         # Number of time the callback was called
@@ -42,7 +47,6 @@ def __init__(self, verbose: int = 0):
         self.verbose = verbose
         self.locals: Dict[str, Any] = {}
         self.globals: Dict[str, Any] = {}
-        self.logger = None
         # Sometimes, for event callback, it is useful
         # to have access to the parent object
         self.parent = None  # type: Optional[BaseCallback]
diff --git a/stable_baselines3/common/envs/multi_input_envs.py b/stable_baselines3/common/envs/multi_input_envs.py
@@ -121,7 +121,7 @@ def init_possible_transitions(self) -> None:
         self.right_possible = [0, 1, 2, 12, 13, 14]
         self.up_possible = [4, 8, 12, 7, 11, 15]
 
-    def step(self, action: Union[int, float, np.ndarray]) -> GymStepReturn:
+    def step(self, action: Union[float, np.ndarray]) -> GymStepReturn:
         """
         Run one timestep of the environment's dynamics. When end of
         episode is reached, you are responsible for calling `reset()`
diff --git a/stable_baselines3/common/logger.py b/stable_baselines3/common/logger.py
@@ -5,7 +5,7 @@
 import tempfile
 import warnings
 from collections import defaultdict
-from typing import Any, Dict, List, Optional, Sequence, TextIO, Tuple, Union
+from typing import Any, Dict, List, Mapping, Optional, Sequence, TextIO, Tuple, Union
 
 import numpy as np
 import pandas
@@ -16,7 +16,7 @@
     from torch.utils.tensorboard import SummaryWriter
     from torch.utils.tensorboard.summary import hparams
 except ImportError:
-    SummaryWriter = None
+    SummaryWriter = None  # type: ignore[misc, assignment]
 
 try:
     from tqdm import tqdm
@@ -38,7 +38,7 @@ class Video:
     :param fps: frames per second
     """
 
-    def __init__(self, frames: th.Tensor, fps: Union[float, int]):
+    def __init__(self, frames: th.Tensor, fps: float):
         self.frames = frames
         self.fps = fps
 
@@ -80,7 +80,7 @@ class HParam:
         A non-empty metrics dict is required to display hyperparameters in the corresponding Tensorboard section.
     """
 
-    def __init__(self, hparam_dict: Dict[str, Union[bool, str, float, int, None]], metric_dict: Dict[str, Union[float, int]]):
+    def __init__(self, hparam_dict: Mapping[str, Union[bool, str, float, None]], metric_dict: Mapping[str, float]):
         self.hparam_dict = hparam_dict
         if not metric_dict:
             raise Exception("`metric_dict` must not be empty to display hyperparameters to the HPARAMS tensorboard tab.")
@@ -329,7 +329,7 @@ class CSVOutputFormat(KVWriter):
 
     def __init__(self, filename: str):
         self.file = open(filename, "w+t")
-        self.keys = []
+        self.keys: List[str] = []
         self.separator = ","
         self.quotechar = '"'
 
diff --git a/stable_baselines3/common/monitor.py b/stable_baselines3/common/monitor.py
@@ -5,7 +5,7 @@
 import os
 import time
 from glob import glob
-from typing import Dict, List, Optional, Tuple, Union
+from typing import Any, Dict, List, Optional, Tuple, Union
 
 import gym
 import numpy as np
@@ -41,25 +41,26 @@ def __init__(
     ):
         super().__init__(env=env)
         self.t_start = time.time()
+        self.results_writer = None
         if filename is not None:
             self.results_writer = ResultsWriter(
                 filename,
                 header={"t_start": self.t_start, "env_id": env.spec and env.spec.id},
                 extra_keys=reset_keywords + info_keywords,
                 override_existing=override_existing,
             )
-        else:
-            self.results_writer = None
+
         self.reset_keywords = reset_keywords
         self.info_keywords = info_keywords
         self.allow_early_resets = allow_early_resets
-        self.rewards = None
+        self.rewards: List[float] = []
         self.needs_reset = True
-        self.episode_returns = []
-        self.episode_lengths = []
-        self.episode_times = []
+        self.episode_returns: List[float] = []
+        self.episode_lengths: List[int] = []
+        self.episode_times: List[float] = []
         self.total_steps = 0
-        self.current_reset_info = {}  # extra info about the current episode, that was passed in during reset()
+        # extra info about the current episode, that was passed in during reset()
+        self.current_reset_info: Dict[str, Any] = {}
 
     def reset(self, **kwargs) -> GymObs:
         """
@@ -200,7 +201,7 @@ def __init__(
 
         self.file_handler.flush()
 
-    def write_row(self, epinfo: Dict[str, Union[float, int]]) -> None:
+    def write_row(self, epinfo: Dict[str, float]) -> None:
         """
         Close the file handler
 
diff --git a/stable_baselines3/common/running_mean_std.py b/stable_baselines3/common/running_mean_std.py
@@ -1,4 +1,4 @@
-from typing import Tuple, Union
+from typing import Tuple
 
 import numpy as np
 
@@ -40,7 +40,7 @@ def update(self, arr: np.ndarray) -> None:
         batch_count = arr.shape[0]
         self.update_from_moments(batch_mean, batch_var, batch_count)
 
-    def update_from_moments(self, batch_mean: np.ndarray, batch_var: np.ndarray, batch_count: Union[int, float]) -> None:
+    def update_from_moments(self, batch_mean: np.ndarray, batch_var: np.ndarray, batch_count: float) -> None:
         delta = batch_mean - self.mean
         tot_count = self.count + batch_count
 
diff --git a/stable_baselines3/common/utils.py b/stable_baselines3/common/utils.py
@@ -76,7 +76,7 @@ def update_learning_rate(optimizer: th.optim.Optimizer, learning_rate: float) ->
         param_group["lr"] = learning_rate
 
 
-def get_schedule_fn(value_schedule: Union[Schedule, float, int]) -> Schedule:
+def get_schedule_fn(value_schedule: Union[Schedule, float]) -> Schedule:
     """
     Transform (if needed) learning rate and clip range (for PPO)
     to callable.
diff --git a/stable_baselines3/version.txt b/stable_baselines3/version.txt
@@ -1 +1 @@
-1.8.0a0
+1.8.0a1
diff --git a/tests/test_tensorboard.py b/tests/test_tensorboard.py
@@ -1,4 +1,5 @@
 import os
+from typing import Dict, Union
 
 import pytest
 
@@ -18,21 +19,21 @@
 
 
 class HParamCallback(BaseCallback):
-    def __init__(self):
-        """
-        Saves the hyperparameters and metrics at the start of the training, and logs them to TensorBoard.
-        """
-        super().__init__()
+    """
+    Saves the hyperparameters and metrics at the start of the training, and logs them to TensorBoard.
+    """
 
     def _on_training_start(self) -> None:
-        hparam_dict = {
+        hparam_dict: Dict[str, Union[str, float]] = {
             "algorithm": self.model.__class__.__name__,
-            "learning rate": self.model.learning_rate,
-            "gamma": self.model.gamma,
+            # Ignore type checking for gamma, see https://github.com/DLR-RM/stable-baselines3/pull/1194/files#r1035006458
+            "gamma": self.model.gamma,  # type: ignore[attr-defined]
         }
+        if isinstance(self.model.learning_rate, float):  # Can also be Schedule, in that case, we don't report
+            hparam_dict["learning rate"] = self.model.learning_rate
         # define the metrics that will appear in the `HPARAMS` Tensorboard tab by referencing their tag
         # Tensorbaord will find & display metrics from the `SCALARS` tab
-        metric_dict = {
+        metric_dict: Dict[str, float] = {
             "rollout/ep_len_mean": 0,
         }
         self.logger.record(
diff --git a/tests/test_vec_normalize.py b/tests/test_vec_normalize.py
@@ -1,4 +1,5 @@
 import operator
+from typing import Any, Dict
 
 import gym
 import numpy as np
@@ -20,7 +21,7 @@
 
 
 class DummyRewardEnv(gym.Env):
-    metadata = {}
+    metadata: Dict[str, Any] = {}
 
     def __init__(self, return_reward_idx=0):
         self.action_space = spaces.Discrete(2)