Release v2.0.0 (#388)

araffin · web-flow · commit 07f74478ae20 · 2023-06-23T14:42:17.000+02:00
* Release v2.0.0

* Reformat
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,12 +1,18 @@
-## Release 2.0.0a12 (WIP)
+## Release 2.0.0 (2023-06-22)
+
+**Gymnasium support**
+
+> **Warning**
+> Stable-Baselines3 (SB3) v2.0.0 will be the last release supporting Python 3.7
 
 ### Breaking Changes
-- Upgraded to gym 0.26+
 - Fixed bug in HistoryWrapper, now returns the correct obs space limits
 - Upgraded to SB3 >= 2.0.0
 - Upgraded to Huggingface-SB3 >= 2.2.5
+- Upgraded to Gym API 0.26+; RL Zoo3 no longer works with Gym 0.21
 
 ### New Features
+- Added Gymnasium support
 - Gym 0.26+ patches to continue working with pybullet and TimeLimit wrapper
 
 ### Bug fixes
diff --git a/requirements.txt b/requirements.txt
@@ -1,6 +1,6 @@
 gym==0.26.2
-stable-baselines3[extra_no_roms,tests,docs]>=2.0.0a13
-sb3-contrib>=2.0.0a13
+stable-baselines3[extra_no_roms,tests,docs]>=2.0.0
+sb3-contrib>=2.0.0
 box2d-py==2.3.8
 pybullet
 # minigrid
diff --git a/rl_zoo3/version.txt b/rl_zoo3/version.txt
@@ -1 +1 @@
-2.0.0a13
+2.0.0
diff --git a/rl_zoo3/wrappers.py b/rl_zoo3/wrappers.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Optional, SupportsFloat, Tuple
+from typing import Any, ClassVar, Dict, Optional, SupportsFloat, Tuple
 
 import gymnasium as gym
 import numpy as np
@@ -299,7 +299,7 @@ class MaskVelocityWrapper(gym.ObservationWrapper):
     """
 
     # Supported envs
-    velocity_indices = {
+    velocity_indices: ClassVar[Dict[str, np.ndarray]] = {
         "CartPole-v1": np.array([1, 3]),
         "MountainCar-v0": np.array([1]),
         "MountainCarContinuous-v0": np.array([1]),
diff --git a/setup.py b/setup.py
@@ -27,8 +27,8 @@
     },
     entry_points={"console_scripts": ["rl_zoo3=rl_zoo3.cli:main"]},
     install_requires=[
-        "sb3_contrib>=2.0.0a13",
-        "gym==0.26.2",
+        "sb3_contrib>=2.0.0",
+        "gym==0.26.2",  # for patches to make gym backward compat
         "huggingface_sb3>=2.2.5",
         "tqdm",
         "rich",
@@ -52,6 +52,15 @@
     version=__version__,
     python_requires=">=3.7",
     # PyPI package information.
+    # PyPI package information.
+    project_urls={
+        "Code": "https://github.com/DLR-RM/rl-baselines3-zoo",
+        "Documentation": "https://rl-baselines3-zoo.readthedocs.io/en/master/",
+        "Changelog": "https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/CHANGELOG.md",
+        "Stable-Baselines3": "https://github.com/DLR-RM/stable-baselines3",
+        "RL-Zoo": "https://github.com/DLR-RM/rl-baselines3-zoo",
+        "SBX": "https://github.com/araffin/sbx",
+    },
     classifiers=[
         "Programming Language :: Python :: 3",
         "Programming Language :: Python :: 3.7",
diff --git a/tests/dummy_env/test_env/test_env.py b/tests/dummy_env/test_env/test_env.py
@@ -1,10 +1,12 @@
+from typing import ClassVar
+
 import gymnasium as gym
 import numpy as np
 from gymnasium import spaces
 
 
 class TestEnv(gym.Env):
-    metadata = {"render_modes": ["human"], "render_fps": 4}
+    metadata: ClassVar[dict] = {"render_modes": ["human"], "render_fps": 4}
     __test__ = False
 
     def __init__(self, render_mode=None):
diff --git a/tests/test_enjoy.py b/tests/test_enjoy.py
@@ -87,15 +87,15 @@ def test_load(tmp_path):
     # Load best model
     args = ["-n", str(N_STEPS), "-f", tmp_path, "--algo", algo, "--env", env_id, "--no-render"]
     # Test with progress bar
-    return_code = subprocess.call(["python", "enjoy.py", *args] + ["--load-best", "-P"])
+    return_code = subprocess.call(["python", "enjoy.py", *args, "--load-best", "-P"])
     _assert_eq(return_code, 0)
 
     # Load checkpoint
-    return_code = subprocess.call(["python", "enjoy.py", *args] + ["--load-checkpoint", str(500)])
+    return_code = subprocess.call(["python", "enjoy.py", *args, "--load-checkpoint", str(500)])
     _assert_eq(return_code, 0)
 
     # Load last checkpoint
-    return_code = subprocess.call(["python", "enjoy.py", *args] + ["--load-last-checkpoint"])
+    return_code = subprocess.call(["python", "enjoy.py", *args, "--load-last-checkpoint"])
     _assert_eq(return_code, 0)