Release v1.6.0 (#958)

araffin · web-flow · commit c1f1c3d3d796 · 2022-07-12T22:50:23.000+02:00
* Release v1.6.0 + update doc + add copy button

* Update read the doc conda env

* Update year

* Fix bug in kl divergence check

* Rephrase requirement for envpool and isaac gym
diff --git a/docs/conda_env.yml b/docs/conda_env.yml
@@ -6,15 +6,15 @@ dependencies:
   - cpuonly=1.0=0
   - pip=21.1
   - python=3.7
-  - pytorch=1.8.1=py3.7_cpu_0
+  - pytorch=1.11=py3.7_cpu_0
   - pip:
-    - gym>=0.17.2
+    - gym==0.21
     - cloudpickle
     - opencv-python-headless
     - pandas
     - numpy
     - matplotlib
     - sphinx_autodoc_typehints
     - sphinx>=4.2
-    # See https://github.com/readthedocs/sphinx_rtd_theme/issues/1115
     - sphinx_rtd_theme>=1.0
+    - sphinx_copybutton
diff --git a/docs/conf.py b/docs/conf.py
@@ -24,6 +24,14 @@
 except ImportError:
     enable_spell_check = False
 
+# Try to enable copy button
+try:
+    import sphinx_copybutton  # noqa: F401
+
+    enable_copy_button = True
+except ImportError:
+    enable_copy_button = False
+
 # source code directory, relative to this file, for sphinx-autobuild
 sys.path.insert(0, os.path.abspath(".."))
 
@@ -51,7 +59,7 @@ def __getattr__(cls, name):
 # -- Project information -----------------------------------------------------
 
 project = "Stable Baselines3"
-copyright = "2020, Stable Baselines3"
+copyright = "2022, Stable Baselines3"
 author = "Stable Baselines3 Contributors"
 
 # The short X.Y version
@@ -83,6 +91,9 @@ def __getattr__(cls, name):
 if enable_spell_check:
     extensions.append("sphinxcontrib.spelling")
 
+if enable_copy_button:
+    extensions.append("sphinx_copybutton")
+
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ["_templates"]
 
diff --git a/docs/guide/examples.rst b/docs/guide/examples.rst
@@ -729,6 +729,16 @@ to keep track of the agent progress.
   model.learn(10_000)
 
 
+SB3 with EnvPool or Isaac Gym
+-----------------------------
+
+Just like Procgen (see above), `EnvPool <https://github.com/sail-sg/envpool>`_ and `Isaac Gym <https://github.com/NVIDIA-Omniverse/IsaacGymEnvs>`_ accelerate the environment by
+already providing a vectorized implementation.
+
+To use SB3 with those tools, you must wrap the env with the tool-specific ``VecEnvWrapper`` that will pre-process the data for SB3.
+You can find links to those wrappers in `issue #772 <https://github.com/DLR-RM/stable-baselines3/issues/772#issuecomment-1048657002>`_.
+
+
 Record a Video
 --------------
 
diff --git a/docs/guide/install.rst b/docs/guide/install.rst
@@ -54,6 +54,17 @@ Bleeding-edge version
 	pip install git+https://github.com/DLR-RM/stable-baselines3
 
 
+.. note::
+
+  If you want to use the latest gym version (0.24+), you have to use
+
+	.. code-block:: bash
+
+		pip install git+https://github.com/carlosluis/stable-baselines3@fix_tests
+
+  See `PR #780 <https://github.com/DLR-RM/stable-baselines3/pull/780>`_ for more information.
+
+
 Development version
 -------------------
 
diff --git a/docs/misc/changelog.rst b/docs/misc/changelog.rst
@@ -4,9 +4,11 @@ Changelog
 ==========
 
 
-Release 1.5.1a9 (WIP)
+Release 1.6.0 (2022-07-11)
 ---------------------------
 
+**Recurrent PPO (PPO LSTM), better defaults for learning from pixels with SAC/TD3**
+
 Breaking Changes:
 ^^^^^^^^^^^^^^^^^
 - Changed the way policy "aliases" are handled ("MlpPolicy", "CnnPolicy", ...), removing the former
@@ -34,6 +36,7 @@ Bug Fixes:
 - Fixed issues due to newer version of protobuf (tensorboard) and sphinx
 - Fix exception causes all over the codebase (@cool-RR)
 - Prohibit simultaneous use of optimize_memory_usage and handle_timeout_termination due to a bug (@MWeltevrede)
+- Fixed a bug in the ``kl_divergence`` check that would fail when using numpy arrays with a MultiCategorical distribution
 
 Deprecations:
 ^^^^^^^^^^^^^
@@ -51,6 +54,8 @@ Documentation:
 - Added remark about breaking Markov assumption and timeout handling
 - Added doc about MLFlow integration via custom logger (@git-thor)
 - Updated Huggingface integration doc
+- Added copy button for code snippets
+- Added doc about EnvPool and Isaac Gym support
 
 
 Release 1.5.0 (2022-03-25)
diff --git a/setup.py b/setup.py
@@ -111,6 +111,8 @@
             "sphinxcontrib.spelling",
             # Type hints support
             "sphinx-autodoc-typehints",
+            # Copy button for code snippets
+            "sphinx_copybutton",
         ],
         "extra": [
             # For render
diff --git a/stable_baselines3/common/buffers.py b/stable_baselines3/common/buffers.py
@@ -193,7 +193,8 @@ def __init__(
         # see https://github.com/DLR-RM/stable-baselines3/issues/934
         if optimize_memory_usage and handle_timeout_termination:
             raise ValueError(
-                "ReplayBuffer does not support optimize_memory_usage = True and handle_timeout_termination = True simultaneously."
+                "ReplayBuffer does not support optimize_memory_usage = True "
+                "and handle_timeout_termination = True simultaneously."
             )
         self.optimize_memory_usage = optimize_memory_usage
 
diff --git a/stable_baselines3/common/distributions.py b/stable_baselines3/common/distributions.py
@@ -4,6 +4,7 @@
 from typing import Any, Dict, List, Optional, Tuple, Union
 
 import gym
+import numpy as np
 import torch as th
 from gym import spaces
 from torch import nn
@@ -688,7 +689,7 @@ def kl_divergence(dist_true: Distribution, dist_pred: Distribution) -> th.Tensor
     # MultiCategoricalDistribution is not a PyTorch Distribution subclass
     # so we need to implement it ourselves!
     if isinstance(dist_pred, MultiCategoricalDistribution):
-        assert dist_pred.action_dims == dist_true.action_dims, "Error: distributions must have the same input space"
+        assert np.allclose(dist_pred.action_dims, dist_true.action_dims), "Error: distributions must have the same input space"
         return th.stack(
             [th.distributions.kl_divergence(p, q) for p, q in zip(dist_true.distribution, dist_pred.distribution)],
             dim=1,
diff --git a/stable_baselines3/version.txt b/stable_baselines3/version.txt
@@ -1 +1 @@
-1.5.1a9
+1.6.0
diff --git a/tests/test_distributions.py b/tests/test_distributions.py
@@ -163,7 +163,9 @@ def test_categorical(dist, CAT_ACTIONS):
         BernoulliDistribution(N_ACTIONS).proba_distribution(th.rand(N_ACTIONS)),
         CategoricalDistribution(N_ACTIONS).proba_distribution(th.rand(N_ACTIONS)),
         DiagGaussianDistribution(N_ACTIONS).proba_distribution(th.rand(N_ACTIONS), th.rand(N_ACTIONS)),
-        MultiCategoricalDistribution([N_ACTIONS, N_ACTIONS]).proba_distribution(th.rand(1, sum([N_ACTIONS, N_ACTIONS]))),
+        MultiCategoricalDistribution(np.array([N_ACTIONS, N_ACTIONS])).proba_distribution(
+            th.rand(1, sum([N_ACTIONS, N_ACTIONS]))
+        ),
         SquashedDiagGaussianDistribution(N_ACTIONS).proba_distribution(th.rand(N_ACTIONS), th.rand(N_ACTIONS)),
         StateDependentNoiseDistribution(N_ACTIONS).proba_distribution(
             th.rand(N_ACTIONS), th.rand([N_ACTIONS, N_ACTIONS]), th.rand([N_ACTIONS, N_ACTIONS])

Original file line number	Diff line number	Diff line change
`@@ -193,7 +193,8 @@ def __init__(`
`193`	`193`	`# see https://github.com/DLR-RM/stable-baselines3/issues/934`
`194`	`194`	`if optimize_memory_usage and handle_timeout_termination:`
`195`	`195`	`raise ValueError(`
`196`		`- "ReplayBuffer does not support optimize_memory_usage = True and handle_timeout_termination = True simultaneously."`
	`196`	`+ "ReplayBuffer does not support optimize_memory_usage = True "`
	`197`	`+ "and handle_timeout_termination = True simultaneously."`
`197`	`198`	`)`
`198`	`199`	`self.optimize_memory_usage = optimize_memory_usage`
`199`	`200`