Fix returned type in predict (#964)

qgallouedec · araffin · web-flow · commit fda3d4d74843 · 2022-07-18T11:22:19.000+02:00
* `arr[0]` to `arr.squeeze(0)`

* `squeeze(axis=0)` to `squeeze(0)`

* Type testing

* Add type test for unvectorized observation

* `squeeze(0)` to `squeeze(axis=0)`

* Treatment of the laziness symptoms

* Update changelog

* Udate changelog

Co-authored-by: Antonin RAFFIN &lt;antonin.raffin@ensta.org&gt;
diff --git a/docs/misc/changelog.rst b/docs/misc/changelog.rst
@@ -17,6 +17,7 @@ SB3-Contrib
 
 Bug Fixes:
 ^^^^^^^^^^
+- Fixed the issue that ``predict`` does not always return action as ``np.ndarray`` (@qgallouedec)
 
 Deprecations:
 ^^^^^^^^^^^^^
@@ -1011,4 +1012,4 @@ And all the contributors:
 @eleurent @ac-93 @cove9988 @theDebugger811 @hsuehch @Demetrio92 @thomasgubler @IperGiove @ScheiklP
 @simoninithomas @armandpl @manuel-delverme @Gautam-J @gianlucadecola @buoyancy99 @caburu @xy9485
 @Gregwar @ycheng517 @quantitative-technologies @bcollazo @git-thor @TibiGG @cool-RR @MWeltevrede
-@Melanol
+@Melanol @qgallouedec
diff --git a/stable_baselines3/common/distributions.py b/stable_baselines3/common/distributions.py
@@ -578,10 +578,10 @@ def get_noise(self, latent_sde: th.Tensor) -> th.Tensor:
             return th.mm(latent_sde, self.exploration_mat)
         # Use batch matrix multiplication for efficient computation
         # (batch_size, n_features) -> (batch_size, 1, n_features)
-        latent_sde = latent_sde.unsqueeze(1)
+        latent_sde = latent_sde.unsqueeze(dim=1)
         # (batch_size, 1, n_actions)
         noise = th.bmm(latent_sde, self.exploration_matrices)
-        return noise.squeeze(1)
+        return noise.squeeze(dim=1)
 
     def actions_from_params(
         self, mean_actions: th.Tensor, log_std: th.Tensor, latent_sde: th.Tensor, deterministic: bool = False
diff --git a/stable_baselines3/common/policies.py b/stable_baselines3/common/policies.py
@@ -350,7 +350,7 @@ def predict(
 
         # Remove batch dimension if needed
         if not vectorized_env:
-            actions = actions[0]
+            actions = actions.squeeze(axis=0)
 
         return actions, state
 
diff --git a/tests/test_predict.py b/tests/test_predict.py
@@ -73,11 +73,13 @@ def test_predict(model_class, env_id, device):
 
     obs = env.reset()
     action, _ = model.predict(obs)
+    assert isinstance(action, np.ndarray)
     assert action.shape == env.action_space.shape
     assert env.action_space.contains(action)
 
     vec_env_obs = vec_env.reset()
     action, _ = model.predict(vec_env_obs)
+    assert isinstance(action, np.ndarray)
     assert action.shape[0] == vec_env_obs.shape[0]
 
     # Special case for DQN to check the epsilon greedy exploration