Skip to content

Commit f432a6f

Browse files
MarcDcls and araffin authored
Adding FRASA to the projects page (#2059)
* Adding FRASA to the projects page * Updating changelog.rst * Ignore mypy errors for np arrays (python 3.11+) --------- Co-authored-by: Antonin Raffin <[email protected]>
1 parent 9caa168 commit f432a6f

File tree

8 files changed

+24
-7
lines changed

8 files changed

+24
-7
lines changed

docs/misc/changelog.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ Documentation:
3939
- Added Decisions and Dragons to resources. (@jmacglashan)
4040
- Updated PyBullet example, now compatible with Gymnasium
4141
- Added link to policies for ``policy_kwargs`` parameter (@kplers)
42+
- Added FRASA to the project page (@MarcDcls)
4243

4344
Release 2.4.0 (2024-11-18)
4445
--------------------------
@@ -1739,4 +1740,4 @@ And all the contributors:
17391740
@DavyMorgan @luizapozzobon @Bonifatius94 @theSquaredError @harveybellini @DavyMorgan @FieteO @jonasreiher @npit @WeberSamuel @troiganto
17401741
@lutogniew @lbergmann1 @lukashass @BertrandDecoster @pseudo-rnd-thoughts @stefanbschneider @kyle-he @PatrickHelm @corentinlger
17411742
@marekm4 @stagoverflow @rushitnshah @markscsmith @NickLucche @cschindlbeck @peteole @jak3122 @will-maclean
1742-
@brn-dev @jmacglashan @kplers
1743+
@brn-dev @jmacglashan @kplers @MarcDcls

docs/misc/projects.rst

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,3 +250,19 @@ It enables solving environments involving partial observability or locomotion (e
250250
| Authors: Corentin Léger, Gautier Hamon, Eleni Nisioti, Xavier Hinaut, Clément Moulin-Frier
251251
| Github: https://github.com/corentinlger/ER-MRL
252252
| Paper: https://arxiv.org/abs/2312.06695
253+
254+
255+
FRASA: Fall Recovery And Stand up agent
256+
---------------------------------------
257+
258+
A Deep Reinforcement Learning agent for a humanoid robot that learns to recover from falls and stand up.
259+
260+
The agent is trained using the MuJoCo physics engine. Real world experiments are conducted on the
261+
Sigmaban humanoid robot, a small-sized humanoid designed by the *Rhoban Team* to compete in the RoboCup Kidsize League.
262+
The results, detailed in the paper and the video, show that the agent is able to recover from
263+
various external disturbances and stand up in a few seconds.
264+
265+
Authors: Marc Duclusaud, Clément Gaspard, Grégoire Passault, Mélodie Daniel, Olivier Ly
266+
Github: https://github.com/Rhoban/frasa
267+
Paper: https://arxiv.org/abs/2410.08655
268+
Video: https://www.youtube.com/watch?v=NL65XW0O0mk

stable_baselines3/common/callbacks.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -490,7 +490,7 @@ def _on_step(self) -> bool:
490490
timesteps=self.evaluations_timesteps,
491491
results=self.evaluations_results,
492492
ep_lengths=self.evaluations_length,
493-
**kwargs,
493+
**kwargs, # type: ignore[arg-type]
494494
)
495495

496496
mean_reward, std_reward = np.mean(episode_rewards), np.std(episode_rewards)

stable_baselines3/common/envs/bit_flipping_env.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ def convert_to_bit_vector(self, state: Union[int, np.ndarray], batch_size: int)
103103
# Convert to binary representation
104104
bit_vector = ((bit_vector[:, :] & (1 << np.arange(len(self.state)))) > 0).astype(int)
105105
elif self.image_obs_space:
106-
bit_vector = state.reshape(batch_size, -1)[:, : len(self.state)] / 255
106+
bit_vector = state.reshape(batch_size, -1)[:, : len(self.state)] / 255 # type: ignore[assignment]
107107
else:
108108
bit_vector = np.array(state).reshape(batch_size, -1)
109109
return bit_vector

stable_baselines3/common/off_policy_algorithm.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -487,7 +487,7 @@ def _store_transition(
487487
next_obs[i] = infos[i]["terminal_observation"]
488488
# VecNormalize normalizes the terminal observation
489489
if self._vec_normalize_env is not None:
490-
next_obs[i] = self._vec_normalize_env.unnormalize_obs(next_obs[i, :])
490+
next_obs[i] = self._vec_normalize_env.unnormalize_obs(next_obs[i, :]) # type: ignore[assignment]
491491

492492
replay_buffer.add(
493493
self._last_original_obs, # type: ignore[arg-type]

stable_baselines3/common/vec_env/base_vec_env.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ def tile_images(images_nhwc: Sequence[np.ndarray]) -> np.ndarray: # pragma: no
4343
# img_HhWwc
4444
out_image = out_image.transpose(0, 2, 1, 3, 4)
4545
# img_Hh_Ww_c
46-
out_image = out_image.reshape((new_height * height, new_width * width, n_channels))
46+
out_image = out_image.reshape((new_height * height, new_width * width, n_channels)) # type: ignore[assignment]
4747
return out_image
4848

4949

stable_baselines3/common/vec_env/dummy_vec_env.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ def step_async(self, actions: np.ndarray) -> None:
5656
def step_wait(self) -> VecEnvStepReturn:
5757
# Avoid circular imports
5858
for env_idx in range(self.num_envs):
59-
obs, self.buf_rews[env_idx], terminated, truncated, self.buf_infos[env_idx] = self.envs[env_idx].step(
59+
obs, self.buf_rews[env_idx], terminated, truncated, self.buf_infos[env_idx] = self.envs[env_idx].step( # type: ignore[assignment]
6060
self.actions[env_idx]
6161
)
6262
# convert to SB3 VecEnv api

stable_baselines3/her/her_replay_buffer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ def add( # type: ignore[override]
157157
self.ep_start[self.pos] = self._current_ep_start.copy()
158158

159159
if self.copy_info_dict:
160-
self.infos[self.pos] = infos
160+
self.infos[self.pos] = infos # type: ignore[assignment]
161161
# Store the transition
162162
super().add(obs, next_obs, action, reward, done, infos)
163163

0 commit comments

Comments (0)