From 7b7123825048e657f1efa8342389171ecf61d6bf Mon Sep 17 00:00:00 2001 From: Reece Palmer Date: Wed, 19 Nov 2025 14:31:29 +0000 Subject: [PATCH 1/2] Fix: Add close() method to BaseAlgorithm to prevent memory leaks Introduces a `close()` method in BaseAlgorithm to explicitly clean up resources. This method closes the environment, deletes the policy and rollout buffer objects, and calls `torch.cuda.empty_cache()` and `gc.collect()`. This prevents increasing memory usage and Out-of-Memory (OOM) errors in sequential training loops, related to #1966. --- docs/misc/changelog.rst | 1 + stable_baselines3/common/base_class.py | 26 ++++++++++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/docs/misc/changelog.rst b/docs/misc/changelog.rst index fb6b9f281..52fb1916f 100644 --- a/docs/misc/changelog.rst +++ b/docs/misc/changelog.rst @@ -19,6 +19,7 @@ Bug Fixes: - Update env checker to warn users when using Graph space (@dhruvmalik007). - Fixed memory leak in ``VecVideoRecorder`` where ``recorded_frames`` stayed in memory due to reference in the moviepy clip (@copilot) - Remove double space in `StopTrainingOnRewardThreshold` callback message (@sea-bass) +- Add close method to BaseAlgorithm to prevent memory leaks in sequential training loops (#1966) `SB3-Contrib`_ ^^^^^^^^^^^^^^ diff --git a/stable_baselines3/common/base_class.py b/stable_baselines3/common/base_class.py index f2c205166..47f319f62 100644 --- a/stable_baselines3/common/base_class.py +++ b/stable_baselines3/common/base_class.py @@ -1,5 +1,6 @@ """Abstract base classes for RL algorithms.""" +import gc import io import pathlib import time @@ -472,6 +473,7 @@ def get_vec_normalize_env(self) -> Optional[VecNormalize]: :return: The ``VecNormalize`` env. 
""" return self._vec_normalize_env + def set_env(self, env: GymEnv, force_reset: bool = True) -> None: """ @@ -866,6 +868,30 @@ def save( save_to_zip_file(path, data=data, params=params_to_save, pytorch_variables=pytorch_variables) + def close(self) -> None: + """ + Clean up resources after training or prediction to prevent memory leaks + when calling :meth:`learn()` repeatedly with new environments. + + Fixes https://github.com/DLR-RM/stable-baselines3/issues/1996 + """ + if self.env is not None: + self.env.close() + self.env = None + + if hasattr(self, "rollout_buffer") and self.rollout_buffer is not None: + del self.rollout_buffer + self.rollout_buffer = None + + if hasattr(self, "policy") and self.policy is not None: + del self.policy + self.policy = None + + if self.device.type == "cuda": + th.cuda.empty_cache() + + gc.collect() + def dump_logs(self) -> None: """ Write log data. (Implemented by OffPolicyAlgorithm and OnPolicyAlgorithm) From f9c53e4f3e9830278b5d0e908b309ddbd37c8876 Mon Sep 17 00:00:00 2001 From: Reece Palmer Date: Wed, 19 Nov 2025 15:09:23 +0000 Subject: [PATCH 2/2] Fix: Add close() method to BaseAlgorithm to prevent memory leaks Introduces a `close()` method in BaseAlgorithm to explicitly clean up resources. This method closes the environment, deletes the policy and rollout buffer objects, and calls `torch.cuda.empty_cache()` and `gc.collect()`. This prevents increasing memory usage and Out-of-Memory (OOM) errors in sequential training loops, related to #1966. --- stable_baselines3/common/base_class.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/stable_baselines3/common/base_class.py b/stable_baselines3/common/base_class.py index 47f319f62..e4fbd00f1 100644 --- a/stable_baselines3/common/base_class.py +++ b/stable_baselines3/common/base_class.py @@ -473,7 +473,6 @@ def get_vec_normalize_env(self) -> Optional[VecNormalize]: :return: The ``VecNormalize`` env. 
""" return self._vec_normalize_env - def set_env(self, env: GymEnv, force_reset: bool = True) -> None: """ @@ -873,7 +872,7 @@ def close(self) -> None: Clean up resources after training or prediction to prevent memory leaks when calling :meth:`learn()` repeatedly with new environments. - Fixes https://github.com/DLR-RM/stable-baselines3/issues/1996 + Fixes https://github.com/DLR-RM/stable-baselines3/issues/1996 """ if self.env is not None: self.env.close()