From 7b7123825048e657f1efa8342389171ecf61d6bf Mon Sep 17 00:00:00 2001
From: Reece Palmer <reecepalmer@gmail.com>
Date: Wed, 19 Nov 2025 14:31:29 +0000
Subject: [PATCH 1/2] Fix: Add close() method to BaseAlgorithm to prevent
 memory leaks

Introduces a `close()` method in BaseAlgorithm to explicitly clean up resources. This method closes the environment, deletes the policy and rollout buffer objects, and calls `torch.cuda.empty_cache()` and `gc.collect()`.

This prevents increasing memory usage and Out-of-Memory (OOM) errors in sequential training loops, related to #1966.
---
 docs/misc/changelog.rst                |  1 +
 stable_baselines3/common/base_class.py | 26 ++++++++++++++++++++++++++
 2 files changed, 27 insertions(+)

diff --git a/docs/misc/changelog.rst b/docs/misc/changelog.rst
index fb6b9f281..52fb1916f 100644
--- a/docs/misc/changelog.rst
+++ b/docs/misc/changelog.rst
@@ -19,6 +19,7 @@ Bug Fixes:
 - Update env checker to warn users when using Graph space (@dhruvmalik007).
 - Fixed memory leak in ``VecVideoRecorder`` where ``recorded_frames`` stayed in memory due to reference in the moviepy clip (@copilot)
 - Remove double space in `StopTrainingOnRewardThreshold` callback message (@sea-bass)
+- Add ``close()`` method to ``BaseAlgorithm`` to prevent memory leaks in sequential training loops (#1966)
 
 `SB3-Contrib`_
 ^^^^^^^^^^^^^^
diff --git a/stable_baselines3/common/base_class.py b/stable_baselines3/common/base_class.py
index f2c205166..47f319f62 100644
--- a/stable_baselines3/common/base_class.py
+++ b/stable_baselines3/common/base_class.py
@@ -1,5 +1,6 @@
 """Abstract base classes for RL algorithms."""
 
+import gc
 import io
 import pathlib
 import time
@@ -472,6 +473,7 @@ def get_vec_normalize_env(self) -> Optional[VecNormalize]:
         :return: The ``VecNormalize`` env.
         """
         return self._vec_normalize_env
+    
 
     def set_env(self, env: GymEnv, force_reset: bool = True) -> None:
         """
@@ -866,6 +868,30 @@ def save(
 
         save_to_zip_file(path, data=data, params=params_to_save, pytorch_variables=pytorch_variables)
 
+    def close(self) -> None:
+        """
+        Clean up resources after training or prediction to prevent memory leaks
+        when calling :meth:`learn()` repeatedly with new environments.
+
+        Fixes https://github.com/DLR-RM/stable-baselines3/issues/1996 
+        """
+        if self.env is not None:
+            self.env.close()
+            self.env = None
+
+        if hasattr(self, "rollout_buffer") and self.rollout_buffer is not None:
+            del self.rollout_buffer
+            self.rollout_buffer = None
+
+        if hasattr(self, "policy") and self.policy is not None:
+            del self.policy
+            self.policy = None
+
+        if self.device.type == "cuda":
+            th.cuda.empty_cache()
+
+        gc.collect()
+
     def dump_logs(self) -> None:
         """
         Write log data. (Implemented by OffPolicyAlgorithm and OnPolicyAlgorithm)

From f9c53e4f3e9830278b5d0e908b309ddbd37c8876 Mon Sep 17 00:00:00 2001
From: Reece Palmer <reecepalmer@gmail.com>
Date: Wed, 19 Nov 2025 15:09:23 +0000
Subject: [PATCH 2/2] Fix: Add close() method to BaseAlgorithm to prevent
 memory leaks

Introduces a `close()` method in BaseAlgorithm to explicitly clean up resources. This method closes the environment, deletes the policy and rollout buffer objects, and calls `torch.cuda.empty_cache()` and `gc.collect()`.

This prevents increasing memory usage and Out-of-Memory (OOM) errors in sequential training loops, related to #1966.
---
 stable_baselines3/common/base_class.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/stable_baselines3/common/base_class.py b/stable_baselines3/common/base_class.py
index 47f319f62..e4fbd00f1 100644
--- a/stable_baselines3/common/base_class.py
+++ b/stable_baselines3/common/base_class.py
@@ -473,7 +473,6 @@ def get_vec_normalize_env(self) -> Optional[VecNormalize]:
         :return: The ``VecNormalize`` env.
         """
         return self._vec_normalize_env
-    
 
     def set_env(self, env: GymEnv, force_reset: bool = True) -> None:
         """
@@ -873,7 +872,7 @@ def close(self) -> None:
         Clean up resources after training or prediction to prevent memory leaks
         when calling :meth:`learn()` repeatedly with new environments.
 
-        Fixes https://github.com/DLR-RM/stable-baselines3/issues/1996 
+        Fixes https://github.com/DLR-RM/stable-baselines3/issues/1966
         """
         if self.env is not None:
             self.env.close()