Skip to content

Commit 04a487d

Browse files
committed: "cleanup"
Signed-off-by: Lu Fang <[email protected]>
1 parent 39f66e5 commit 04a487d

File tree

7 files changed: +1 / -35 lines changed

7 files changed: +1 / -35 lines changed

examples/offline_inference/torchrun_dp_example.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,6 @@
5353
generated_text = output.outputs[0].text
5454
print(f"Prompt: {prompt!r}\nGenerated text: {generated_text!r}\n")
5555
print("-" * 50)
56-
57-
del llm
5856
"""
5957
Further tips:
6058

tests/distributed/test_torchrun_example_moe.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,5 +79,3 @@ def test_consistent_across_ranks(obj):
7979
test_consistent_across_ranks(generated_text)
8080
print(f"Rank {group_rank}, Prompt: {prompt!r}, "
8181
f"Generated text: {generated_text!r}")
82-
83-
del llm

vllm/compilation/backends.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -648,11 +648,3 @@ def copy_and_call(*args):
648648
return self.split_gm(*list_args)
649649

650650
return copy_and_call
651-
652-
def __del__(self):
653-
# cleanup the backend explicitly to avoid hanging
654-
# before program exits
655-
if hasattr(self, 'split_gm'):
656-
del self.split_gm
657-
if hasattr(self, 'piecewise_graphs'):
658-
del self.piecewise_graphs

vllm/compilation/decorators.py

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,6 @@ def _support_torch_compile(
193193
cls.__bases__ = cls.__bases__ + (TorchCompileWrapperWithCustomDispatcher, )
194194

195195
old_init = cls.__init__
196-
old_del = cls.__del__ if hasattr(cls, '__del__') else None
197196

198197
setattr(cls, IGNORE_COMPILE_KEY, False)
199198

@@ -215,17 +214,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = '', **kwargs):
215214
TorchCompileWrapperWithCustomDispatcher.__init__(
216215
self, compilation_level=vllm_config.compilation_config.level)
217216

218-
def __del__(self):
219-
assert self is not None
220-
if hasattr(self, 'backend'):
221-
# cleanup the backend explicitly to avoid hanging
222-
# before program exits
223-
del self.backend
224-
if old_del is not None:
225-
old_del(self)
226-
227217
cls.__init__ = __init__
228-
cls.__del__ = __del__
229218

230219
def __call__(self, *args, **kwargs):
231220
# torch.compiler.is_compiling() means we are inside the compilation

vllm/compilation/wrapper.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@ def __init__(self,
3636

3737
vllm_config = get_current_vllm_config()
3838
self.vllm_config = vllm_config
39-
self.backend = None
4039
if compiled_callable is None:
4140
# default compilation settings
4241
# compiling the forward method

vllm/distributed/parallel_state.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1006,7 +1006,7 @@ def graph_capture(device: torch.device):
10061006
"""
10071007
context = GraphCaptureContext(torch.cuda.Stream(device=device))
10081008
with get_tp_group().graph_capture(context), get_pp_group().graph_capture(
1009-
context), get_dp_group().graph_capture(context):
1009+
context):
10101010
yield context
10111011

10121012

vllm/v1/worker/gpu_model_runner.py

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -117,12 +117,6 @@
117117
AttnMetadataDict]
118118

119119

120-
def explicit_cleanup_submodule(model: nn.Module):
121-
for module in model.children():
122-
if hasattr(module, "__del__") and callable(module.__del__):
123-
module.__del__()
124-
125-
126120
# Wrapper for ModelRunnerOutput to support overlapped execution.
127121
class AsyncGPUModelRunnerOutput(AsyncModelRunnerOutput):
128122

@@ -4051,7 +4045,3 @@ def _to_list(self, sampled_token_ids: torch.Tensor) -> list[list[int]]:
40514045
self.transfer_event.record()
40524046
self.transfer_event.synchronize()
40534047
return pinned.tolist()
4054-
4055-
def __del__(self):
4056-
if isinstance(self.model, nn.Module):
4057-
explicit_cleanup_submodule(self.model)

0 commit comments

Comments (0)