Add cuda_gc context manager and use it in the graph decomposer and variable renamer

JewelRoam · JewelRoam · commit fb7ecd66e59f · 2025-12-18T16:26:27.000+08:00
diff --git a/graph_net/torch/decompose_util.py b/graph_net/torch/decompose_util.py
@@ -1,10 +1,22 @@
 import torch
 import copy
 import operator
+import gc
+from contextlib import contextmanager
 from collections import defaultdict
 from dataclasses import dataclass
 
 
+@contextmanager
+def cuda_gc(enabled: bool = True):
+    try:
+        yield
+    finally:
+        if enabled:
+            gc.collect()
+            torch.cuda.empty_cache()
+
+
 def convert_to_submodules_graph(
     gm: torch.fx.GraphModule,
     split_positions: list[int],
diff --git a/graph_net/torch/graph_decomposer.py b/graph_net/torch/graph_decomposer.py
@@ -4,23 +4,24 @@
 import torch
 import json
 import sys
-from graph_net.torch.decompose_util import convert_to_submodules_graph
+
+from graph_net.torch.decompose_util import convert_to_submodules_graph, cuda_gc
 from graph_net.torch.extractor import GraphExtractor as BuiltinGraphExtractor
 import graph_net.imp_util as imp_util
 from graph_net.torch.fx_graph_module_util import get_torch_module_and_inputs
 from graph_net.torch.fx_graph_cache_util import (
     parse_immutable_model_path_into_sole_graph_module,
 )
 from graph_net.torch.fx_graph_parse_util import parse_sole_graph_module
+
 import logging
 
 logger = logging.getLogger(__name__)
 
 
 def load_json(file_path):
-    with open(file_path, "r", encoding="utf-8") as file:
-        data_dict = json.load(file)
-    return data_dict
+    with open(file_path, "r", encoding="utf-8") as f:
+        return json.load(f)
 
 
 class GraphExtractor:
@@ -221,20 +222,27 @@ def __call__(self, rel_model_path):
             rel_model_path, split_positions
         ):
             return
-        torch.cuda.empty_cache()
-        config = {
-            "split_positions": split_positions,
-            "group_head_and_tail": self.config.get("group_head_and_tail", False),
-            "chain_style": self.config.get("chain_style", False),
-        }
-        module, inputs = get_torch_module_and_inputs(model_path, use_dummy_inputs=False)
+
+        with cuda_gc():
+            module, inputs = get_torch_module_and_inputs(
+                model_path, use_dummy_inputs=False
+            )
         gm = parse_sole_graph_module(module, inputs)
-        rewrited_gm: torch.fx.GraphModule = convert_to_submodules_graph(
-            gm,
-            submodule_hook=self.get_naive_decomposer_extractor(rel_model_path),
-            **config,
-        )
-        rewrited_gm(*inputs)
+        del module
+
+        with cuda_gc():
+            rewrited_gm: torch.fx.GraphModule = convert_to_submodules_graph(
+                gm,
+                submodule_hook=self.get_naive_decomposer_extractor(rel_model_path),
+                split_positions=split_positions,
+                group_head_and_tail=self.config.get("group_head_and_tail", False),
+                chain_style=self.config.get("chain_style", False),
+            )
+            rewrited_gm(*inputs)
+        del inputs, rewrited_gm
+
+        with cuda_gc():
+            pass
 
     def get_naive_decomposer_extractor(self, rel_model_path):
         def fn(submodule, seq_no):
diff --git a/graph_net/torch/graph_variable_renamer.py b/graph_net/torch/graph_variable_renamer.py
@@ -3,13 +3,15 @@
 import shutil
 import inspect
 import tempfile
+
 from graph_net.torch.fx_graph_module_util import get_torch_module_and_inputs
 from graph_net.torch.fx_graph_parse_util import parse_sole_graph_module
 from graph_net.tensor_meta import TensorMeta
 from pathlib import Path
 from graph_net.torch.utils import apply_templates
 from graph_net.imp_util import load_module
 from graph_net.hash_util import get_sha256_hash
+from graph_net.torch.decompose_util import cuda_gc
 
 
 class GraphVariableRenamer:
@@ -79,9 +81,11 @@ def _make_config(
 
     def __call__(self, rel_model_path):
         src_model_path = os.path.join(self.config["model_path_prefix"], rel_model_path)
-        module, inputs = get_torch_module_and_inputs(src_model_path)
-        gm = parse_sole_graph_module(module, inputs)
-        gm = self.rename_graph_variables(gm, inputs, src_model_path)
+        with cuda_gc(enabled=self.config["release_gpu_memory"]):
+            module, inputs = get_torch_module_and_inputs(src_model_path)
+            gm = parse_sole_graph_module(module, inputs)
+            gm = self.rename_graph_variables(gm, inputs, src_model_path)
+            del module, inputs
         dst_model_path = os.path.realpath(
             os.path.join(self.config["output_dir"], rel_model_path)
         )