
Commit a992aa5

Add torch.cuda.empty_cache() in decompose process (#455)
* Add cuda_gc in decomposer and var renamer
* Add cuda_gc in typical_sequence_split_points
* Fix resume in graph_variable_renamer to early exit
* Minor fix
* Simplify
1 parent 908bfb8 commit a992aa5
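
The individual changes all apply the same two-part pattern: do the resume check and return before any heavy work starts, and call torch.cuda.empty_cache() at the beginning of each per-model step so CUDA memory cached while processing the previous model is released before the next one is loaded. Below is a minimal, self-contained sketch of that pattern; the Step class, its config keys, and the example paths are illustrative stand-ins, and only torch.cuda.empty_cache() and the check-before-load ordering are taken from the diffs that follow.

import os
import torch


class Step:
    """Hypothetical per-model step; the class name and config keys are illustrative."""

    def __init__(self, config):
        self.config = config

    def __call__(self, rel_model_path):
        # Release CUDA memory cached by the previous model before doing anything else
        # (a no-op on CPU-only machines or when CUDA is not yet initialized).
        torch.cuda.empty_cache()

        # Resume check first, so an already-processed model exits early and never
        # loads weights or allocates GPU memory again.
        dst_model_path = os.path.join(self.config["output_dir"], rel_model_path)
        if self.config["resume"] and os.path.exists(os.path.join(dst_model_path, "model.py")):
            return

        # ... only now load the module, parse the graph, and do the real work ...


step = Step({"resume": False, "output_dir": "out"})  # illustrative config
step("models/toy_model")                             # runs the guarded body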


3 files changed: +24 -16 lines changed


graph_net/torch/graph_decomposer.py

Lines changed: 11 additions & 10 deletions
@@ -4,6 +4,7 @@
 import torch
 import json
 import sys
+
 from graph_net.torch.decompose_util import convert_to_submodules_graph
 from graph_net.torch.extractor import GraphExtractor as BuiltinGraphExtractor
 import graph_net.imp_util as imp_util
@@ -12,15 +13,15 @@
     parse_immutable_model_path_into_sole_graph_module,
 )
 from graph_net.torch.fx_graph_parse_util import parse_sole_graph_module
+
 import logging

 logger = logging.getLogger(__name__)


 def load_json(file_path):
-    with open(file_path, "r", encoding="utf-8") as file:
-        data_dict = json.load(file)
-    return data_dict
+    with open(file_path, "r", encoding="utf-8") as f:
+        return json.load(f)


 class GraphExtractor:
@@ -242,19 +243,19 @@ def __call__(self, rel_model_path):
             rel_model_path, split_positions, subgraph_ranges
         ):
             return
+
         torch.cuda.empty_cache()
-        config = {
-            "split_positions": split_positions,
-            "subgraph_ranges": subgraph_ranges,
-            "group_head_and_tail": self.config.get("group_head_and_tail", False),
-            "chain_style": self.config.get("chain_style", False),
-        }
         module, inputs = get_torch_module_and_inputs(model_path, use_dummy_inputs=False)
         gm = parse_sole_graph_module(module, inputs)
+
+        torch.cuda.empty_cache()
         rewrited_gm: torch.fx.GraphModule = convert_to_submodules_graph(
             gm,
             submodule_hook=self.get_naive_decomposer_extractor(rel_model_path),
-            **config,
+            split_positions=split_positions,
+            subgraph_ranges=subgraph_ranges,
+            group_head_and_tail=self.config.get("group_head_and_tail", False),
+            chain_style=self.config.get("chain_style", False),
         )
         rewrited_gm(*inputs)
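The "Simplify" commit in this file replaces the intermediate config dict that was splatted with **config by explicit keyword arguments to convert_to_submodules_graph. A self-contained toy comparison of the two call styles follows; decompose is a stand-in for the real function, and the option values are made up.

def decompose(graph, *, split_positions, subgraph_ranges,
              group_head_and_tail=False, chain_style=False):
    # Stand-in for convert_to_submodules_graph: just report what it was given.
    return (graph, split_positions, subgraph_ranges, group_head_and_tail, chain_style)


graph = "toy-graph"

# Before: options gathered into a dict and splatted into the call.
config = {
    "split_positions": [2, 5],
    "subgraph_ranges": [(0, 2), (2, 5)],
    "group_head_and_tail": False,
    "chain_style": False,
}
out_before = decompose(graph, **config)

# After: the same options passed as explicit keyword arguments, keeping the
# accepted option names visible at the call site.
out_after = decompose(
    graph,
    split_positions=[2, 5],
    subgraph_ranges=[(0, 2), (2, 5)],
    group_head_and_tail=False,
    chain_style=False,
)

assert out_before == out_after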
graph_net/torch/graph_variable_renamer.py

Lines changed: 11 additions & 6 deletions
@@ -2,6 +2,7 @@
 import torch
 import shutil
 import tempfile
+
 from graph_net.torch.fx_graph_module_util import get_torch_module_and_inputs
 from graph_net.torch.fx_graph_parse_util import parse_sole_graph_module
 from graph_net.tensor_meta import TensorMeta
@@ -77,17 +78,21 @@ def _make_config(
         }

     def __call__(self, rel_model_path):
-        src_model_path = os.path.join(self.config["model_path_prefix"], rel_model_path)
-        module, inputs = get_torch_module_and_inputs(src_model_path)
-        gm = parse_sole_graph_module(module, inputs)
-        gm, rename_map = self.rename_graph_variables(gm, inputs, src_model_path)
+        torch.cuda.empty_cache()
+
         dst_model_path = os.path.realpath(
             os.path.join(self.config["output_dir"], rel_model_path)
         )
         if self.config["resume"] and os.path.exists(
             os.path.join(dst_model_path, "model.py")
         ):
             return
+
+        src_model_path = os.path.join(self.config["model_path_prefix"], rel_model_path)
+        module, inputs = get_torch_module_and_inputs(src_model_path)
+        gm = parse_sole_graph_module(module, inputs)
+        gm, rename_map = self.rename_graph_variables(gm, inputs, src_model_path)
+
         Path(dst_model_path).parent.mkdir(parents=True, exist_ok=True)
         with tempfile.TemporaryDirectory(prefix="graph_variable_renamer_") as temp_dir:
             temp_model_path = os.path.join(temp_dir, os.path.basename(dst_model_path))
@@ -97,8 +102,8 @@ def __call__(self, rel_model_path):
                 src_model_path, temp_model_path, rename_map
             )
             self._update_input_meta_py_file(src_model_path, temp_model_path, rename_map)
-            print("Try to run renamed model...")
-            self._try_run(temp_model_path)
+            # print("Try to run renamed model...")
+            # self._try_run(temp_model_path)
             shutil.copytree(temp_model_path, dst_model_path)

     def _try_run(self, model_path):

graph_net/torch/typical_sequence_split_points.py

Lines changed: 2 additions & 0 deletions
@@ -5,6 +5,7 @@
 from typing import Any, Dict, List
 import torch
 import torch.nn as nn
+
 from graph_net.torch.rp_expr.rp_expr_parser import RpExprParser
 from graph_net.torch.rp_expr.rp_expr_util import (
     MakeNestedIndexRangeFromLetsListTokenRpExpr,
@@ -69,6 +70,7 @@ def _make_config(
         }

     def __call__(self, rel_model_path: str):
+        torch.cuda.empty_cache()
         model_path = os.path.join(self.config["model_path_prefix"], rel_model_path)
         output_path = self._get_output_path(rel_model_path)
         if self.config["resume"] and output_path.exists():
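
A quick way to see what these torch.cuda.empty_cache() calls buy between models: once tensors from the previous step are dead, empty_cache() returns the allocator's cached blocks to the driver, so reserved GPU memory drops. The probe below is illustrative only, not part of the commit, and it needs a CUDA device.

import torch

if torch.cuda.is_available():
    x = torch.randn(1024, 1024, device="cuda")  # allocate something on the GPU
    del x                                       # the tensor is gone, but the caching allocator
    before = torch.cuda.memory_reserved()       # still holds the pages it grabbed
    torch.cuda.empty_cache()                    # hand cached, unused blocks back to the driver
    after = torch.cuda.memory_reserved()
    print(f"reserved before empty_cache: {before} bytes, after: {after} bytes")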
