Improve efficiency of test/typical_sequence_decomposer_test.sh (#438)

lixinqi · JewelRoam · web-flow · commit 201d5b98e819 · 2025-12-10T16:27:14.000+08:00
* debug_typical_sequence

* support model-path-prefix in splitting positions

* fix

* fix

* Improve efficiency of test/typical_sequence_decomposer_test.sh

---------

Co-authored-by: JewelRoam <2752594773@qq.com>
diff --git a/graph_net/test/dev_model_list/validation_error_model_list.txt b/graph_net/test/dev_model_list/validation_error_model_list.txt
@@ -0,0 +1,20 @@
+samples/timm/convnextv2_base.fcmae_ft_in1k
+samples/timm/hgnet_tiny.paddle_in1k
+samples/timm/mobilenetv4_conv_aa_large.e230_r384_in12k
+samples/timm/regnety_080_tv.tv2_in1k
+samples/timm/res2net50_14w_8s.in1k
+samples/torchaudio/wavlm_base
+samples/torchgeometric/RECT_L
+samples/torchvision/vgg16_bn
+samples/transformers-auto-model/bge-small-en-v1.5
+samples/transformers-auto-model/distilbert_distilbert-base-multilingual-cased
+samples/transformers-auto-model/OFA-Sys_chinese-clip-vit-large-patch14
+samples/transformers-auto-model/opus-mt-ase-es
+samples/transformers-auto-model/opus-mt-en-gv
+samples/transformers-auto-model/opus-mt-en-phi
+samples/transformers-auto-model/opus-mt-en-sal
+samples/transformers-auto-model/opus-mt-en-tw
+samples/transformers-auto-model/opus-mt-fi-niu
+samples/transformers-auto-model/opus-mt-tc-bible-big-deu_eng_fra_por_spa-bat
+samples/transformers-auto-model/opus-mt-tc-bible-big-gmw-deu_eng_fra_por_spa
+samples/ultralytics/yolov3-tinyu
diff --git a/graph_net/test/naive_decomposer_and_post_extract_process_test.sh b/graph_net/test/naive_decomposer_and_post_extract_process_test.sh
@@ -1,5 +1,4 @@
 #!/bin/bash
-# bash graph_net/test/naive_decomposer_and_post_extract_process_test.sh
 
 GRAPH_NET_ROOT=$(python3 -c "import graph_net; import os; print(
 os.path.dirname(graph_net.__file__))")
diff --git a/graph_net/test/typical_sequence_decomposer_test.sh b/graph_net/test/typical_sequence_decomposer_test.sh
@@ -2,13 +2,16 @@
 
 GRAPH_NET_ROOT=$(python3 -c "import graph_net; import os; print(os.path.dirname(os.path.dirname(graph_net.__file__)))")
 DECOMPOSE_PATH=/tmp/decompose_workspace
+# DECOMPOSE_PATH=$GRAPH_NET_ROOT/decompose_test_level5_100
 
 mkdir -p "$DECOMPOSE_PATH"
 
-model_list="$GRAPH_NET_ROOT/graph_net/config/small100_torch_samples_list.txt"
+# model_list="$GRAPH_NET_ROOT/graph_net/config/small100_torch_samples_list.txt"
+model_list="$GRAPH_NET_ROOT/graph_net/test/dev_model_list/validation_error_model_list.txt"
 
 python3 -m graph_net.torch.typical_sequence_split_points \
     --model-list "$model_list" \
+    --model-path-prefix "$GRAPH_NET_ROOT" \
     --device "cuda" \
     --window-size 10 \
     --fold-policy default \
@@ -54,4 +57,4 @@ python3 -m graph_net.torch.test_compiler \
 
 python3 -m graph_net.plot_ESt \
     --benchmark-path "$DECOMPOSE_PATH/validation.log" \
-    --output-dir "$DECOMPOSE_PATH"
+    --output-dir "$DECOMPOSE_PATH"
diff --git a/graph_net/torch/fx_graph_parse_util.py b/graph_net/torch/fx_graph_parse_util.py
@@ -122,19 +122,28 @@ def _rename_placeholder(name, pattern2replacement):
     return name
 
 
-def parse_sole_graph_module(module, inputs):
+def parse_sole_graph_module_without_varify(module, inputs):
     traced_module = None
     traced_sample_inputs = None
 
     def my_backend(gm, sample_inputs):
         nonlocal traced_module
-        traced_module = gm
         nonlocal traced_sample_inputs
+        assert traced_module is None
+        assert traced_sample_inputs is None
+        traced_module = gm
         traced_sample_inputs = sample_inputs
         return gm.forward
 
     torch.compile(module, backend=my_backend)(*inputs)
     assert traced_module is not None
+    return traced_module, traced_sample_inputs
+
+
+def parse_sole_graph_module(module, inputs):
+    traced_module, traced_sample_inputs = parse_sole_graph_module_without_varify(
+        module, inputs
+    )
 
     def get_input_names_from_signature():
         return inspect.signature(module.forward).parameters
diff --git a/graph_net/torch/graph_decomposer.py b/graph_net/torch/graph_decomposer.py
@@ -269,7 +269,6 @@ def forward(self, *args):
         if not self.extracted:
             if self.need_extract(self.submodule, args):
                 self.builtin_extractor(self.submodule, args)
-                self._post_extract_process()
             self.extracted = True
         return self.submodule(*args)
 
diff --git a/graph_net/torch/typical_sequence_split_points.py b/graph_net/torch/typical_sequence_split_points.py
@@ -3,13 +3,11 @@
 import os
 from pathlib import Path
 from typing import Any, Dict, List
-
 import torch
 import torch.nn as nn
-import tempfile
-import graph_net.imp_util
-from graph_net.torch import utils as graph_utils
 from graph_net.torch.rp_expr.rp_expr_parser import RpExprParser
+from graph_net.torch.fx_graph_module_util import get_torch_module_and_inputs
+from graph_net.torch.fx_graph_parse_util import parse_sole_graph_module_without_varify
 
 
 class TypicalSequenceExtractor:
@@ -28,9 +26,12 @@ def _extract_operators_from_graph(
 
             if node.op == "call_module":
                 target_name = type(named_modules[node.target]).__name__
-            else:
+            elif node.op == "call_method":
+                target_name = f"Tensor.{node.target}"
+            elif node.op == "call_function":
                 target_name = getattr(node.target, "__name__", str(node.target))
-
+            else:
+                raise NotImplementedError()
             operator_list.append(
                 {
                     "op_type": node.op,
@@ -48,39 +49,6 @@ def extract_compiler(self, gm: torch.fx.GraphModule, inputs: List[torch.Tensor])
         return gm.forward
 
 
-class TypicalSequenceModelLoader:
-    def load_class_from_file(self, model_path: str, device: str) -> Any:
-        file_path = os.path.join(model_path, "model.py")
-
-        if not os.path.exists(file_path):
-            raise FileNotFoundError(f"Model file not found: {file_path}")
-
-        with open(file_path, "r", encoding="utf-8") as f:
-            model_code = f.read()
-        model_code = graph_utils.modify_code_by_device(model_code, device)
-
-        with tempfile.NamedTemporaryFile(
-            mode="w", suffix=".py", encoding="utf-8"
-        ) as temp_file:
-            temp_file.write(model_code)
-            module = graph_net.imp_util.load_module(temp_file.name)
-        model_class = getattr(module, "GraphModule", None)
-
-        return model_class
-
-    def get_input_dict(self, model_path: str, device: str) -> Dict[str, torch.Tensor]:
-        inputs_params = graph_utils.load_converted_from_text(f"{model_path}")
-        params = inputs_params["weight_info"]
-        for tensor_meta in params.values():
-            if hasattr(tensor_meta, "device"):
-                tensor_meta.device = device
-        input_dict = {
-            k: graph_utils.replay_tensor(v).to(torch.device(device))
-            for k, v in params.items()
-        }
-        return input_dict
-
-
 class SplitAnalyzer:
     def __init__(
         self, window_size: int = 10, fold_policy: str = "default", fold_times: int = 0
@@ -109,20 +77,11 @@ def _resolve_token_to_ops(
     def _extract_ops_via_compile(
         self, model_path: str, device: str = "cpu"
     ) -> List[str]:
-        loader = TypicalSequenceModelLoader()
-        print(f"Loading model from {model_path} on {device}...")
-        try:
-            model_class = loader.load_class_from_file(model_path, device)
-            model = model_class().to(torch.device(device))
-            model.eval()
-            input_dict = loader.get_input_dict(model_path, device)
-        except Exception as e:
-            print(f"Error loading/preparing model {model_path}: {e}")
-            return []
-
+        print(f"extracting ops from {model_path}")
         extractor = TypicalSequenceExtractor()
-        compiled_model = torch.compile(model, backend=extractor.extract_compiler)
-        compiled_model(**input_dict)
+        model, inputs = get_torch_module_and_inputs(model_path)
+        compiled_model, _ = parse_sole_graph_module_without_varify(model, inputs)
+        extractor.extract_compiler(compiled_model, inputs)
         ops_info = extractor.extract_node
 
         return [op["target_name"] for op in ops_info]
@@ -150,11 +109,13 @@ def get_len(tid):
             get_len(sym_id)
         return token2len
 
-    def analyze(self, model_paths_file: str, device: str) -> Dict[str, Dict]:
+    def analyze(
+        self, model_path_prefix: str, model_paths_file: str, device: str
+    ) -> Dict[str, Dict]:
         input_file = Path(model_paths_file)
 
         with open(input_file, "r") as f:
-            model_paths = [
+            rel_model_paths = [
                 Path(line.strip())
                 for line in f
                 if line.strip() and not line.startswith("#")
@@ -163,15 +124,15 @@ def analyze(self, model_paths_file: str, device: str) -> Dict[str, Dict]:
         inputs_seqs = []
         valid_models = []
 
-        for p in model_paths:
-            seq = self._extract_ops_via_compile(str(p), device)
+        for p in rel_model_paths:
+            model_full_path = os.path.join(model_path_prefix, p)
+            seq = self._extract_ops_via_compile(model_full_path, device)
             if seq:
                 inputs_seqs.append(seq)
                 valid_models.append((p.name, p))
 
         if not inputs_seqs:
             return {}
-
         rp_parser = RpExprParser(
             window_size=self.window_size,
             fold_policy=self.fold_policy,
@@ -264,7 +225,7 @@ def main(args):
         fold_policy=args.fold_policy,
         fold_times=args.fold_times,
     )
-    results = analyzer.analyze(args.model_list, args.device)
+    results = analyzer.analyze(args.model_path_prefix, args.model_list, args.device)
     if args.output_json:
         with open(args.output_json, "w") as f:
             json.dump(results, f, indent=4)
@@ -280,6 +241,12 @@ def main(args):
         required=True,
         help="Path to a text file containing paths to models (one per line).",
     )
+    parser.add_argument(
+        "--model-path-prefix",
+        type=str,
+        default="./",
+        help="Prefix to add to each model path in the list.",
+    )
     parser.add_argument(
         "--device",
         type=str,

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -1,5 +1,4 @@` |
| `1` | `1` | `#!/bin/bash` |
| `2` |  | `-# bash graph_net/test/naive_decomposer_and_post_extract_process_test.sh` |
| `3` | `2` |  |
| `4` | `3` | `GRAPH_NET_ROOT=$(python3 -c "import graph_net; import os; print(` |
| `5` | `4` | `os.path.dirname(graph_net.__file__))")` |