add validator

jorjortuajing · jorjortuajing · commit c6f01faa3d8b · 2025-11-10T19:55:03.000+08:00
diff --git a/graph_net/test/decomposer_validator_test.sh b/graph_net/test/decomposer_validator_test.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+#
+# Runs graph_net.torch.test_compiler with the range_decomposer_validator
+# compiler on the simple_CNN test model, converts the captured log to JSON
+# with graph_net.log2json and plots it with graph_net.plot_ESt.
+#
+# All outputs are written under $GRAPH_NET_BENCHMARK_PATH/decomposer; the
+# variable defaults to the current working directory when unset or empty.
+
+GRAPH_NET_BENCHMARK_PATH="${GRAPH_NET_BENCHMARK_PATH:-$(pwd)}"
+FILE_PATH="$GRAPH_NET_BENCHMARK_PATH/decomposer"
+LOG_FILE="$FILE_PATH/log.log"
+mkdir -p "$FILE_PATH"
+
+# Relative path: resolved against the directory the script is started from.
+MODEL_PATH="./todo_works/range_decomposer_validator/test/simple_CNN"
+
+if ! python -m graph_net.torch.test_compiler \
+    --model-path "$MODEL_PATH" \
+    --compiler range_decomposer_validator \
+    --device cuda > "$LOG_FILE" 2>&1; then
+    echo "Error: decomposer_validator execution failed"
+    echo "Please check the log file: $LOG_FILE"
+    exit 1
+fi
+
+# Abort instead of announcing results when a post-processing step fails.
+if ! python -m graph_net.log2json \
+    --log-file "$LOG_FILE" \
+    --output-dir "$FILE_PATH/JSON_results/"; then
+    echo "Error: log2json failed to convert $LOG_FILE"
+    exit 1
+fi
+
+if ! python -m graph_net.plot_ESt \
+    --benchmark-path "$FILE_PATH/JSON_results/" \
+    --output-dir "$FILE_PATH"; then
+    echo "Error: plot_ESt failed to render the results"
+    exit 1
+fi
+
+echo "=================================================="
+echo "Results saved in: $FILE_PATH/ES_result.png"
+echo ""
+echo "IMPORTANT: Please verify if the curve in ES_result.png is a straight line"
+echo "If the curve is NOT a straight line, please check the log file: $LOG_FILE"
+echo "=================================================="
diff --git a/graph_net/test/naive_graph_decomposer_test.sh b/graph_net/test/naive_graph_decomposer_test.sh
@@ -4,12 +4,15 @@ GRAPH_NET_ROOT=$(python3 -c "import graph_net; import os; print(
 os.path.dirname(graph_net.__file__))")
 
 # input model path
-MODEL_PATH_IN_SAMPLES=/timm/resnet18 
+MODEL_PATH_IN_SAMPLES=/timm/resnet18
+MODEL_NAME=$(basename "$MODEL_PATH_IN_SAMPLES")
+OUTPUT_DIR="${NAIVE_DECOMPOSE_WORKSPACE:-$(pwd)/naive_decompose_workspace}"
+
 extractor_config_json_str=$(cat <<EOF
 {
     "custom_extractor_path": "$GRAPH_NET_ROOT/torch/naive_graph_decomposer.py",
     "custom_extractor_config": {
-        "output_dir": "/tmp/naive_decompose_workspace",
+        "output_dir": "$OUTPUT_DIR/${MODEL_NAME}_decomposed",
         "split_positions": [8, 16, 32],
         "group_head_and_tail": true,
         "filter_path":"$GRAPH_NET_ROOT/torch/naive_subgraph_filter.py",
@@ -20,5 +23,4 @@ EOF
 )
 EXTRACTOR_CONFIG=$(echo $extractor_config_json_str | base64 -w 0)
 
-mkdir -p /tmp/naive_decompose_workspace
-python3 -m graph_net.torch.single_device_runner --model-path $GRAPH_NET_ROOT/../samples/$MODEL_PATH_IN_SAMPLES --enable-extract True --extract-name resnet18 --dump-graph-hash-key --extractor-config=$EXTRACTOR_CONFIG
+python3 -m graph_net.torch.single_device_runner --model-path "$GRAPH_NET_ROOT/../samples/$MODEL_PATH_IN_SAMPLES" --enable-extract True --extract-name "$MODEL_NAME" --dump-graph-hash-key --extractor-config="$EXTRACTOR_CONFIG"
diff --git a/graph_net/torch/test_compiler.py b/graph_net/torch/test_compiler.py
@@ -69,6 +69,12 @@ def load_class_from_file(
     exec(compiled_code, module.__dict__)
 
     model_class = getattr(module, class_name, None)
+    if model_class is not None:
+        # Remember the source path and target device on the class itself so
+        # later stages can read them back. Guarded because the lookup above
+        # yields None for a missing class, and None accepts no attributes.
+        model_class.__file_path__ = file_path
+        model_class.__device__ = device
     return model_class
 
 
diff --git a/todo_works/range_decomposer_validator/range_decomposer_validator.py b/todo_works/range_decomposer_validator/range_decomposer_validator.py
@@ -4,39 +4,158 @@
 import sys
 import inspect
 import importlib.util
-from typing import List, Dict
+import itertools
+from typing import List, Tuple, Dict, Any, Callable
 
 
 class ComposedModel(nn.Module):
+    """Chain decomposed subgraphs and check them against the original graph.
+
+    ``forward`` runs ``graph`` and then every module of ``subgraph`` in order
+    through ``torch.compile`` with ``extract_compiler`` as the backend, and
+    compares the operators recorded for the whole graph with those recorded
+    for the subgraph chain.
+    """
+
-    def __init__(self, submodules: List[nn.Module]):
+    def __init__(self, graph: nn.Module, subgraph: List[nn.Module]):
+        """Store the full ``graph`` and its ordered ``subgraph`` pieces."""
         super().__init__()
-        self.submodules = nn.ModuleList(submodules)
-        self.submodule_param_names = [
+        self.graph = graph
+        self.subgraph = nn.ModuleList(subgraph)
+        # Parameter names of each subgraph's forward(), in signature order.
+        self.subgraph_param_names = [
             list(inspect.signature(sm.forward).parameters.keys())
-            for sm in self.submodules
+            for sm in self.subgraph
         ]
+        # One operator list per FX graph handed to extract_compiler.
+        self.extract_node = []
+
+    def _serialize_arg(self, arg: Any) -> Any:
+        """Return ``arg`` with each ``torch.fx.Node`` replaced by its name.
+
+        Lists, tuples and dicts are rebuilt recursively (dict keys included);
+        any other value is returned unchanged.
+        """
+        if isinstance(arg, torch.fx.Node):
+            return arg.name
+        if isinstance(arg, (list, tuple)):
+            return type(arg)(self._serialize_arg(elem) for elem in arg)
+        if isinstance(arg, dict):
+            return {
+                self._serialize_arg(k): self._serialize_arg(v) for k, v in arg.items()
+            }
+        return arg
+
+    def _extract_operators_from_graph(
+        self, gm: nn.Module, example_inputs: List[torch.Tensor] = None
+    ) -> List[Dict[str, Any]]:
+        """Describe every call node of ``gm.graph`` as a dict.
+
+        Only ``call_method``, ``call_function`` and ``call_module`` nodes are
+        reported. ``example_inputs`` is accepted but not used.
+        """
+        operator_list = []
+        for node in gm.graph.nodes:
+            if node.op not in ("call_method", "call_function", "call_module"):
+                continue
+
+            target = node.target
+            if callable(target):
+                # Not every callable exposes __name__; fall back to str().
+                target_name = getattr(target, "__name__", str(target))
+            else:
+                target_name = str(target)
+
+            operator_list.append(
+                {
+                    "op_type": node.op,
+                    "target": target,
+                    "name": node.name,
+                    "kwargs": self._serialize_arg(node.kwargs),
+                    "target_name": target_name,
+                }
+            )
+
+        return operator_list
+
+    def extract_compiler(self, gm: torch.fx.GraphModule, inputs: List[torch.Tensor]):
+        """``torch.compile`` backend that records ``gm``'s operators.
+
+        Appends the operator list of ``gm`` to ``self.extract_node`` and
+        returns ``gm.forward`` so the captured graph runs as is.
+        """
+        operator = self._extract_operators_from_graph(gm, inputs)
+        self.extract_node.append(operator)
+        return gm.forward
 
     def forward(self, **kwargs):
+        """Run the subgraph chain and verify it against the full graph.
+
+        Args:
+            **kwargs: Named inputs for ``graph``; each subgraph receives the
+                entries whose names match its own ``forward`` parameters.
+
+        Returns:
+            A 1-tuple holding the first output of the last subgraph.
+
+        Raises:
+            ValueError: If the operators recorded for ``graph`` differ from
+                those recorded for the subgraph chain.
+        """
+        # Discard records left behind by an earlier call that raised midway;
+        # they would otherwise be counted as operators of the full graph.
+        self.extract_node = []
         current_args = kwargs
+        # NOTE(review): compiling on every call relies on the backend being
+        # invoked each time; if torch.compile reuses a cached graph, nothing
+        # is recorded and the comparison below passes trivially - confirm.
+        compiled_model = torch.compile(self.graph, backend=self.extract_compiler)
+        compiled_model(**current_args)
+        graph_node_list = list(itertools.chain.from_iterable(self.extract_node))
+        self.extract_node = []
+
         for i, (sm, param_names) in enumerate(
-            zip(self.submodules, self.submodule_param_names)
+            zip(self.subgraph, self.subgraph_param_names)
         ):
-            # 准备当前子图的输入字典
             call_kwargs = {}
             if i > 0:
-                # 对于后续子图，第一个参数是上一个子图的输出
+                # Later subgraphs take the previous subgraph's output as
+                # their first parameter.
                 first_param_name = param_names[0]
-                call_kwargs[first_param_name] = current_args  # current_args 此时是上一个子图的输出
+                call_kwargs[first_param_name] = current_args
+                remaining_params = param_names[1:]
+            else:
+                remaining_params = param_names
 
-            # 从主输入字典中筛选出当前子图需要的权重参数
-            for name in param_names:
-                if name in current_args:
-                    call_kwargs[name] = current_args[name]
+            # Remaining parameters are looked up by name in the caller's
+            # inputs; names that are absent there are simply not passed.
+            for name in remaining_params:
+                if name in kwargs:
+                    call_kwargs[name] = kwargs[name]
 
-            outputs = sm(**call_kwargs)
-            # 假设每个子图只有一个输出，并且返回的是一个元组
+            compiled_model = torch.compile(sm, backend=self.extract_compiler)
+            outputs = compiled_model(**call_kwargs)
+            # Only the first output is forwarded to the next subgraph.
             current_args = outputs[0]
 
+        subgraph_node_list = list(itertools.chain.from_iterable(self.extract_node))
+        self.extract_node = []
+
+        if graph_node_list != subgraph_node_list:
+            diff_in_graph = [
+                item for item in graph_node_list if item not in subgraph_node_list
+            ]
+            diff_in_subgraph = [
+                item for item in subgraph_node_list if item not in graph_node_list
+            ]
+
+            error_msg = "Subgraph segmentation verification failed\n"
+            error_msg += f"Nodes in graph but not in subgraph: {diff_in_graph}\n"
+            error_msg += f"Nodes in subgraph but not in graph: {diff_in_subgraph}"
+            raise ValueError(error_msg)
+        else:
+            print("")
+
         return (current_args,)
 
 
@@ -54,36 +121,58 @@ def _load_model_instance(self, path: str, device: str) -> torch.nn.Module:
         return instance
 
     def __call__(self, model: torch.nn.Module) -> torch.nn.Module:
-        model_file_path = inspect.getfile(
-            model.__class__
-        )  # e.g., /test/simple_CNN/model.py
-        model_dir = os.path.dirname(model_file_path)  # e.g., /test/simple_CNN
-
-        decomposed_parent_dir = (
-            model_dir + "_decomposed"
-        )  # e.g., /test/simple_CNN_decomposed
+        """Build a ``ComposedModel`` from ``model`` and its decomposed subgraphs.
+
+        Subgraphs are loaded from the sub-directories of
+        ``<model_dir>_decomposed`` whose names end in a digit, ordered by name
+        prefix and then by numeric suffix.
+
+        Args:
+            model: Model whose class normally carries ``__file_path__`` and
+                ``__device__``. When either is missing, the source file of
+                the class and the device of the first parameter are used.
+
+        Returns:
+            The ``ComposedModel`` switched to eval mode.
+        """
+        model_cls = model.__class__
+        model_file_path = getattr(model_cls, "__file_path__", None)
+        if model_file_path is None:
+            model_file_path = inspect.getfile(model_cls)
+        model_dir = os.path.dirname(model_file_path)
+        decomposed_parent_dir = model_dir + "_decomposed"
+
+        def numeric_suffix_key(name):
+            # Plain string order would place "x_10" before "x_2".
+            prefix = name.rstrip("0123456789")
+            suffix = name[len(prefix) :]
+            return (prefix, int(suffix) if suffix else -1)
+
         subgraph_paths = []
-        for name in sorted(os.listdir(decomposed_parent_dir)):
+        for name in sorted(os.listdir(decomposed_parent_dir), key=numeric_suffix_key):
             full_path = os.path.join(decomposed_parent_dir, name)
-            if os.path.isdir(full_path) and name.startswith("subgraph_"):
+            if os.path.isdir(full_path) and name[-1].isdigit():
                 subgraph_paths.append(full_path)
 
         print(
             f"[RangeDecomposerValidatorBackend] Found subgraphs: {[os.path.basename(p) for p in subgraph_paths]}"
         )
 
-        submodule_instances = []
-        device = next(model.parameters()).device  # 从传入的model获取device信息
+        device = getattr(model_cls, "__device__", None)
+        if device is None:
+            device = next(model.parameters()).device
+        graph_instance = self._load_model_instance(model_dir, device)
+        subgraph_instances = []
 
         for path in subgraph_paths:
             instance = self._load_model_instance(path, device)
-            submodule_instances.append(instance)
+            subgraph_instances.append(instance)
             dir_name = os.path.basename(path)
             print(
                 f"[RangeDecomposerValidatorBackend] Loaded and instantiated '{dir_name}'"
             )
 
-        composed_model = ComposedModel(submodule_instances)
+        composed_model = ComposedModel(graph_instance, subgraph_instances)
         return composed_model.eval()
 
     def synchronize(self):