Fix Check fusible (#410)

roll-away · web-flow · commit 5f01912e8908 · 2025-12-04T16:18:46.000+08:00
* 1119

* 1120

* 1120.2

* model_path

* remove unnecessary files and pre-committed

* remove unnecessary files and pre-committed

* 1121 remove unnecessary files

* modify rev version

* modify rev version

* modify rev version

* accuracy issues targeted

* test script and modify feature

* return set[str]

* add logfile for test

* filter can get the number of kernels in naive_graph_decomposer

* post extract process feature

* remove unnecessary code blocks and variables

* modify the way of counting kernels used

* modify the way of counting kernels used

* modify script, rename files and variables

* add failure protection and log output when removing directories

* add a script to check fusability of a given model

* add a script to check if a given model is fully fusable

* add a script to check if a given model is fully fusable

* a script to check if a given model is fully fusable

* add a script to check if a given model is fully fusionable

* add a script to find fully fusionable subgraph

* find the biggest fully fusionable subgraph

* find the biggest fusionable subgraph

* add a script to get the biggest fully fusable subgraph

* use tempfile, fix sys problem, remove useless configs

* find the biggest fully fusible subgraph

* find the biggest fully fusable subgraph in a given graph

* correct 'fusable' -> 'fusible'

* remove a useless swp file
diff --git a/graph_net/test/graph_decompose_and_look_for_fully_fusible_subgraph_test.sh b/graph_net/test/graph_decompose_and_look_for_fully_fusible_subgraph_test.sh
@@ -11,18 +11,19 @@ decorator_config_json_str=$(cat <<EOF
     "decorator_path": "$GRAPH_NET_ROOT/torch/extractor.py",
     "decorator_config": {
         "name": "$MODEL_NAME",
-        "custom_extractor_path": "$GRAPH_NET_ROOT/torch/fully_fusable_subgraph_extractor.py",
+        "custom_extractor_path": "$GRAPH_NET_ROOT/torch/fully_fusible_subgraph_extractor.py",
         "custom_extractor_config": {
+            "output_dir": "/tmp/find_fully_fusible_output",
             "split_positions": [],
             "group_head_and_tail": true,
-            "max_step": 5,
+            "max_step": 3,
             "min_step": 2,
-            "max_nodes": 6
+            "max_nodes": 5
         }
     }
 }
 EOF
 )
 DECORATOR_CONFIG=$(echo $decorator_config_json_str | base64 -w 0)
 
-python3 -m graph_net.torch.run_model --model-path $GRAPH_NET_ROOT/../samples/$MODEL_PATH_IN_SAMPLES --decorator-config=$DECORATOR_CONFIG
+python3 -m graph_net.torch.run_model --model-path $GRAPH_NET_ROOT/../samples/$MODEL_PATH_IN_SAMPLES --decorator-config=$DECORATOR_CONFIG
diff --git a/graph_net/test/naive_decomposer_and_post_extract_process_test.sh b/graph_net/test/naive_decomposer_and_post_extract_process_test.sh
@@ -20,7 +20,7 @@ decorator_config_json_str=$(cat <<EOF
             "filter_path":"$GRAPH_NET_ROOT/torch/naive_subgraph_filter.py",
             "filter_config": {},
             "post_extract_process_path":"$GRAPH_NET_ROOT/torch/post_extract_process_count_kernels.py",
-            "post_extract_process_class_name": "GraphFullyFusable"
+            "post_extract_process_class_name": "GraphFullyFusible"
         }
     }
 }
diff --git a/graph_net/torch/extractor.py b/graph_net/torch/extractor.py
@@ -2,7 +2,6 @@
 import torch
 import json
 import shutil
-from typing import Union, Callable
 from graph_net.torch import utils
 from graph_net.torch.fx_graph_serialize_util import serialize_graph_module_to_str
 
@@ -82,7 +81,7 @@ def __call__(self, gm: torch.fx.GraphModule, sample_inputs):
             subgraph_path = model_path
         else:
             if self.subgraph_counter == 1:
-                subgraph_0_path = os.path.join(model_path, f"subgraph_0")
+                subgraph_0_path = os.path.join(model_path, "subgraph_0")
                 self.move_files(model_path, subgraph_0_path)
 
             subgraph_path = os.path.join(
@@ -239,9 +238,12 @@ def forward(self, s0 : torch.SymInt, L_x_ : torch.Tensor):
 
     extractor_config = make_extractor_config(extractor_config)
 
-    def get_graph_extractor_maker():
+    def get_graph_extractor_maker(model_path):
         custom_extractor_path = extractor_config["custom_extractor_path"]
         custom_extractor_config = extractor_config["custom_extractor_config"]
+        if custom_extractor_config is None:
+            custom_extractor_config = {}
+        custom_extractor_config["model_path"] = model_path
         if custom_extractor_path is None:
             return GraphExtractor
         import importlib.util as imp
@@ -254,7 +256,10 @@ def get_graph_extractor_maker():
 
     def wrapper(model: torch.nn.Module):
         assert isinstance(model, torch.nn.Module), f"{type(model)=}"
-        extractor = get_graph_extractor_maker()(
+        model_path = None
+        if hasattr(model, "__graph_net_file_path__"):
+            model_path = os.path.dirname(model.__graph_net_file_path__)
+        extractor = get_graph_extractor_maker(model_path)(
             name, dynamic, mut_graph_codes, placeholder_auto_rename
         )
         # return torch.compile(backend=extractor, dynamic=dynamic)
diff --git a/graph_net/torch/fully_fusable_subgraph_extractor.py b/graph_net/torch/fully_fusable_subgraph_extractor.py
diff --git a/graph_net/torch/fully_fusible_subgraph_extractor.py b/graph_net/torch/fully_fusible_subgraph_extractor.py
@@ -0,0 +1,121 @@
+import os
+import torch
+import graph_net
+import tempfile
+import shutil
+from graph_net.torch import constraint_util
+
+
+class GraphExtractor:
+    """Search for the biggest fully fusible sub-range of a model.
+
+    Candidate ``(start_pos, end_pos)`` ranges are enumerated from the largest
+    step down to the smallest. For each candidate a decorator config is built
+    that points at ``naive_graph_decomposer.py`` and the ``GraphFullyFusible``
+    post-extract process, and that config is handed to
+    ``constraint_util.RunModelPredicator``. The first candidate for which the
+    predicate returns a truthy value is moved under ``output_dir`` and the
+    search stops.
+    """
+
+    def __init__(
+        self,
+        config: dict,
+        name,
+        dynamic,
+        mut_graph_codes=None,
+        placeholder_auto_rename=False,
+    ):
+        """Store the extractor arguments and normalise ``config``.
+
+        ``config`` is expanded as keyword arguments into :meth:`make_config`,
+        so it may only contain the keys that method accepts.
+        """
+        self.subgraph_counter = 0
+        self.name = name
+        self.dynamic = dynamic
+        self.mut_graph_codes = mut_graph_codes
+        self.placeholder_auto_rename = placeholder_auto_rename
+        self.config = self.make_config(**config)
+
+    def make_config(
+        self,
+        output_dir=None,
+        split_positions=(),
+        group_head_and_tail=False,
+        chain_style=False,
+        max_step=8,
+        min_step=2,
+        max_nodes=32,
+        model_path=None,
+    ):
+        """Return the configuration dict with defaults filled in.
+
+        Asserts that every entry of ``split_positions`` is an ``int``; all
+        other values are passed through unchanged.
+        """
+        for pos in split_positions:
+            assert isinstance(
+                pos, int
+            ), f"split_positions should be list of int, {split_positions=}"
+        return {
+            "output_dir": output_dir,
+            "split_positions": split_positions,
+            "group_head_and_tail": group_head_and_tail,
+            "chain_style": chain_style,
+            "max_step": max_step,
+            "min_step": min_step,
+            "max_nodes": max_nodes,
+            "model_path": model_path,
+        }
+
+    def _get_sub_ranges(self):
+        """Yield ``(start_pos, end_pos)`` candidates, largest step first.
+
+        For each ``step`` from ``max_step`` down to ``min_step`` the start
+        position runs over ``range(max_nodes - step)``, so every yielded
+        ``end_pos`` is at most ``max_nodes - 1``.
+
+        Raises:
+            AssertionError: if ``min_step < 1`` or ``max_step < min_step``.
+                Because this is a generator, the check runs when iteration
+                starts, not when the method is called.
+        """
+        min_step = self.config["min_step"]
+        max_step = self.config["max_step"]
+        max_nodes = self.config["max_nodes"]
+        assert min_step >= 1, "min_step must be at least 1."
+        assert (
+            max_step >= min_step
+        ), "max_step must be greater than or equal to min_step."
+        # NOTE(review): a range ending exactly at max_nodes is never produced;
+        # confirm against the decomposer whether end_pos is inclusive.
+        for step in reversed(range(min_step, max_step + 1)):
+            for start_pos in range(max_nodes - step):
+                yield start_pos, start_pos + step
+
+    def _handle_success(self, temp_dir: str, start_pos: int, end_pos: int) -> str:
+        """Move ``temp_dir`` under ``output_dir`` and return the target path.
+
+        The target directory ``<output_dir>/<name>_start<S>_end<E>`` is
+        created first. ``shutil.move`` moves a source *inside* a destination
+        that is an existing directory, so the extracted files end up in
+        ``<target>/<basename of temp_dir>/`` rather than directly in
+        ``<target>/``.
+        """
+        target_name = f"{self.name}_start{start_pos}_end{end_pos}"
+        target_path = os.path.join(
+            self.config["output_dir"],
+            target_name,
+        )
+        os.makedirs(target_path, exist_ok=True)
+        shutil.move(temp_dir, target_path)
+        return target_path
+
+    def _build_decompose_config(
+        self, temp_dir: str, start_pos: int, end_pos: int
+    ) -> dict:
+        """Build the decorator config used to test one candidate range.
+
+        Side effect: ``self.config["split_positions"]`` is overwritten with
+        ``[start_pos, end_pos]``; ``__call__`` prints that value.
+        """
+        self.config["split_positions"] = [start_pos, end_pos]
+        graph_net_root = os.path.dirname(graph_net.__file__)
+
+        check_fusible_config = {
+            "decorator_path": f"{graph_net_root}/torch/extractor.py",
+            "decorator_config": {
+                "name": f"{self.name}",
+                "custom_extractor_path": f"{graph_net_root}/torch/naive_graph_decomposer.py",
+                "custom_extractor_config": {
+                    "output_dir": temp_dir,
+                    "split_positions": self.config["split_positions"],
+                    "group_head_and_tail": False,
+                    "filter_path": f"{graph_net_root}/torch/naive_subgraph_filter.py",
+                    "filter_config": {},
+                    "post_extract_process_path": f"{graph_net_root}/torch/post_extract_process_count_kernels.py",
+                    "post_extract_process_class_name": "GraphFullyFusible",
+                },
+            },
+        }
+        return check_fusible_config
+
+    def __call__(self, gm: torch.fx.GraphModule, sample_inputs):
+        """Run the search and return ``gm.forward`` unchanged.
+
+        ``sample_inputs`` is not used; the predicate is invoked with
+        ``config["model_path"]`` instead.
+
+        Raises:
+            ValueError: if ``output_dir`` was not configured. Checked before
+                the search starts so a successful result is never lost to a
+                late ``TypeError`` from ``os.path.join``.
+        """
+        if self.config["output_dir"] is None:
+            raise ValueError(
+                "'output_dir' must be set in custom_extractor_config to save "
+                "the fully fusible subgraph."
+            )
+        for start_pos, end_pos in self._get_sub_ranges():
+            with tempfile.TemporaryDirectory(
+                prefix="_find_fusible_subgraph_"
+            ) as temp_dir:
+                check_fusible_config = self._build_decompose_config(
+                    temp_dir, start_pos, end_pos
+                )
+                print("current split_positions:", self.config["split_positions"])
+                success = constraint_util.RunModelPredicator(check_fusible_config)(
+                    self.config["model_path"]
+                )
+                if success:
+                    target_path = self._handle_success(temp_dir, start_pos, end_pos)
+                    print(
+                        f"SUCCESS in finding the biggest fully fusible subgraph. Result saved to: {target_path}"
+                    )
+                    break
+        else:
+            # Reached only when no candidate range passed the predicate.
+            print("No fully fusible subgraph found in the configured search range.")
+        return gm.forward
diff --git a/graph_net/torch/naive_graph_decomposer.py b/graph_net/torch/naive_graph_decomposer.py
@@ -31,6 +31,7 @@ def make_config(
         filter_config=None,
         post_extract_process_path=None,
         post_extract_process_class_name=None,
+        **kwargs,
     ):
         for pos in split_positions:
             assert isinstance(
diff --git a/graph_net/torch/post_extract_process_count_kernels.py b/graph_net/torch/post_extract_process_count_kernels.py
@@ -6,7 +6,7 @@
 from torch.profiler import profile, record_function, ProfilerActivity
 
 
-class GraphFullyFusable:
+class GraphFullyFusible:
     def __init__(self, config):
         self.config = config
 
@@ -29,21 +29,17 @@ def __call__(self, model_path=None):
         # try to run the model
         try:
             model(**state_dict)
-        except Exception as e:
-            print(f"failed in running model:{e}")
+        except Exception:
             sys.exit(1)
         # try to compile the model
         try:
             compiled_model = torch.compile(model)
-        except Exception as e:
-            print(f"failed in compiling model:{e}")
+        except Exception:
             sys.exit(1)
         compiled_num_of_kernels = count_kernels(compiled_model, state_dict)
         if compiled_num_of_kernels == 1:
-            print(model_path, "can be fully integrated!!!!!!!!!!!")
             sys.exit(0)
         else:
-            print(f"{model_path} can not be fully integrated, to be removed...")
             sys.exit(1)
 
 
diff --git a/graph_net/torch/run_model.py b/graph_net/torch/run_model.py
@@ -1,15 +1,10 @@
 from . import utils
 import argparse
 import importlib.util
-import inspect
 import torch
-import logging
-from pathlib import Path
-from typing import Type, Any
-import sys
+from typing import Type
 import json
 import base64
-from contextlib import contextmanager
 
 
 def load_class_from_file(file_path: str, class_name: str) -> Type[torch.nn.Module]:
@@ -60,7 +55,6 @@ def main(args):
     assert model_class is not None
     model = model_class()
     print(f"{model_path=}")
-
     decorator_config = _convert_to_dict(args.decorator_config)
     if "decorator_path" in decorator_config:
         model = _get_decorator(decorator_config)(model)
@@ -70,7 +64,7 @@ def main(args):
     use_dummy_inputs = get_flag_use_dummy_inputs(decorator_config)
     print(f"{use_dummy_inputs=}")
     state_dict = {k: replay_tensor(v, use_dummy_inputs) for k, v in params.items()}
-
+    model.__graph_net_file_path__ = model_path
     model(**state_dict)
 
 

Original file line number	Diff line number	Diff line change
`@@ -11,18 +11,19 @@ decorator_config_json_str=$(cat <<EOF`
`11`	`11`	`"decorator_path": "$GRAPH_NET_ROOT/torch/extractor.py",`
`12`	`12`	`"decorator_config": {`
`13`	`13`	`"name": "$MODEL_NAME",`
`14`		`- "custom_extractor_path": "$GRAPH_NET_ROOT/torch/fully_fusable_subgraph_extractor.py",`
	`14`	`+ "custom_extractor_path": "$GRAPH_NET_ROOT/torch/fully_fusible_subgraph_extractor.py",`
`15`	`15`	`"custom_extractor_config": {`
	`16`	`+ "output_dir": "/tmp/find_fully_fusible_output",`
`16`	`17`	`"split_positions": [],`
`17`	`18`	`"group_head_and_tail": true,`
`18`		`- "max_step": 5,`
	`19`	`+ "max_step": 3,`
`19`	`20`	`"min_step": 2,`
`20`		`- "max_nodes": 6`
	`21`	`+ "max_nodes": 5`
`21`	`22`	`}`
`22`	`23`	`}`
`23`	`24`	`}`
`24`	`25`	`EOF`
`25`	`26`	`)`
`26`	`27`	`DECORATOR_CONFIG=$(echo $decorator_config_json_str \| base64 -w 0)`
`27`	`28`
`28`		`-python3 -m graph_net.torch.run_model --model-path $GRAPH_NET_ROOT/../samples/$MODEL_PATH_IN_SAMPLES --decorator-config=$DECORATOR_CONFIG`
	`29`	`+python3 -m graph_net.torch.run_model --model-path $GRAPH_NET_ROOT/../samples/$MODEL_PATH_IN_SAMPLES --decorator-config=$DECORATOR_CONFIG`
Original file line number	Diff line number	Diff line change
`@@ -20,7 +20,7 @@ decorator_config_json_str=$(cat <<EOF`
`20`	`20`	`"filter_path":"$GRAPH_NET_ROOT/torch/naive_subgraph_filter.py",`
`21`	`21`	`"filter_config": {},`
`22`	`22`	`"post_extract_process_path":"$GRAPH_NET_ROOT/torch/post_extract_process_count_kernels.py",`
`23`		`- "post_extract_process_class_name": "GraphFullyFusable"`
	`23`	`+ "post_extract_process_class_name": "GraphFullyFusible"`
`24`	`24`	`}`
`25`	`25`	`}`
`26`	`26`	`}`