
Commit 0cd7c35

add tvm
1 parent 0d154bc commit 0cd7c35

File tree: 3 files changed (+93, −6 lines)

graph_net/torch/backend/tvm_backend.py
graph_net/torch/test_compiler.py
graph_net/torch/utils.py
graph_net/torch/backend/tvm_backend.py

Lines changed: 80 additions & 0 deletions
@@ -0,0 +1,80 @@
import torch
import inspect

from .graph_compiler_backend import GraphCompilerBackend

try:
    import tvm
    from tvm import relax
    from tvm import dlight as dl
    from tvm.relax.frontend.torch import dynamo_capture_subgraphs
except ImportError:
    tvm = None
    relax = None
    dl = None
    dynamo_capture_subgraphs = None


class TvmCompiledModule(torch.nn.Module):
    def __init__(self, module, device):
        super().__init__()
        self.module = module
        self.counter = 0
        self.tvm_input = []
        self.compiled_vm = None
        self.dev = tvm.device(device)
        self.target = tvm.target.Target.from_device(self.dev)
        self.param_names = list(inspect.signature(module.forward).parameters.keys())

    def forward(self, **kwargs):
        # Compile on the first call and cache the TVM inputs; subsequent
        # calls replay the cached inputs through the compiled VM.
        if self.counter == 0:
            self.compiled_vm = self.compile(self.module, **kwargs)
            for name in self.param_names:
                # "s1" carries a dynamo shape symbol, not a tensor; skip it.
                if name in kwargs and name != "s1":
                    param = kwargs[name]
                    self.tvm_input.append(tvm.nd.array(param.cpu(), self.dev))

        output = self.compiled_vm["subgraph_0"](*self.tvm_input).numpy()
        self.counter += 1
        return torch.from_numpy(output)

    def compile(self, module, **kwargs):
        # Capture the PyTorch graph as a Relax IRModule via torch.dynamo.
        with torch.no_grad():
            mod = dynamo_capture_subgraphs(module, **kwargs, keep_params_as_input=True)
        mod, _ = relax.frontend.detach_params(mod)
        with self.target:
            mod = tvm.ir.transform.Sequential(
                [
                    relax.get_pipeline("zero"),
                    dl.ApplyDefaultSchedule(
                        dl.gpu.Matmul(),
                        dl.gpu.GEMV(),
                        dl.gpu.Reduction(),
                        dl.gpu.GeneralReduction(),
                        dl.gpu.Fallback(),
                    ),
                ]
            )(mod)
        ex = tvm.compile(mod, target=self.target)
        vm = relax.VirtualMachine(ex, self.dev)
        return vm


class TvmBackend(GraphCompilerBackend):
    def __call__(self, model, **kwargs):
        if torch.cuda.is_available():
            device = "cuda"
        else:
            device = "llvm"
        return TvmCompiledModule(model, device=device)

    def synchronize(self):
        if torch.cuda.is_available():
            torch.cuda.synchronize()

    def version(self):
        try:
            from importlib.metadata import version

            return version("tvm")
        except Exception:
            return "unknown"

graph_net/torch/test_compiler.py

Lines changed: 8 additions & 1 deletion
@@ -15,11 +15,13 @@
 import numpy as np
 import platform
 from graph_net.torch.backend.graph_compiler_backend import GraphCompilerBackend
+from graph_net.torch.backend.tvm_backend import TvmBackend
 from graph_net.torch.backend.inductor_backend import InductorBackend
 from graph_net.torch.backend.tensorrt_backend import TensorRTBackend
 from graph_net.torch.backend.blade_disc_backend import BladeDISCBackend

 registry_backend = {
+    "tvm": TvmBackend(),
     "inductor": InductorBackend(),
     "tensorrt": TensorRTBackend(),
     "bladedisc": BladeDISCBackend(),
@@ -35,7 +37,7 @@ def load_class_from_file(

     with open(file_path, "r", encoding="utf-8") as f:
         model_code = f.read()
-    model_code = utils.update_device(model_code, args.device)
+    model_code = utils.modify_code_by_device(model_code, args.device)
     spec = importlib.util.spec_from_loader(module_name, loader=None)
     module = importlib.util.module_from_spec(spec)
     sys.modules[module_name] = module
@@ -226,6 +228,10 @@ def test_single_model(args):

     if args.compiler == "inductor":
         result_data["configuration"]["compile_framework_version"] = torch.__version__
+    elif args.compiler == "tvm":
+        result_data["configuration"][
+            "compile_framework_version"
+        ] = f"Tvm {compiler.version()}"
     elif args.compiler == "tensorrt":
         result_data["configuration"][
             "compile_framework_version"
@@ -245,6 +251,7 @@ def test_single_model(args):

     expected_out = eager_model_call()
     compiled_out = compiled_model_call()
+    compiled_out = tuple(tensor.to(args.device) for tensor in compiled_out)

     def print_and_store_cmp(key, func, **kwargs):
         cmp_ret = func(expected_out, compiled_out, **kwargs)
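For reference, the new registry entry is consumed the same way as the existing backends; a small sketch (model here is a hypothetical torch.nn.Module):

from graph_net.torch.test_compiler import registry_backend

compiler = registry_backend["tvm"]    # the TvmBackend() instance registered above
compiled_model = compiler(model)      # wraps model in a TvmCompiledModule
print(compiler.version())             # falls back to "unknown" without tvm metadata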

graph_net/torch/utils.py

Lines changed: 5 additions & 5 deletions
@@ -272,14 +272,14 @@ def replay_tensor(info):
     return torch.randn(size=shape).to(dtype).to(device) * std * 0.2 + mean


-def update_device(code, device):
+def modify_code_by_device(code, device):
     if device == "cuda":
         pattern = r'device\(type="cpu"\)'
         replacement = f'device(type="cuda", index={torch.cuda.current_device()})'
-        updated_code = re.sub(pattern, replacement, code)
-        return updated_code
+        modify_code = re.sub(pattern, replacement, code)
+        return modify_code
     else:
         pattern = r'device\(type="cuda"(?:, index=\d+)?\)'
         replacement = 'device(type="cpu")'
-        updated_code = re.sub(pattern, replacement, code)
-        return updated_code
+        modify_code = re.sub(pattern, replacement, code)
+        return modify_code
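To illustrate the renamed helper: modify_code_by_device rewrites serialized device annotations in captured model code with a regex. A quick sketch of the cuda-to-cpu direction, which needs no GPU:

from graph_net.torch import utils

code = 'buf = torch.empty(8, device=device(type="cuda", index=0))'
print(utils.modify_code_by_device(code, "cpu"))
# buf = torch.empty(8, device=device(type="cpu"))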
