From bc9a7c80ff81955134ca78335a1c23300940c6de Mon Sep 17 00:00:00 2001
From: Stephen Jia
Date: Wed, 26 Mar 2025 08:57:53 -0700
Subject: [PATCH] [ET-VK][benchmarking][ez] Don't perform copies when benchmarking

Pull Request resolved: https://github.com/pytorch/executorch/pull/9468

## Context

The generated operator benchmarks currently carry a significant amount of copy overhead:

1. Copy from CPU to staging
2. Copy from staging to GPU Buffer/Image

This is done for both inputs and outputs. Since benchmarks are not correctness tests, copying data in and out is not really necessary, especially if the compute shader's behaviour does not depend on the contents of the input/output tensors.

Make it so that, by default, the benchmark only executes the op without adding copy overhead. However, test cases can optionally specify that the copy overhead should be included in the benchmark.

Differential Revision: [D71570143](https://our.internmc.facebook.com/intern/diff/D71570143/)

ghstack-source-id: 274197244
---
 .../test/op_tests/utils/gen_benchmark_vk.py   | 10 +++---
 .../test/op_tests/utils/gen_computegraph.py   | 36 +++++++++++++------
 .../vulkan/test/op_tests/utils/test_suite.py  |  1 +
 3 files changed, 33 insertions(+), 14 deletions(-)

diff --git a/backends/vulkan/test/op_tests/utils/gen_benchmark_vk.py b/backends/vulkan/test/op_tests/utils/gen_benchmark_vk.py
index d36c7c85a32..983d2c82bd0 100644
--- a/backends/vulkan/test/op_tests/utils/gen_benchmark_vk.py
+++ b/backends/vulkan/test/op_tests/utils/gen_benchmark_vk.py
@@ -12,7 +12,7 @@
 from executorch.backends.vulkan.test.op_tests.utils.gen_correctness_base import (
     CorrectnessTestGen,
 )
-from executorch.backends.vulkan.test.op_tests.utils.test_suite import TestSuite
+from executorch.backends.vulkan.test.op_tests.utils.test_suite import VkTestSuite
 
 from torchgen.model import NativeFunction
 
@@ -72,10 +72,12 @@ class GeneratedOpBenchmark_{op_name} : public ::benchmark::Fixture {{
 
 
 class VkBenchmarkGen(CorrectnessTestGen):
-    def __init__(self, op_reg_name: str, f: NativeFunction, inputs: TestSuite):
+    def __init__(self, op_reg_name: str, f: NativeFunction, inputs: VkTestSuite):
         super().__init__(f, inputs)
         self.op_reg_name = op_reg_name
-        self.generator = ComputeGraphGen(self.op_reg_name, self.f, self.suite_def)
+        self.generator = ComputeGraphGen(
+            self.op_reg_name, self.f, self.suite_def, inputs.force_io
+        )
 
     def gen_call_benchmark(self, prepack=False) -> str:
         test_str = f"benchmark_{self.op_name}("
@@ -197,7 +199,7 @@ def generate_benchmark_fixture(self) -> str:
       float high = 1.0) {{
     if (high == 1.0 && low == 0.0)
       return at::rand(sizes, at::device(at::kCPU).dtype(dtype));
-
+
     if (dtype == at::kChar)
       return at::randint(high, sizes, at::device(at::kCPU).dtype(dtype));
 
diff --git a/backends/vulkan/test/op_tests/utils/gen_computegraph.py b/backends/vulkan/test/op_tests/utils/gen_computegraph.py
index 6f93e662076..708da8eab85 100644
--- a/backends/vulkan/test/op_tests/utils/gen_computegraph.py
+++ b/backends/vulkan/test/op_tests/utils/gen_computegraph.py
@@ -90,10 +90,17 @@ def vk_out(self):
 class ComputeGraphGen:
     backend_key = None
 
-    def __init__(self, op_reg_name: str, f: NativeFunction, suite_def: TestSuite):
+    def __init__(
+        self,
+        op_reg_name: str,
+        f: NativeFunction,
+        suite_def: TestSuite,
+        include_io: bool = True,
+    ):
         self.op_reg_name = op_reg_name
         self.f = f
         self.suite_def = suite_def
+        self.include_io = include_io
 
         self.f_sig = CppSignatureGroup.from_native_function(
             self.f, method=False, fallback_binding=self.f.manual_cpp_binding
@@ -275,6 +282,10 @@ def create_value_for(  # noqa: C901
         prepack = self.prepack_ref(ref)
         ref_is_view = self.suite_def.is_view_op and ref.is_out
 
+        # If skipping IO, force is_in to be False
+        if not self.include_io and ref.is_in:
+            ref.is_in = False
+
         cpp_type = "IOValueRef" if (ref.is_in and not prepack) else "ValueRef"
         if not include_declarations:
             cpp_type = ""
@@ -602,7 +613,8 @@ def gen_graph_build_code(self, include_declarations: bool = True) -> str:
         graph_build += self.create_value_for(self.refs["out"], include_declarations)
 
         graph_build += self.create_op_call()
-        graph_build += self.set_output(self.refs["out"], include_declarations)
+        if self.include_io:
+            graph_build += self.set_output(self.refs["out"], include_declarations)
 
         graph_build += f"{self.graph}{self.dot}prepare();\n"
         graph_build += f"{self.graph}{self.dot}encode_prepack();\n"
@@ -614,18 +626,22 @@ def gen_graph_build_code(self, include_declarations: bool = True) -> str:
 
     def gen_graph_exec_code(self, check_output=True) -> str:
         graph_exec = ""
-        for aten_arg in self.args:
-            ref = self.refs[aten_arg.name]
-            if ref.is_in:
-                graph_exec += self.virtual_resize(ref)
-                graph_exec += self.copy_into_staging(ref)
+        if self.include_io:
+            for aten_arg in self.args:
+                ref = self.refs[aten_arg.name]
+                if ref.is_in:
+                    graph_exec += self.virtual_resize(ref)
+                    graph_exec += self.copy_into_staging(ref)
+
+            graph_exec += f"{self.graph}{self.dot}propagate_resize();\n"
 
-        graph_exec += f"{self.graph}{self.dot}propagate_resize();\n"
         graph_exec += f"{self.graph}{self.dot}execute();\n"
 
         graph_exec += self.declare_vk_out_for(self.refs["out"])
-        graph_exec += self.copy_from_staging(self.refs["out"])
-        if check_output:
+        if self.include_io:
+            graph_exec += self.copy_from_staging(self.refs["out"])
+
+        if self.include_io and check_output:
             graph_exec += self.check_graph_out(self.refs["out"])
 
         graph_exec = re.sub(r"^", " ", graph_exec, flags=re.M)
diff --git a/backends/vulkan/test/op_tests/utils/test_suite.py b/backends/vulkan/test/op_tests/utils/test_suite.py
index dd01bdde3a4..72ba457b5af 100644
--- a/backends/vulkan/test/op_tests/utils/test_suite.py
+++ b/backends/vulkan/test/op_tests/utils/test_suite.py
@@ -47,3 +47,4 @@ def __init__(self, input_cases: List[Any]):
         self.storage_types: List[str] = ["utils::kTexture3D"]
         self.layouts: List[str] = ["utils::kChannelsPacked"]
         self.data_gen: str = "make_rand_tensor"
+        self.force_io: bool = True
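
Usage note (not part of the patch): a minimal sketch of how a test case could toggle the copy overhead via the new `force_io` flag. The input shapes below are made up for illustration; only `VkTestSuite`, its `input_cases` constructor argument, and `force_io` come from this diff.

```python
from executorch.backends.vulkan.test.op_tests.utils.test_suite import VkTestSuite

# Hypothetical test suite definition; the input cases are illustrative only.
suite = VkTestSuite([((3, 64, 64), (3, 64, 64))])

# force_io controls whether the generated benchmark copies tensor data between
# CPU, staging, and the GPU buffer/image. With force_io = False the benchmark
# only executes the graph; with force_io = True the copies are included.
suite.force_io = False
```

When `force_io` is False, `VkBenchmarkGen` constructs `ComputeGraphGen` with `include_io=False`, so `create_value_for` turns input refs from `IOValueRef` into plain `ValueRef`, and the generated code skips `set_output`, the staging copies, `propagate_resize()`, and the output check.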