Update on "[ET-VK][testing] Add scripts to facilitate operator testing"

ssjia · ssjia · commit b49597b54304 · 2025-08-22T07:31:09.000-07:00
Differential Revision: [D80800081](https://our.internmc.facebook.com/intern/diff/D80800081) [ghstack-poisoned]
diff --git a/backends/vulkan/op_registry.py b/backends/vulkan/op_registry.py
@@ -397,14 +397,17 @@ def check_reduce_node(node: torch.fx.Node) -> bool:
                 # If we can't get memory layout information, we'll assume the dims aren't packed
                 pass
 
-        keepdim = node.args[2]
-        if isinstance(keepdim, bool) and not keepdim:
+        def try_find_keepdim_arg(node: torch.fx.Node) -> bool:
+            for arg in node.args:
+                if isinstance(arg, bool):
+                    return arg
+
+            # Assume false by default
             return False
 
-        if len(node.args) > 2:
-            keepdim = node.args[2]
-            if isinstance(keepdim, bool) and not keepdim:
-                return False
+        keepdim = try_find_keepdim_arg(node)
+        if isinstance(keepdim, bool) and not keepdim:
+            return False
 
         return True
 
diff --git a/backends/vulkan/partitioner/vulkan_partitioner.py b/backends/vulkan/partitioner/vulkan_partitioner.py
@@ -204,7 +204,7 @@ def is_in_local_scalar_dense_chain(self, node: torch.fx.Node) -> Tuple[bool, boo
     def log_skip(self, node: torch.fx.Node, reason: str) -> None:
         if node.op == "call_function":
             logger.info(
-                f"[Vulkan Partitioner] Due to [{reason}], skipping {node.format_node()}"
+                f"[Vulkan Partitioner] Due to [{reason}], skipping {utils.node_io_str(node)}"
             )
 
     def is_node_supported(
diff --git a/backends/vulkan/runtime/graph/ops/glsl/conv2d.glsl b/backends/vulkan/runtime/graph/ops/glsl/conv2d.glsl
@@ -30,6 +30,8 @@ ${layout_declare_ubo(8, "float", "out_min", "float", "out_max")}
 
 layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
 
+${layout_declare_spec_const(C, "int", "ngroups", "1")}
+
 /*
  * Computes a 2D convolution. Each shader invocation calculates the output at
  * a single output location.
@@ -74,7 +76,18 @@ void main() {
   // Perform the convolution by iterating over the overlay region.
   VEC4_T sum = texelFetch(t_bias, ivec2(pos.z, 0), 0);
   const int ic4 = in_group_size / 4;
-  for (int z4 = 0; z4 < ic4; ++z4, kstart.x += kernel_size.x * 4) {
+
+  int z_start = 0;
+  int z_end = ic4;
+  if (ngroups > 1) {
+    const int group_size = (out_limits.z) / ngroups;
+    const int group_idx = pos.z / group_size;
+
+    z_start = ic4 * group_idx;
+    z_end = z_start + ic4;
+  }
+
+  for (int z4 = z_start; z4 < z_end; ++z4, kstart.x += kernel_size.x * 4) {
     for (int y = start.y, ky = kstart.y; y < end.y; y += dilation.y, ++ky) {
       for (int x = start.x, kx = kstart.x; x < end.x; x += dilation.x, kx += 4) {
         const VEC4_T in_texel = texelFetch(t_in, ivec3(x, y, z4), 0);
diff --git a/backends/vulkan/runtime/graph/ops/glsl/conv2d_dw.glsl b/backends/vulkan/runtime/graph/ops/glsl/conv2d_dw.glsl
@@ -30,6 +30,8 @@ ${layout_declare_ubo(8, "float", "out_min", "float", "out_max")}
 
 layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
 
+${layout_declare_spec_const(C, "int", "ngroups", "1")}
+
 /*
  * Computes a depthwise convolution. Each shader invocation calculates the
  * output at a single output location.
diff --git a/backends/vulkan/runtime/graph/ops/glsl/conv2d_pw.glsl b/backends/vulkan/runtime/graph/ops/glsl/conv2d_pw.glsl
@@ -38,6 +38,8 @@ layout(push_constant) uniform restrict Block {
 
 layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
 
+${layout_declare_spec_const(C, "int", "ngroups", "1")}
+
 #extension GL_EXT_control_flow_attributes : require
 
 /*
diff --git a/backends/vulkan/runtime/graph/ops/glsl/conv2d_pw_s1p0.glsl b/backends/vulkan/runtime/graph/ops/glsl/conv2d_pw_s1p0.glsl
@@ -40,6 +40,8 @@ layout(push_constant) uniform restrict Block {
 
 layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
 
+${layout_declare_spec_const(C, "int", "ngroups", "1")}
+
 #extension GL_EXT_control_flow_attributes : require
 
 /*
diff --git a/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp b/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp
@@ -280,9 +280,6 @@ Conv2dMethod get_conv2d_method(
   if (!transposed && weight_sizes.at(0) == groups && weight_sizes.at(1) == 1) {
     return Conv2dMethod::Depthwise;
   }
-  if (groups > 1) {
-    VK_THROW("aten.convolution.default: groups > 1 is not supported yet!");
-  }
   if (transposed) {
     return Conv2dMethod::Transposed;
   }
@@ -601,7 +598,7 @@ void add_conv2d_node(
       // Push Constants
       push_constants,
       // Specialization Constants
-      {},
+      {utils::safe_downcast<int32_t>(groups_val)},
       // Resize Args
       {weight_data, stride, padding, dilation, transposed, output_padding},
       // Resizing Logic
diff --git a/backends/vulkan/targets.bzl b/backends/vulkan/targets.bzl
@@ -387,6 +387,8 @@ def define_common_targets(is_fbcode = False):
                 "//executorch/backends/transforms:view_copy_to_squeeze_unsqueeze",
                 "//executorch/backends/vulkan/_passes:vulkan_passes",
                 "//executorch/backends/vulkan/serialization:lib",
+                "//executorch/backends/transforms:remove_getitem_op",
+                "//executorch/backends/xnnpack/_passes:xnnpack_passes",
                 "//executorch/exir/backend:backend_details",
             ],
         )
diff --git a/backends/vulkan/test/op_tests/cases.py b/backends/vulkan/test/op_tests/cases.py
@@ -297,6 +297,28 @@ def get_conv_inputs():
     )
 
     test_cases = [
+        Test(
+            self=(1, 64, 256, 256),
+            weight=(64, 32, 3, 3),
+            bias=None,
+            stride=[1, 1],
+            padding=[1, 1],
+            dilation=[1, 1],
+            transposed=False,
+            output_padding=[0, 0],
+            groups=2,
+        ),
+        Test(
+            self=(1, 16, 3, 3),
+            weight=(16, 8, 3, 3),
+            bias=None,
+            stride=[1, 1],
+            padding=[1, 1],
+            dilation=[1, 1],
+            transposed=False,
+            output_padding=[0, 0],
+            groups=2,
+        ),
         Test(
             self=(1, 6, 40, 50),
             weight=(8, 6, 3, 3),
diff --git a/backends/vulkan/utils.py b/backends/vulkan/utils.py
@@ -1059,6 +1059,8 @@ def get_node_val_str(node: torch.fx.Node) -> str:
         assert isinstance(node.meta["val"], (list, tuple))
         return f"[{', '.join(get_tensor_val_str(t) for t in node.meta['val'])}]"
     else:
+        if "val" not in node.meta:
+            return str(node)
         return str(node.meta["val"])
 
 
diff --git a/backends/vulkan/vulkan_preprocess.py b/backends/vulkan/vulkan_preprocess.py
@@ -13,9 +13,6 @@
 import executorch.backends.vulkan.utils as utils
 
 from executorch.backends.transforms.addmm_mm_to_linear import AddmmToLinearTransform
-from executorch.backends.transforms.fuse_batch_norm_with_conv import (
-    FuseBatchNormWithConvPass,
-)
 from executorch.backends.transforms.fuse_conv_with_clamp import FuseClampPass
 from executorch.backends.transforms.fuse_view_copy import FuseViewCopyTransform
 from executorch.backends.transforms.view_copy_to_squeeze_unsqueeze import (
@@ -40,6 +37,7 @@
 from executorch.backends.vulkan.serialization.vulkan_graph_serialize import (
     serialize_vulkan_graph,
 )
+from executorch.backends.xnnpack._passes import FuseBatchNormPass
 
 from executorch.exir.backend.backend_details import (
     BackendDetails,
@@ -162,7 +160,7 @@ def preprocess(  # noqa: C901
                 SqueezeUnsqueezeInputs(),
                 FuseViewCopyTransform(),
                 ViewCopyToSqueezeUnsqueezePass(),
-                FuseBatchNormWithConvPass(program),
+                FuseBatchNormPass(program),
                 FuseClampPass(),
             ],
         )
diff --git a/extension/android/BUCK b/extension/android/BUCK
@@ -10,7 +10,6 @@ non_fbcode_target(_kind = fb_android_library,
         "executorch_android/src/main/java/org/pytorch/executorch/DType.java",
         "executorch_android/src/main/java/org/pytorch/executorch/EValue.java",
         "executorch_android/src/main/java/org/pytorch/executorch/ExecuTorchRuntime.java",
-        "executorch_android/src/main/java/org/pytorch/executorch/ExecutorchRuntimeException.java",
         "executorch_android/src/main/java/org/pytorch/executorch/MethodMetadata.java",
         "executorch_android/src/main/java/org/pytorch/executorch/Module.java",
         "executorch_android/src/main/java/org/pytorch/executorch/Tensor.java",
diff --git a/extension/android/CMakeLists.txt b/extension/android/CMakeLists.txt
@@ -71,7 +71,6 @@ executorch_target_link_options_shared_lib(executorch)
 
 add_library(
   executorch_jni SHARED jni/jni_layer.cpp jni/log.cpp jni/jni_layer_runtime.cpp
-                        jni/jni_helper.cpp
 )
 
 set(link_libraries)
diff --git a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/ExecutorchRuntimeException.java b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/ExecutorchRuntimeException.java
diff --git a/extension/android/jni/BUCK b/extension/android/jni/BUCK
@@ -7,14 +7,6 @@ load(":build_defs.bzl", "ET_JNI_COMPILER_FLAGS")
 
 oncall("executorch")
 
-# Define the common JNI source files
-shared_srcs = [
-    "jni_layer.cpp",
-    "jni_layer_runtime.cpp",
-    "jni_helper.cpp",
-    "log.cpp",
-]
-
 non_fbcode_target(_kind = executorch_generated_lib,
     name = "generated_op_lib_optimized",
     custom_ops_aten_kernel_deps = [
@@ -36,7 +28,7 @@ non_fbcode_target(_kind = executorch_generated_lib,
 
 non_fbcode_target(_kind = fb_android_cxx_library,
     name = "executorch_jni",
-    srcs = shared_srcs,
+    srcs = ["jni_layer.cpp", "log.cpp", "jni_layer_runtime.cpp"],
     allow_jni_merging = False,
     compiler_flags = ET_JNI_COMPILER_FLAGS,
     soname = "libexecutorch.$(ext)",
@@ -57,7 +49,7 @@ non_fbcode_target(_kind = fb_android_cxx_library,
 
 non_fbcode_target(_kind = fb_android_cxx_library,
     name = "executorch_jni_full",
-    srcs = shared_srcs,
+    srcs = ["jni_layer.cpp", "log.cpp", "jni_layer_runtime.cpp"],
     allow_jni_merging = False,
     compiler_flags = ET_JNI_COMPILER_FLAGS,
     soname = "libexecutorch.$(ext)",
@@ -79,7 +71,7 @@ non_fbcode_target(_kind = fb_android_cxx_library,
 
 non_fbcode_target(_kind = fb_android_cxx_library,
     name = "executorch_training_jni",
-    srcs = shared_srcs + ["jni_layer_training.cpp"],
+    srcs = ["jni_layer.cpp", "log.cpp", "jni_layer_runtime.cpp", "jni_layer_training.cpp"],
     allow_jni_merging = False,
     compiler_flags = ET_JNI_COMPILER_FLAGS + [
         "-DEXECUTORCH_BUILD_EXTENSION_TRAINING",
@@ -106,9 +98,11 @@ non_fbcode_target(_kind = fb_android_cxx_library,
 
 non_fbcode_target(_kind = fb_android_cxx_library,
     name = "executorch_llama_jni",
-    exclude_files = ["log.cpp"]
-    shared_srcs_filtered = [f for f in shared_srcs if f not in exclude_files]
-    srcs = shared_srcs_filtered + ["jni_layer_llama.cpp"]
+    srcs = [
+        "jni_layer.cpp",
+        "jni_layer_llama.cpp",
+        "jni_layer_runtime.cpp",
+    ],
     allow_jni_merging = False,
     compiler_flags = ET_JNI_COMPILER_FLAGS + [
         "-DEXECUTORCH_BUILD_LLAMA_JNI",
@@ -151,10 +145,6 @@ runtime.export_file(
     name = "jni_layer_runtime.cpp",
 )
 
-runtime.export_file(
-    name = "jni_helper.cpp",
-)
-
 runtime.cxx_library(
     name = "jni_headers",
     exported_headers = [
diff --git a/extension/android/jni/jni_helper.cpp b/extension/android/jni/jni_helper.cpp
diff --git a/extension/android/jni/jni_helper.h b/extension/android/jni/jni_helper.h
diff --git a/extension/android/jni/jni_layer.cpp b/extension/android/jni/jni_layer.cpp
diff --git a/extension/android/jni/selective_jni.buck.bzl b/extension/android/jni/selective_jni.buck.bzl
diff --git a/scripts/build_apple_frameworks.sh b/scripts/build_apple_frameworks.sh

Original file line number	Diff line number	Diff line change
`@@ -204,7 +204,7 @@ def is_in_local_scalar_dense_chain(self, node: torch.fx.Node) -> Tuple[bool, boo`
`204`	`204`	`def log_skip(self, node: torch.fx.Node, reason: str) -> None:`
`205`	`205`	`if node.op == "call_function":`
`206`	`206`	`logger.info(`
`207`		`- f"[Vulkan Partitioner] Due to [{reason}], skipping {node.format_node()}"`
	`207`	`+ f"[Vulkan Partitioner] Due to [{reason}], skipping {utils.node_io_str(node)}"`
`208`	`208`	`)`
`209`	`209`
`210`	`210`	`def is_node_supported(`
Original file line number	Diff line number	Diff line change
`@@ -387,6 +387,8 @@ def define_common_targets(is_fbcode = False):`
`387`	`387`	`"//executorch/backends/transforms:view_copy_to_squeeze_unsqueeze",`
`388`	`388`	`"//executorch/backends/vulkan/_passes:vulkan_passes",`
`389`	`389`	`"//executorch/backends/vulkan/serialization:lib",`
	`390`	`+ "//executorch/backends/transforms:remove_getitem_op",`
	`391`	`+ "//executorch/backends/xnnpack/_passes:xnnpack_passes",`
`390`	`392`	`"//executorch/exir/backend:backend_details",`
`391`	`393`	`],`
`392`	`394`	`)`