Commit bda9a8d
Update on "[ET-VK] Introduce AOT operator registry"
## Changes

Move the following files to the root directory of the Vulkan backend:

* `backends/vulkan/partitioner/supported_ops.py` -> `backends/vulkan/op_registry.py`
* `backends/vulkan/_passes/custom_ops_defs.py` -> `backends/vulkan/custom_ops_lib.py`

In the new `op_registry.py` file, the way operator features are specified is reworked to provide much more detail about the features of an operator's Vulkan implementation. See the new `OpFeatures` class for more details. An example of registering a new operator to the export flow is:

```
@update_features(
    [
        exir_ops.edge.aten._log_softmax.default,
        exir_ops.edge.aten._softmax.default,
        exir_ops.edge.aten.mean.dim,
        exir_ops.edge.aten.sum.dim_IntList,
        exir_ops.edge.aten.amax.default,
        exir_ops.edge.aten.amin.default,
    ]
)
def register_reduce_op(features: OpFeatures):
    features.texture_impl = TextureImplFeatures(
        uses_packed_dim=True,
    )
    features.resize_fn = True

    def check_reduce_node(node: torch.fx.Node) -> bool:
        dim_list = node.args[1]
        assert isinstance(dim_list, list)
        if len(dim_list) != 1:
            return False

        keepdim = node.args[2]
        assert isinstance(keepdim, bool)
        if not keepdim:
            return False

        return True

    features.check_node_fn = check_reduce_node
    return features
```

## Rationale

The purpose of these changes is to centralize operator definitions so that there is a common source of truth about the capabilities of operator implementations in Vulkan. This way, the partitioner does not have to implement ad-hoc functions for specific operators (e.g. `is_valid_to_copy`), and graph transforms do not have to maintain their own operator metadata (`USES_WEIGHTS` in `insert_prepack_nodes`).

Differential Revision: [D64915640](https://our.internmc.facebook.com/intern/diff/D64915640/)

[ghstack-poisoned]
2 parents: 253285a + fdb7392
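For orientation, here is a minimal, hypothetical sketch of the registry machinery the example above assumes. The names mirror this commit (`update_features`, `OpFeatures`, `TextureImplFeatures`, `vulkan_supported_ops`), but the real `OpFeatures` in `backends/vulkan/op_registry.py` carries more fields, and `OpKey` is simplified to `Any` here; the duplicate-registration guard matches the behavior introduced in the `op_registry.py` hunk below.

```python
# Hypothetical, trimmed-down sketch of the registry in op_registry.py.
from typing import Any, Callable, Dict, List, Optional, Union

vulkan_supported_ops: Dict[Any, "OpFeatures"] = {}


class TextureImplFeatures:
    def __init__(self, uses_packed_dim: bool = False) -> None:
        self.uses_packed_dim = uses_packed_dim


class OpFeatures:
    def __init__(self) -> None:
        # Set when the op has a texture-based implementation.
        self.texture_impl: Optional[TextureImplFeatures] = None
        # True if the Vulkan implementation supports dynamic shapes.
        self.resize_fn: bool = False
        # Per-node check used by the partitioner to reject unsupported args.
        self.check_node_fn: Callable[[Any], bool] = lambda node: True


def update_features(aten_op: Union[Any, List[Any]]):
    """Decorator: build an OpFeatures, let `fn` fill it in, record it per op key."""

    def features_decorator(fn: Callable):
        def update_features_impl(op: Any) -> None:
            if op in vulkan_supported_ops:
                raise RuntimeError(f"[Vulkan delegate] duplicate registration of {op}!")
            vulkan_supported_ops[op] = fn(OpFeatures())

        if isinstance(aten_op, list):
            for op in aten_op:
                update_features_impl(op)
        else:
            update_features_impl(aten_op)
        return fn

    return features_decorator
```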

7 files changed (+113, -95 lines)

backends/cadence/aot/ops_registrations.py (12 additions, 2 deletions)

@@ -132,7 +132,11 @@ def quantized_conv_meta(
     out_shift: torch.Tensor,
     channel_last: bool = False,
 ) -> torch.Tensor:
-    out_channels, _in_channels, *kernel_size = weight.shape
+    if channel_last:
+        out_channels, *kernel_size, _ = weight.shape
+    else:
+        out_channels, _, *kernel_size = weight.shape
     in_size = input.shape
     # Assert that the input tensor has at least 3 dimensions, and at most 6
     assert len(in_size) > 2
@@ -141,7 +145,13 @@ def quantized_conv_meta(
     # Compute the output tensor size
     output_size = (
         get_conv1d_output_size(
-            in_size, out_channels, stride[1], padding[1], dilation[1], kernel_size[0]
+            in_size,
+            out_channels,
+            stride[1],
+            padding[1],
+            dilation[1],
+            kernel_size[0],
+            channel_last,
         )
         if len(in_size) == 3
         else get_conv2d_output_size(
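As a quick standalone check of the star-unpacking above (weight shapes are illustrative): a channel-last weight is laid out `(out_channels, *kernel_size, in_channels)`, while a contiguous one is `(out_channels, in_channels, *kernel_size)`.

```python
# Illustrative weight shapes for a 2D conv with 16 output channels,
# 8 input channels, and a 3x3 kernel.
weight_shape_channel_last = (16, 3, 3, 8)  # (out_channels, *kernel_size, in_channels)
weight_shape_contiguous = (16, 8, 3, 3)    # (out_channels, in_channels, *kernel_size)

out_channels, *kernel_size, _ = weight_shape_channel_last
assert (out_channels, kernel_size) == (16, [3, 3])

out_channels, _, *kernel_size = weight_shape_contiguous
assert (out_channels, kernel_size) == (16, [3, 3])
```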

backends/cadence/aot/utils.py (10 additions, 3 deletions)

@@ -43,14 +43,20 @@ def get_conv1d_output_size(
     padding: int,
     dilation: int,
     kernel_size: int,
+    channel_last: bool,
 ) -> torch.Size:
     assert len(in_size) == 3
-    N, C, L = in_size
+    if channel_last:
+        N, L, C = in_size
+    else:
+        N, C, L = in_size
 
     # Reference: https://pytorch.org/docs/stable/generated/torch.nn.Conv1d.html
     lout = (L + 2 * padding - dilation * (kernel_size - 1) - 1) // stride + 1
 
-    return torch.Size((in_size[0], out_channels, lout))
+    if channel_last:
+        return torch.Size((N, lout, out_channels))
+    return torch.Size((N, out_channels, lout))
 
 
 # Get the output size of a 2D convolution given the input size and parameters
@@ -76,7 +82,8 @@ def get_conv2d_output_size(
     wout = (W + 2 * padding[1] - dilation[1] * (kernel_size[1] - 1) - 1) // stride[
         1
     ] + 1
-
+    if channel_last:
+        return torch.Size((N, hout, wout, out_channels))
     return torch.Size((in_size[0], out_channels, hout, wout))
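To sanity-check the formula, here is a self-contained mirror of the patched `get_conv1d_output_size` (a sketch for illustration, not an import of the Cadence helper), with one worked call per layout:

```python
import torch


def conv1d_output_size(in_size, out_channels, stride, padding, dilation,
                       kernel_size, channel_last):
    # Mirrors the patched get_conv1d_output_size above, for illustration only.
    assert len(in_size) == 3
    if channel_last:
        N, L, C = in_size
    else:
        N, C, L = in_size
    # Reference: https://pytorch.org/docs/stable/generated/torch.nn.Conv1d.html
    lout = (L + 2 * padding - dilation * (kernel_size - 1) - 1) // stride + 1
    if channel_last:
        return torch.Size((N, lout, out_channels))
    return torch.Size((N, out_channels, lout))


# NCL input (1, 8, 32), 16 filters, stride 2, kernel 3 -> (1, 16, 15)
print(conv1d_output_size((1, 8, 32), 16, 2, 0, 1, 3, channel_last=False))
# The same tensor in NLC layout (1, 32, 8) -> (1, 15, 16)
print(conv1d_output_size((1, 32, 8), 16, 2, 0, 1, 3, channel_last=True))
```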

backends/vulkan/op_registry.py (3 additions, 10 deletions)

@@ -92,8 +92,9 @@ def __init__(
 def update_features(aten_op):
     def features_decorator(fn: Callable):
         def update_features_impl(op: OpKey):
-            if op not in vulkan_supported_ops:
-                vulkan_supported_ops[op] = OpFeatures()
+            if op in vulkan_supported_ops:
+                raise RuntimeError(f"[Vulkan delegate] duplicate registration of {op}!")
+            vulkan_supported_ops[op] = OpFeatures()
             vulkan_supported_ops[op] = fn(vulkan_supported_ops[op])
 
         if isinstance(aten_op, list):
@@ -165,7 +166,6 @@ def register_binary_op(features: OpFeatures):
         exir_ops.edge.aten.sqrt.default,
         exir_ops.edge.aten.rsqrt.default,
         exir_ops.edge.aten.tanh.default,
-        exir_ops.edge.aten._to_copy.default,
     ]
 )
 def register_unary_op(features: OpFeatures):
@@ -216,8 +216,6 @@ def check_to_copy_node(node: torch.fx.Node) -> bool:
         exir_ops.edge.aten.mm.default,
         exir_ops.edge.aten.addmm.default,
         exir_ops.edge.aten.linear.default,
-        exir_ops.edge.et_vk.linear_weight_int4.default,
-        exir_ops.edge.aten._weight_int8pack_mm.default,
     ]
 )
 def register_mm_op(features: OpFeatures):
@@ -276,8 +274,6 @@ def register_softmax_op(features: OpFeatures):
 
 @update_features(
     [
-        exir_ops.edge.aten._log_softmax.default,
-        exir_ops.edge.aten._softmax.default,
         exir_ops.edge.aten.mean.dim,
         exir_ops.edge.aten.sum.dim_IntList,
         exir_ops.edge.aten.amax.default,
@@ -366,9 +362,6 @@ def register_view_op(features: OpFeatures):
 # packed tensors only and do not have a resize function.
 @update_features(
     [
-        # Normalization
-        exir_ops.edge.aten._native_batch_norm_legit_no_training.default,
-        exir_ops.edge.aten.native_layer_norm.default,
         # Shape Manipulation
         exir_ops.edge.aten.squeeze_copy.dims,
         exir_ops.edge.aten.unsqueeze_copy.default,
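The net effect of the first hunk: re-registering an op key is now a hard error instead of a silent reset of the earlier entry. A hypothetical illustration (the import paths assume this commit's new file layout; the exir_ops import is the standard ExecuTorch spelling):

```python
from executorch.backends.vulkan.op_registry import OpFeatures, update_features
from executorch.exir.dialects._ops import ops as exir_ops


# Hypothetical: aten.tanh.default is already registered by register_unary_op
# above, so this second registration now raises at import time with
# "[Vulkan delegate] duplicate registration of <op>!".
@update_features(exir_ops.edge.aten.tanh.default)
def register_tanh_twice(features: OpFeatures):
    features.resize_fn = True
    return features
```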

backends/vulkan/partitioner/vulkan_partitioner.py (9 additions, 2 deletions)

@@ -139,12 +139,16 @@ def is_in_local_scalar_dense_chain(self, node: torch.fx.Node) -> bool:
 
         return False
 
+    def log_skip(self, node: torch.fx.Node, reason: str) -> None:
+        if node.op == "call_function":
+            logger.info(
+                f"[Vulkan Partitioner] Due to [{reason}], skipping {node.format_node()}"
+            )
+
     def is_node_supported(
         self, submodules: Mapping[str, torch.nn.Module], node: torch.fx.Node
     ) -> bool:
         r = self._is_node_supported(submodules, node)
-        if not r and node.op == "call_function":
-            logger.info(f"Skipping node in Vulkan partitioning: {node.format_node()}")
         return r
 
     def _is_node_supported(
@@ -163,14 +167,17 @@ def _is_node_supported(
             return True
 
         if target not in vulkan_supported_ops:
+            self.log_skip(node, "not in vulkan_supported_ops")
             return False
 
         features = vulkan_supported_ops[target]
 
         if not features.check_node_fn(node):
+            self.log_skip(node, "op args not supported")
             return False
 
         if self.require_dynamic_shapes and not features.resize_fn:
+            self.log_skip(node, "no dynamic shape support")
             return False
 
         return self.all_args_compatible(node)
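Note that the new skip messages go through `logger.info`, so they only appear when the exporting script has INFO-level logging enabled; a minimal sketch:

```python
import logging

# Surface the partitioner's skip reasons, which now look like:
#   [Vulkan Partitioner] Due to [<reason>], skipping <node.format_node() output>
logging.basicConfig(level=logging.INFO)
```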
1 addition, 1 deletion

@@ -1,4 +1,4 @@
 load(":targets.bzl", "define_common_targets")
 oncall("executorch")
 
-define_common_targets()
+define_common_targets(is_fbcode = True)

backends/vulkan/serialization/targets.bzl (24 additions, 23 deletions)

@@ -1,28 +1,6 @@
 load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
 
-def define_common_targets():
-    runtime.python_library(
-        name = "lib",
-        srcs = [
-            "vulkan_graph_builder.py",
-            "vulkan_graph_schema.py",
-            "vulkan_graph_serialize.py",
-        ],
-        resources = [
-            "schema.fbs",
-        ],
-        visibility = [
-            "//executorch/...",
-            "//executorch/vulkan/...",
-            "@EXECUTORCH_CLIENTS",
-        ],
-        deps = [
-            "//executorch/exir:graph_module",
-            "//executorch/exir/_serialize:_bindings",
-            "//executorch/exir/_serialize:lib",
-        ],
-    )
-
+def define_common_targets(is_fbcode = False):
     runtime.genrule(
         name = "gen_vk_delegate_schema",
         srcs = ["schema.fbs"],
@@ -57,3 +35,26 @@ def define_common_targets():
             "flatbuffers-api",
         ],
     )
+
+    if is_fbcode:
+        runtime.python_library(
+            name = "lib",
+            srcs = [
+                "vulkan_graph_builder.py",
+                "vulkan_graph_schema.py",
+                "vulkan_graph_serialize.py",
+            ],
+            resources = [
+                "schema.fbs",
+            ],
+            visibility = [
+                "//executorch/...",
+                "//executorch/vulkan/...",
+                "@EXECUTORCH_CLIENTS",
+            ],
+            deps = [
+                "//executorch/exir:graph_module",
+                "//executorch/exir/_serialize:_bindings",
+                "//executorch/exir/_serialize:lib",
+            ],
+        )

backends/vulkan/targets.bzl (54 additions, 54 deletions)

@@ -203,59 +203,59 @@ def define_common_targets(is_fbcode = False):
     ##
     ## AOT targets
     ##
+    if is_fbcode:
+        runtime.python_library(
+            name = "custom_ops_lib",
+            srcs = [
+                "custom_ops_lib.py"
+            ],
+            visibility = [
+                "//executorch/...",
+                "//executorch/vulkan/...",
+                "@EXECUTORCH_CLIENTS",
+            ],
+            deps = [
+                "//caffe2:torch",
+            ]
+        )
 
-    runtime.python_library(
-        name = "custom_ops_lib",
-        srcs = [
-            "custom_ops_lib.py"
-        ],
-        visibility = [
-            "//executorch/...",
-            "//executorch/vulkan/...",
-            "@EXECUTORCH_CLIENTS",
-        ],
-        deps = [
-            "//caffe2:torch",
-        ]
-    )
+        runtime.python_library(
+            name = "op_registry",
+            srcs = [
+                "op_registry.py",
+            ],
+            visibility = [
+                "//executorch/...",
+                "//executorch/vulkan/...",
+                "@EXECUTORCH_CLIENTS",
+            ],
+            deps = [
+                ":custom_ops_lib",
+                "//caffe2:torch",
+                "//executorch/exir/dialects:lib",
+                "//executorch/backends/vulkan/serialization:lib",
+            ]
+        )
 
-    runtime.python_library(
-        name = "op_registry",
-        srcs = [
-            "op_registry.py",
-        ],
-        visibility = [
-            "//executorch/...",
-            "//executorch/vulkan/...",
-            "@EXECUTORCH_CLIENTS",
-        ],
-        deps = [
-            ":custom_ops_lib",
-            "//caffe2:torch",
-            "//executorch/exir/dialects:lib",
-            "//executorch/backends/vulkan/serialization:lib",
-        ]
-    )
-
-    runtime.python_library(
-        name = "vulkan_preprocess",
-        srcs = [
-            "vulkan_preprocess.py",
-        ],
-        visibility = [
-            "//executorch/...",
-            "//executorch/vulkan/...",
-            "@EXECUTORCH_CLIENTS",
-        ],
-        deps = [
-            "//executorch/backends/transforms:addmm_mm_to_linear",
-            "//executorch/backends/transforms:fuse_batch_norm_with_conv",
-            "//executorch/backends/transforms:fuse_conv_with_clamp",
-            "//executorch/backends/transforms:fuse_dequant_linear",
-            "//executorch/backends/transforms:fuse_view_copy",
-            "//executorch/backends/transforms:remove_clone_ops",
-            "//executorch/backends/vulkan/_passes:vulkan_passes",
-            "//executorch/backends/vulkan/serialization:lib",
-            "//executorch/exir/backend:backend_details",
-        ],
-    )
+        runtime.python_library(
+            name = "vulkan_preprocess",
+            srcs = [
+                "vulkan_preprocess.py",
+            ],
+            visibility = [
+                "//executorch/...",
+                "//executorch/vulkan/...",
+                "@EXECUTORCH_CLIENTS",
+            ],
+            deps = [
+                "//executorch/backends/transforms:addmm_mm_to_linear",
+                "//executorch/backends/transforms:fuse_batch_norm_with_conv",
+                "//executorch/backends/transforms:fuse_conv_with_clamp",
+                "//executorch/backends/transforms:fuse_dequant_linear",
+                "//executorch/backends/transforms:fuse_view_copy",
+                "//executorch/backends/transforms:remove_clone_ops",
+                "//executorch/backends/vulkan/_passes:vulkan_passes",
+                "//executorch/backends/vulkan/serialization:lib",
+                "//executorch/exir/backend:backend_details",
+            ],
+        )
