
Commit da58a57

Update
[ghstack-poisoned]
2 parents c9bd251 + dedfdaf commit da58a57

12 files changed (+150, -65 lines)


backends/arm/operator_support/pool_2d_support.py

Lines changed: 3 additions & 3 deletions
@@ -26,8 +26,8 @@ def stride_check(strides: tuple[int, int]) -> bool:


 def dim_check(shape=torch.Size) -> bool:
-    check = shape[0] == 1
-    for dim in shape:
+    check = True
+    for dim in shape[1:]:
         check &= 1 <= dim <= 65536
     return check

@@ -59,7 +59,7 @@ def is_node_tosa_supported(self, node: fx.Node, tosa_spec: TosaSpecification):
         if not kernel_check(kernel):
             return False

-        return dim_check(shape) and stride_check(stride)
+        return dim_check(shape) and shape[0] == 1 and stride_check(stride)


 @register_tosa_support_check
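
Note: dim_check no longer hard-codes the batch requirement; it now validates only the non-batch dims against the 1..65536 range, and the caller adds the shape[0] == 1 condition explicitly where it still applies. A minimal standalone sketch of the new behaviour (illustrative only, outside the operator-support class):

    import torch

    def dim_check(shape=torch.Size) -> bool:
        # Only the dims after the batch dim must fit the 1..65536 range.
        check = True
        for dim in shape[1:]:
            check &= 1 <= dim <= 65536
        return check

    shape = torch.Size([2, 16, 5, 5])
    print(dim_check(shape))                    # True: non-batch dims are in range
    print(dim_check(shape) and shape[0] == 1)  # False: this caller still rejects batch > 1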

backends/arm/runtime/EthosUBackend.cpp

Lines changed: 7 additions & 1 deletion
@@ -193,6 +193,10 @@ class EthosUBackend final : public ::executorch::runtime::BackendInterface {
       supported |=
           (tensor_in.scalar_type() == ScalarType::Char and
            handles.inputs->io[i].elem_size == 1);
+      // 16 bit int (IOQDQ pass prepared networks)
+      supported |=
+          (tensor_in.scalar_type() == ScalarType::Short and
+           handles.inputs->io[i].elem_size == 2);
       if (!supported) {
         ET_LOG(
             Error,
@@ -220,6 +224,8 @@ class EthosUBackend final : public ::executorch::runtime::BackendInterface {
           handles.inputs->io[i].elem_size == 1;
       bool both_int = tensor_in.scalar_type() == ScalarType::Int and
           handles.inputs->io[i].elem_size == 4;
+      bool both_short = tensor_in.scalar_type() == ScalarType::Short and
+          handles.inputs->io[i].elem_size == 2;

       // Select a compatible copy routine
       if (both_char and permuted_input_shape) {
@@ -233,7 +239,7 @@ class EthosUBackend final : public ::executorch::runtime::BackendInterface {
             tensor_in.size(1),
             tensor_in.size(2),
             tensor_in.size(3));
-      } else if (both_char or both_int) {
+      } else if (both_char or both_int or both_short) {
         EXECUTORCH_PROF_SCOPE(
             event_tracer, "+EthosUBackend::execute()handles.input.memcpy()");
         // Sizes match and elt size matches so memcpy
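
Note: the added Short / elem_size == 2 branch lets int16 I/O (from IOQDQ-prepared networks) pass the input check and take the plain memcpy path, alongside the existing int8 and int32 handling. A rough Python sketch of the (scalar type, element size) pairs this accepts, for illustration only; the names are hypothetical and the real check is the C++ above, which also covers cases not shown:

    # Hypothetical helper mirroring the elem_size matching rule; not the backend API.
    ACCEPTED_INT_IO = {
        ("Char", 1),   # int8
        ("Short", 2),  # int16, new in this commit (IOQDQ pass prepared networks)
        ("Int", 4),    # int32
    }

    def int_input_matches(scalar_type: str, elem_size: int) -> bool:
        return (scalar_type, elem_size) in ACCEPTED_INT_IO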

backends/arm/test/ops/test_max_pool.py

Lines changed: 18 additions & 2 deletions
@@ -1,6 +1,6 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
-# Copyright 2024-2025 Arm Limited and/or its affiliates.
 # All rights reserved.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
@@ -232,8 +232,24 @@ def test_maxpool2d_tosa_u85_BI_mult_batches(
         if conftest.is_option_enabled("corstone_fvp"):
             tester.run_method_and_compare_outputs(qtol=1, inputs=(test_data,))

+    @parameterized.expand(test_data_suite_mult_batches)
+    @pytest.mark.corstone_fvp
+    @conftest.expectedFailureOnFVP  # TODO: MLETORCH-433
+    def test_maxpool2d_tosa_u55_BI_mult_batches(
+        self,
+        test_name: str,
+        test_data: torch.Tensor,
+        model_params: int | Tuple[int, int],
+    ):
+        tester = self._test_maxpool2d_tosa_ethos_BI_pipeline(
+            self.MaxPool2d(*model_params),
+            common.get_u55_compile_spec(),
+            (test_data,),
+        )
+        if conftest.is_option_enabled("corstone_fvp"):
+            tester.run_method_and_compare_outputs(qtol=1, inputs=(test_data,))
+
     reject_data_suite = [
-        (MaxPool2d(1, 1, 0), torch.rand(2, 5, 5, 5)),
         (MaxPool2d(1, 4, 0), torch.rand(1, 10, 10, 10)),
         (MaxPool2d((1, 257), 1, 0), torch.rand(1, 16, 5, 300)),
         (MaxPool2d((800, 90), 1, 0), torch.rand(1, 16, 850, 100)),

backends/arm/test/ops/test_rshift.py

Lines changed: 2 additions & 5 deletions
@@ -1,5 +1,4 @@
 # Copyright 2024-2025 Arm Limited and/or its affiliates.
-# All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
@@ -75,16 +74,14 @@ def test_rshift_tosa_MI(self, test_data):
     def test_rshift_tosa_BI(self, test_data):
         self._test_rshift_tosa_BI(test_data)

-    # TODO: MLETORCH-644 - Add support for INT16 input/output
-    @parameterized.expand(Rshift.test_data[:-1])
+    @parameterized.expand(Rshift.test_data)
     def test_rshift_u55_BI(self, test_data):
         compile_spec = common.get_u55_compile_spec()
         tester = self._test_rshift_ethosu_BI(test_data, compile_spec)
         if conftest.is_option_enabled("corstone_fvp"):
             tester.run_method_and_compare_outputs(atol=1, inputs=test_data)

-    # TODO: MLETORCH-644 - Add support for INT16 input/output
-    @parameterized.expand(Rshift.test_data[:-1])
+    @parameterized.expand(Rshift.test_data)
     def test_rshift_u85_BI(self, test_data):
         compile_spec = common.get_u85_compile_spec()
         tester = self._test_rshift_ethosu_BI(test_data, compile_spec)

backends/xnnpack/test/TARGETS

Lines changed: 1 addition & 1 deletion
@@ -53,7 +53,7 @@ runtime.python_test(
     srcs = glob([
         "models/*.py",
     ]),
-    tags = ["long_running"],
+    labels = ["long_running"],
     deps = [
         "fbsource//third-party/pypi/timm:timm",
         "fbsource//third-party/pypi/torchsr:torchsr",  # @manual

examples/models/llama/export_llama_lib.py

Lines changed: 91 additions & 41 deletions
@@ -676,47 +676,62 @@ def _validate_args(args):
         )


-def _export_llama(args) -> LLMEdgeManager:  # noqa: C901
-    _validate_args(args)
-
-    pt2e_quant_params, quantizers, quant_dtype = get_quantizer_and_quant_params(args)
-
-    # export_to_edge
-    builder_exported = _prepare_for_llama_export(args).export()
-
-    builder_exported.run_canonical_optimizations()
-
-    if args.export_only:
-        exit()
-
-    builder_exported_to_edge = builder_exported.pt2e_quantize(
-        quantizers
-    ).export_to_edge()
-
-    modelname = builder_exported_to_edge.modelname
-
-    # to_backend
+def _to_edge_and_lower_llama_xnnpack(
+    builder_exported,
+    modelname,
+    additional_passes,
+    pt2e_quant_params,
+    quantizers,
+    quant_dtype,
+    args,
+) -> LLMEdgeManager:  # noqa: C901
     partitioners = []

     # Order matters here, dynamic quantization should be applied first when both xnnpack and xnnpack_extended_ops are enabled
-    if (
-        pt2e_quant_params is not None and pt2e_quant_params.quantize_linear is not None
-    ) or (args.xnnpack):
-        partitioners.append(
-            get_xnnpack_partitioner(dynamic_quant_only_partitioner=True)
-        )
+    partitioners.append(get_xnnpack_partitioner(dynamic_quant_only_partitioner=True))

-        # force xnnpack to be true if pt2e_quant_params is not None and args.xnnpack is False
-        args.xnnpack = True
-        modelname = f"xnnpack_dq_{modelname}"
+    modelname = f"xnnpack_dq_{modelname}"

     if args.xnnpack_extended_ops:
-        assert args.xnnpack, "xnnpack_extended_ops requires xnnpack to be enabled"
         partitioners.append(
             get_xnnpack_partitioner(dynamic_quant_only_partitioner=False)
         )
         modelname = f"xnnpack_{modelname}"

+    logging.info("Lowering model using following partitioner(s): ")
+    for partitioner in partitioners:
+        logging.info(f"--> {partitioner.__class__.__name__}")
+
+    # TODO: Enable generating ETRecord with XNNPack and to_edge_transform_and_lower().
+    if args.generate_etrecord:
+        raise NotImplementedError(
+            "export_llama does not support XNNPack and generating ETRecord at the moment."
+        )
+
+    builder = builder_exported.pt2e_quantize(quantizers).to_edge_transform_and_lower(
+        partitioners
+    )
+    if args.verbose:
+        print_delegation_info(builder.edge_manager.exported_program().graph_module)
+
+    return builder.to_executorch(passes=additional_passes)
+
+
+def _to_edge_and_lower_llama(  # noqa: C901
+    builder_exported,
+    modelname,
+    additional_passes,
+    pt2e_quant_params,
+    quantizers,
+    quant_dtype,
+    args,
+):
+    builder_exported_to_edge = builder_exported.pt2e_quantize(
+        quantizers
+    ).export_to_edge()
+
+    # to_backend
+    partitioners = []
     if args.vulkan:
         partitioners.append(
             get_vulkan_partitioner(
@@ -731,7 +746,6 @@ def _export_llama(args) -> LLMEdgeManager:  # noqa: C901
         modelname = f"vulkan_{modelname}"

     # Need to remove asserts from the graph to prevent graph breaks
-    # pyre-ignore: Undefined attribute [16]: `Optional` has no attribute `exported_program`.
     remove_asserts(builder_exported_to_edge.edge_manager.exported_program())

     if args.mps:
@@ -760,13 +774,11 @@ def _export_llama(args) -> LLMEdgeManager:  # noqa: C901
         # pyre-ignore: Undefined import [21]: Could not find a module corresponding to import `executorch.backends.qualcomm.utils.utils`
         from executorch.backends.qualcomm.utils.utils import _transform, tag_quant_io

-        # pyre-ignore: Undefined attribute [16]: Module `executorch.backends` has no attribute `qualcomm`, Optional type has no attribute `exported_program`
         _transform(builder_exported_to_edge.edge_manager.exported_program())

         if args.num_sharding > 0:
             model_sharding.split_graph(
                 builder_exported_to_edge.edge_manager.exported_program(),
-                # pyre-fixme[16]: `Optional` has no attribute `__getitem__`.
                 builder_exported_to_edge.metadata["get_n_layers"],
                 shares=args.num_sharding,
             )
@@ -792,19 +804,15 @@ def _export_llama(args) -> LLMEdgeManager:  # noqa: C901
                     atten.head_dim,
                 )
             )
-        # pyre-ignore
         tag_quant_io(
             builder_exported_to_edge.edge_manager.exported_program().graph_module,
-            partial(get_custom_quant_ios_dtype, cache_shape),  # pyre-ignore
+            partial(get_custom_quant_ios_dtype, cache_shape),
         )

     logging.info("Lowering model using following partitioner(s): ")
     for partitioner in partitioners:
         logging.info(f"--> {partitioner.__class__.__name__}")

-    additional_passes = []
-    if args.model in TORCHTUNE_DEFINED_MODELS:
-        additional_passes = [InitializedMutableBufferPass(["kv_cache_pos"])]
     if args.generate_etrecord:
         if not builder_exported_to_edge.edge_manager:
             raise ValueError("Unable to generate etrecord due to missing edge manager.")
@@ -818,7 +826,6 @@ def _export_llama(args) -> LLMEdgeManager:  # noqa: C901
         if args.num_sharding > 0 and args.qnn:
             from executorch.backends.qualcomm.utils.utils import canonicalize_program

-            # pyre-fixme[16]: Module `backends` has no attribute `qualcomm`.
             canonicalize_program(builder.edge_manager.exported_program())

         builder = builder.to_executorch(
@@ -840,11 +847,55 @@ def _export_llama(args) -> LLMEdgeManager:  # noqa: C901
         if args.num_sharding > 0 and args.qnn:
             from executorch.backends.qualcomm.utils.utils import canonicalize_program

-            # pyre-fixme[16]: Module `backends` has no attribute `qualcomm`.
             canonicalize_program(builder.edge_manager.exported_program())

         builder = builder.to_executorch(passes=additional_passes)

+    return builder
+
+
+def _export_llama(args) -> LLMEdgeManager:  # noqa: C901
+    _validate_args(args)
+
+    pt2e_quant_params, quantizers, quant_dtype = get_quantizer_and_quant_params(args)
+
+    additional_passes = []
+    if args.model in TORCHTUNE_DEFINED_MODELS:
+        additional_passes = [InitializedMutableBufferPass(["kv_cache_pos"])]
+
+    # export_to_edge
+    builder_exported = _prepare_for_llama_export(args).export()
+    builder_exported.run_canonical_optimizations()
+    modelname = builder_exported.modelname
+
+    if args.export_only:
+        exit()
+
+    if pt2e_quant_params is not None and pt2e_quant_params.quantize_linear is not None:
+        # Force xnnpack to be true if pt2e_quant_params is not None and args.xnnpack is False
+        args.xnnpack = True
+
+    if args.xnnpack:
+        builder = _to_edge_and_lower_llama_xnnpack(
+            builder_exported,
+            modelname,
+            additional_passes,
+            pt2e_quant_params,
+            quantizers,
+            quant_dtype,
+            args,
+        )
+    else:
+        builder = _to_edge_and_lower_llama(
+            builder_exported,
+            modelname,
+            additional_passes,
+            pt2e_quant_params,
+            quantizers,
+            quant_dtype,
+            args,
+        )
+
     if args.profile_memory:
         generate_memory_trace(builder.export_program, "memory_profile.json")

@@ -866,7 +917,6 @@ def _export_llama(args) -> LLMEdgeManager:  # noqa: C901
     output_file = f"{builder.output_dir}/{modelname}.pte"

     builder.save_to_pte(output_file)
-
     return builder
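
Note: the refactor splits lowering out of _export_llama into two helpers: the XNNPACK path goes through pt2e_quantize + to_edge_transform_and_lower, while the generic path keeps pt2e_quantize + export_to_edge + per-backend to_backend. A condensed sketch of the resulting control flow (it leans on the module's own helpers and omits the profiling and output-file handling):

    def _export_llama(args) -> LLMEdgeManager:
        _validate_args(args)
        pt2e_quant_params, quantizers, quant_dtype = get_quantizer_and_quant_params(args)

        additional_passes = []
        if args.model in TORCHTUNE_DEFINED_MODELS:
            additional_passes = [InitializedMutableBufferPass(["kv_cache_pos"])]

        builder_exported = _prepare_for_llama_export(args).export()
        builder_exported.run_canonical_optimizations()

        if pt2e_quant_params is not None and pt2e_quant_params.quantize_linear is not None:
            args.xnnpack = True  # dynamic linear quantization implies the XNNPACK path

        lower = _to_edge_and_lower_llama_xnnpack if args.xnnpack else _to_edge_and_lower_llama
        return lower(
            builder_exported,
            builder_exported.modelname,
            additional_passes,
            pt2e_quant_params,
            quantizers,
            quant_dtype,
            args,
        )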

examples/models/llava/export_llava.py

Lines changed: 0 additions & 1 deletion
@@ -67,7 +67,6 @@ def export(self) -> "LlavaEdgeManager":
             dynamic_shapes=dynamic_shape,
             strict=False,
         )
-        # pyre-ignore: Incompatible attribute type [8]: Attribute `pre_autograd_graph_module` declared in class `LLMEdgeManager` has type `Optional[GraphModule]` but is used as type `Module`.
         self.pre_autograd_graph_module = self.export_program.module()
         return self

exir/dialects/edge/test/TARGETS

Lines changed: 1 addition & 1 deletion
@@ -10,7 +10,7 @@ python_unittest(
     resources = {
         "//executorch/exir/dialects/edge:edge_yaml": "edge.yaml",
     },
-    tags = ["long_running"],
+    labels = ["long_running"],
    deps = [
        "fbsource//third-party/pypi/expecttest:expecttest",  # @manual
        "//caffe2:torch",
