@@ -9,22 +9,17 @@
 from typing import Tuple
 
 import pytest
+
 import torch
-from executorch.backends.arm.quantizer.arm_quantizer import (
-    get_symmetric_a16w8_quantization_config,
-    TOSAQuantizer,
-)
-from executorch.backends.arm.test import common, conftest
+from executorch.backends.arm.test import common
 
 from executorch.backends.arm.test.tester.test_pipeline import (
     EthosU55PipelineINT,
     EthosU85PipelineINT,
     TosaPipelineFP,
     TosaPipelineINT,
     VgfPipeline,
 )
-from executorch.backends.arm.tosa_specification import TosaSpecification
-from executorch.backends.xnnpack.test.tester import Quantize
 
 aten_op = "torch.ops.aten.linear.default"
 
@@ -148,6 +143,7 @@ def test_linear_tosa_FP(test_data: torch.Tensor):
     pipeline.run()
 
 
+@pytest.mark.flaky(reruns=5)  # TODO: Investigate flakiness.
 @common.parametrize("test_data", test_data_rank1_INT | test_data_rank4_INT)
 def test_linear_tosa_INT(test_data: torch.Tensor):
     test_data, out_features, has_bias, per_channel_quantization = test_data()
@@ -247,64 +243,3 @@ def test_linear_vgf_INT(test_data: torch.Tensor):
         per_channel_quantization=per_channel_quantization,
     )
     pipeline.run()
-
-
-def get_symmetric_a16w8_linear_quantizer(
-    u55_config=False, per_channel_quantization=False
-):
-    tosa_version = conftest.get_option("tosa_version")
-    tosa_profiles = {
-        "1.0": TosaSpecification.create_from_string("TOSA-1.0+INT+int16"),
-    }
-
-    quantizer = TOSAQuantizer(tosa_profiles[tosa_version])
-    quantizer.set_global(
-        get_symmetric_a16w8_quantization_config(is_per_channel=per_channel_quantization)
-    )
-    quantizer.set_module_type(
-        torch.nn.Linear,
-        get_symmetric_a16w8_quantization_config(
-            is_per_channel=per_channel_quantization
-        ),
-    )
-
-    return Quantize(
-        quantizer,
-        get_symmetric_a16w8_quantization_config(
-            is_per_channel=per_channel_quantization
-        ),
-    )
-
-
-@common.parametrize("test_data", test_data_rank1_INT | test_data_rank4_INT)
-@pytest.mark.xfail(
-    reason="missing int16 linear ops support; fails at TOSA reference model run with Invalid TOSA graph"
-)
-def test_linear_16a8w_tosa_INT(test_data: torch.Tensor):
-    """Test linear operation with 16A8W quantization (16-bit activations, 8-bit weights)"""
-    test_data, out_features, has_bias, per_channel_quantization = test_data()
-    in_features = test_data.shape[-1]
-
-    # Create pipeline with custom 16A8W quantization config
-    pipeline = TosaPipelineINT[input_t1](
-        Linear(
-            in_features=in_features,
-            out_features=out_features,
-            bias=has_bias,
-        ),
-        (test_data,),
-        aten_op,
-        exir_op=[],
-        per_channel_quantization=per_channel_quantization,
-        use_to_edge_transform_and_lower=True,
-        tosa_extensions=["int16"],
-    )
-
-    pipeline.change_args(
-        "quantize",
-        get_symmetric_a16w8_linear_quantizer(
-            per_channel_quantization=per_channel_quantization
-        ),
-    )
-    # Run the pipeline
-    pipeline.run()