
Commit 618c572

Merge branch 'main' into gh/gasoonjia/36/orig

2 parents: d9660cb + ec0e4a3
57 files changed: +1363 −90 lines


.ci/scripts/test_huggingface_optimum_model.py

Lines changed: 9 additions & 3 deletions

@@ -262,14 +262,20 @@ def test_vit(model_id, model_dir, recipe, *, quantize=False, run_only=False):

     assert torch.allclose(
         eager_output.logits, et_output, atol=1e-02, rtol=1e-02
-    ), "CoreML output does not match eager"
+    ), "Model output does not match eager"


 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("--model", type=str, required=True)
     parser.add_argument("--recipe", type=str, required=True)
     parser.add_argument("--quantize", action="store_true", help="Enable quantization")
+    parser.add_argument(
+        "--model_dir",
+        type=str,
+        required=False,
+        help="When provided, write the pte file to this directory. Otherwise, a temporary directory is created for the test.",
+    )
     args = parser.parse_args()

     model_to_model_id_and_test_function = {
@@ -294,11 +300,11 @@ def test_vit(model_id, model_dir, recipe, *, quantize=False, run_only=False):
             f"Unknown model name: {args.model}. Available models: {model_to_model_id_and_test_function.keys()}"
         )

+    model_id, test_fn = model_to_model_id_and_test_function[args.model]
     with tempfile.TemporaryDirectory() as tmp_dir:
-        model_id, test_fn = model_to_model_id_and_test_function[args.model]
         test_fn(
             model_id=model_id,
-            model_dir=tmp_dir,
+            model_dir=tmp_dir if args.model_dir is None else args.model_dir,
             recipe=args.recipe,
             quantize=args.quantize,
         )
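The new flag lets CI (or a developer) keep the exported .pte artifact instead of losing it with the temporary directory. A hypothetical invocation, with the model and recipe names taken from the trunk.yml matrix below and the output path invented for illustration:

    import subprocess

    # Persist the exported .pte by passing --model_dir; omitting the flag
    # preserves the old behavior of exporting into a TemporaryDirectory.
    subprocess.run(
        [
            "python", ".ci/scripts/test_huggingface_optimum_model.py",
            "--model", "bert",
            "--recipe", "coreml_fp32_gpu",
            "--quantize",
            "--model_dir", "./exported_models",  # hypothetical output path
        ],
        check=True,
    )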

.github/workflows/trunk.yml

Lines changed: 3 additions & 0 deletions
@@ -815,6 +815,9 @@ jobs:
           smollm|coreml_fp32_gpu|--quantize,
           llama3|coreml_fp32_gpu|--quantize,
           olmo|coreml_fp32_gpu|--quantize,
+          # roberta|coreml_fp32_gpu|--quantize, roberta requires special HF access
+          bert|coreml_fp32_gpu|--quantize,
+          distilbert|coreml_fp32_gpu|--quantize,
         ]
       fail-fast: false
     with:
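Each matrix entry packs model, recipe, and extra flags into one `|`-separated string. A minimal sketch of how such an entry could map onto the test script's CLI (the split shown here is illustrative; the actual plumbing lives in the workflow):

    entry = "bert|coreml_fp32_gpu|--quantize"
    model, recipe, *flags = entry.split("|")
    # -> ["--model", "bert", "--recipe", "coreml_fp32_gpu", "--quantize"]
    cli_args = ["--model", model, "--recipe", recipe, *flags]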

backends/arm/arm_backend.py

Lines changed: 1 addition & 1 deletion
@@ -57,7 +57,7 @@ def vgf_compile_spec(
             f"Invalid TOSA version: {tosa_version}"
         )

-    if not ("FP" or "INT" in tosa_profiles):
+    if "FP" not in tosa_profiles and "INT" not in tosa_profiles:
         raise ValueError(
             "Arm backend only supports converter-backend for FP or INT. "
             f"Invalid TOSA profile: {tosa_profiles}"

backends/arm/quantizer/arm_quantizer.py

Lines changed: 29 additions & 3 deletions
@@ -14,7 +14,7 @@
 from __future__ import annotations

 import functools
-from typing import Any, Callable, Dict, List, Optional
+from typing import Any, Callable, Dict, List, Optional, Union

 import torch
 from executorch.backends.arm._passes import ArmPassManager
@@ -218,9 +218,35 @@ def not_module_type_or_name_filter(n: Node) -> bool:

 class TOSAQuantizer(Quantizer):

-    def __init__(self, tosa_spec: TosaSpecification) -> None:
+    def __init__(
+        self, compile_spec_or_tosa_spec: Union[TosaSpecification, List[CompileSpec]]
+    ) -> None:
+
         super().__init__()
-        self.tosa_spec = tosa_spec
+        if isinstance(compile_spec_or_tosa_spec, TosaSpecification):
+            self.tosa_spec = compile_spec_or_tosa_spec
+            self.compile_spec = None
+        elif isinstance(compile_spec_or_tosa_spec, list):
+            self.compile_spec = compile_spec_or_tosa_spec
+            # find entry that is 'tosa_spec'
+            for cs in compile_spec_or_tosa_spec:
+                if cs.key == "tosa_spec":
+                    spec_val = (
+                        cs.value.decode() if isinstance(cs.value, bytes) else cs.value
+                    )
+                    self.tosa_spec = TosaSpecification.create_from_string(spec_val)
+                    break
+            else:
+                raise ValueError(
+                    "compile_spec list did not contain a 'tosa_spec' entry"
+                )
+        else:
+            raise TypeError(
+                f"TOSAQuantizer constructor expects "
+                f"a TosaSpecification or compile_spec list, "
+                f"got {type(compile_spec_or_tosa_spec)}"
+            )
+
         self.global_config: Optional[QuantizationConfig] = None
         self.io_config: Optional[QuantizationConfig] = None
         self.module_type_config: Dict[Callable, Optional[QuantizationConfig]] = {}
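The constructor now accepts either a bare TosaSpecification (the old signature) or a compile-spec list, from which it recovers the spec through the 'tosa_spec' entry. A sketch of both construction paths, reusing the builder call exercised by the new test below:

    from executorch.backends.arm.arm_backend import ArmCompileSpecBuilder
    from executorch.backends.arm.quantizer.arm_quantizer import TOSAQuantizer
    from executorch.backends.arm.tosa_specification import TosaSpecification

    spec = TosaSpecification.create_from_string("TOSA-1.0+INT")

    # Path 1: pass the TosaSpecification directly, as before.
    quantizer_from_spec = TOSAQuantizer(spec)

    # Path 2: pass a compile-spec list; the constructor finds the
    # 'tosa_spec' entry and rebuilds the specification from its value.
    compile_spec = ArmCompileSpecBuilder().tosa_compile_spec(tosa_spec=spec).build()
    quantizer_from_compile_spec = TOSAQuantizer(compile_spec)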
Lines changed: 96 additions & 0 deletions (new file)

@@ -0,0 +1,96 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import copy
+
+import pytest
+import torch
+from executorch.backends.arm.arm_backend import ArmCompileSpecBuilder
+from executorch.backends.arm.quantizer import VgfQuantizer
+from executorch.backends.arm.quantizer.arm_quantizer import (
+    get_symmetric_quantization_config,
+    TOSAQuantizer,
+)
+
+from executorch.backends.arm.test.common import SkipIfNoModelConverter
+from executorch.backends.arm.tosa_partitioner import TOSAPartitioner
+from executorch.backends.arm.tosa_specification import TosaSpecification
+from executorch.backends.arm.vgf_partitioner import VgfPartitioner
+from executorch.exir import to_edge_transform_and_lower
+from executorch.exir.passes.quantize_io_pass import extract_io_quant_params
+from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e
+
+
+class SimpleAdd(torch.nn.Module):
+    def forward(self, x, y):
+        return x + y
+
+
+@pytest.mark.parametrize(
+    "builder_method, quantizer_cls, partitioner_cls",
+    [
+        ("tosa_compile_spec", TOSAQuantizer, TOSAPartitioner),
+        pytest.param(
+            "vgf_compile_spec",
+            VgfQuantizer,
+            VgfPartitioner,
+            marks=SkipIfNoModelConverter,
+            id="VGF",
+        ),
+    ],
+)
+def test_roundtrip_extracts_io_params(builder_method, quantizer_cls, partitioner_cls):
+    """
+    Validates that IO quantization parameters round-trip for both flows.
+    """
+    example_inputs = (
+        torch.ones(1, 5),
+        torch.full((1, 5), 2.0),
+    )
+    mod = SimpleAdd().eval()
+
+    base_spec = TosaSpecification.create_from_string("TOSA-1.0+INT")
+    compile_spec = getattr(ArmCompileSpecBuilder(), builder_method)(
+        tosa_spec=base_spec
+    ).build()
+
+    quantizer = quantizer_cls(compile_spec)
+    operator_config = get_symmetric_quantization_config(is_qat=True)
+    quantizer.set_global(operator_config)
+
+    exported = torch.export.export_for_training(
+        mod, copy.deepcopy(example_inputs), strict=True
+    )
+    prepared = prepare_pt2e(exported.module(), quantizer)
+    _ = prepared(*example_inputs)
+
+    converted = convert_pt2e(prepared)
+    final_export = torch.export.export_for_training(
+        converted, example_inputs, strict=True
+    )
+    partitioner = partitioner_cls(compile_spec)
+    edge_prog = to_edge_transform_and_lower(final_export, partitioner=[partitioner])
+
+    # Extract IO quantization parameters
+    q = extract_io_quant_params(
+        edge_prog,
+        input_idxs=(0, 1),
+        output_idxs=(0,),
+    )
+
+    assert "inputs" in q
+    assert "outputs" in q
+    assert len(q["inputs"]) == 2
+    assert len(q["outputs"]) == 1
+
+    for name, params in q["inputs"].items():
+        assert isinstance(name, str)
+        assert isinstance(params["scale"], float)
+        assert isinstance(params["zero_point"], int)
+
+    out_name, out_params = next(iter(q["outputs"].items()))
+    assert isinstance(out_name, str)
+    assert isinstance(out_params["scale"], float)
+    assert isinstance(out_params["zero_point"], int)
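For orientation, the assertions above pin down the shape of the result: a dict whose "inputs" and "outputs" entries map tensor names to per-tensor quantization parameters. An illustrative value (names and numbers invented; only the structure is what the test asserts):

    q = {
        "inputs": {
            "x": {"scale": 0.0157, "zero_point": 0},
            "y": {"scale": 0.0157, "zero_point": 0},
        },
        "outputs": {
            "add": {"scale": 0.0314, "zero_point": 0},
        },
    }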

backends/arm/test/models/test_conformer.py

Lines changed: 38 additions & 0 deletions
@@ -15,6 +15,7 @@
     EthosU85PipelineINT,
     TosaPipelineFP,
     TosaPipelineINT,
+    VgfPipeline,
 )

 from torchaudio.models import Conformer
@@ -124,3 +125,40 @@ def test_conformer_u85_INT():
         atol=5.0,
     )
     pipeline.run()
+
+
+@common.SkipIfNoModelConverter
+def test_conformer_vgf_INT():
+    pipeline = VgfPipeline[input_t](
+        TestConformer.conformer,
+        TestConformer.model_example_inputs,
+        aten_op=TestConformer.aten_ops,
+        exir_op=[],
+        tosa_version="TOSA-1.0+INT",
+        use_to_edge_transform_and_lower=True,
+    )
+    pipeline.pop_stage("check_count.exir")
+
+    # TODO: MLETORCH-1167 Create Vulkan backend e2e tests
+    # pipeline.change_args(
+    #     "run_method_and_compare_outputs",
+    #     get_test_inputs(
+    #         TestConformer.dim, TestConformer.lengths, TestConformer.num_examples
+    #     ),
+    #     rtol=1.0,
+    #     atol=3.0,
+    # )
+    pipeline.run()
+
+
+@common.SkipIfNoModelConverter
+def test_conformer_vgf_FP():
+    pipeline = VgfPipeline[input_t](
+        TestConformer.conformer,
+        TestConformer.model_example_inputs,
+        aten_op=TestConformer.aten_ops,
+        exir_op=[],
+        tosa_version="TOSA-1.0+FP",
+        use_to_edge_transform_and_lower=True,
+    )
+    pipeline.run()

backends/arm/test/models/test_deit_tiny_arm.py

Lines changed: 31 additions & 0 deletions
@@ -11,9 +11,12 @@

 import torch

+from executorch.backends.arm.test import common
+
 from executorch.backends.arm.test.tester.test_pipeline import (
     TosaPipelineFP,
     TosaPipelineINT,
+    VgfPipeline,
 )

 from timm.data import IMAGENET_INCEPTION_MEAN, IMAGENET_INCEPTION_STD
@@ -56,3 +59,31 @@ def test_deit_tiny_tosa_INT():
         qtol=1,
     )
     pipeline.run()
+
+
+@common.SkipIfNoModelConverter
+def test_deit_tiny_vgf_INT():
+    pipeline = VgfPipeline[input_t](
+        deit_tiny,
+        model_inputs,
+        aten_op=[],
+        exir_op=[],
+        tosa_version="TOSA-1.0+INT",
+        use_to_edge_transform_and_lower=True,
+        atol=1.5,
+        qtol=1,
+    )
+    pipeline.run()
+
+
+@common.SkipIfNoModelConverter
+def test_deit_tiny_vgf_FP():
+    pipeline = VgfPipeline[input_t](
+        deit_tiny,
+        model_inputs,
+        aten_op=[],
+        exir_op=[],
+        tosa_version="TOSA-1.0+FP",
+        use_to_edge_transform_and_lower=True,
+    )
+    pipeline.run()

backends/arm/test/models/test_dl3_arm.py

Lines changed: 35 additions & 0 deletions
@@ -16,6 +16,7 @@
     EthosU85PipelineINT,
     TosaPipelineFP,
     TosaPipelineINT,
+    VgfPipeline,
 )

 from executorch.examples.models import deeplab_v3
@@ -87,3 +88,37 @@ def test_dl3_u85_INT():
         "run_method_and_compare_outputs", rtol=1.0, atol=1.0
     )  # TODO: MLETORCH-1036 decrease tolerance
     pipeline.run()
+
+
+@common.SkipIfNoModelConverter
+def test_dl3_vgf_INT():
+    pipeline = VgfPipeline[input_t](
+        TestDl3.dl3,
+        TestDl3.model_example_inputs,
+        aten_op=[],
+        exir_op=[],
+        tosa_version="TOSA-1.0+INT",
+        use_to_edge_transform_and_lower=True,
+    )
+    # TODO: MLETORCH-1167 Create Vulkan backend e2e tests
+    # pipeline.change_args(
+    #     "run_method_and_compare_outputs", rtol=1.0, atol=1.0
+    # )
+    pipeline.run()
+
+
+@common.SkipIfNoModelConverter
+def test_dl3_vgf_FP():
+    pipeline = VgfPipeline[input_t](
+        TestDl3.dl3,
+        TestDl3.model_example_inputs,
+        aten_op=[],
+        exir_op=[],
+        tosa_version="TOSA-1.0+FP",
+        use_to_edge_transform_and_lower=True,
+    )
+    # TODO: MLETORCH-1167 Create Vulkan backend e2e tests
+    # pipeline.change_args(
+    #     "run_method_and_compare_outputs", rtol=1.0, atol=1.0
+    # )
+    pipeline.run()

backends/arm/test/models/test_llama.py

Lines changed: 41 additions & 1 deletion
@@ -17,10 +17,11 @@
 import torch
 from executorch.backends.arm._passes import InsertCastForOpsWithInt64InputPass

-from executorch.backends.arm.test import conftest
+from executorch.backends.arm.test import common, conftest
 from executorch.backends.arm.test.tester.test_pipeline import (
     TosaPipelineFP,
     TosaPipelineINT,
+    VgfPipeline,
 )
 from executorch.examples.models.llama.export_llama_lib import (
     build_args_parser,
@@ -131,3 +132,42 @@ def test_llama_tosa_INT():
         use_to_edge_transform_and_lower=True,
     )
     pipeline.run()
+
+
+@common.SkipIfNoModelConverter
+def test_llama_vgf_FP():
+    llama_model, llama_inputs, llama_meta = TestLlama().prepare_model()
+
+    if llama_model is None or llama_inputs is None:
+        pytest.skip("Missing model and/or input files")
+
+    with torch.no_grad():
+        pipeline = VgfPipeline[input_t](
+            llama_model,
+            llama_inputs,
+            aten_op=[],
+            exir_op=[],
+            tosa_version="TOSA-1.0+FP",
+            use_to_edge_transform_and_lower=True,
+        )
+        pipeline.run()
+
+
+@common.SkipIfNoModelConverter
+def test_llama_vgf_INT():
+    llama_model, llama_inputs, llama_meta = TestLlama().prepare_model()
+
+    if llama_model is None or llama_inputs is None:
+        pytest.skip("Missing model and/or input files")
+
+    with torch.no_grad():
+        pipeline = VgfPipeline[input_t](
+            llama_model,
+            llama_inputs,
+            aten_op=[],
+            exir_op=[],
+            tosa_version="TOSA-1.0+INT",
+            use_to_edge_transform_and_lower=True,
+            transform_passes=[InsertCastForOpsWithInt64InputPass()],
+        )
+        pipeline.run()
