
Commit 618c572

Merge branch 'main' into gh/gasoonjia/36/orig

2 parents: d9660cb + ec0e4a3
57 files changed: +1363 −90 lines


.ci/scripts/test_huggingface_optimum_model.py

Lines changed: 9 additions & 3 deletions

@@ -262,14 +262,20 @@ def test_vit(model_id, model_dir, recipe, *, quantize=False, run_only=False):

     assert torch.allclose(
         eager_output.logits, et_output, atol=1e-02, rtol=1e-02
-    ), "CoreML output does not match eager"
+    ), "Model output does not match eager"


 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("--model", type=str, required=True)
     parser.add_argument("--recipe", type=str, required=True)
     parser.add_argument("--quantize", action="store_true", help="Enable quantization")
+    parser.add_argument(
+        "--model_dir",
+        type=str,
+        required=False,
+        help="When provided, write the pte file to this directory. Otherwise, a temporary directory is created for the test.",
+    )
     args = parser.parse_args()

     model_to_model_id_and_test_function = {
@@ -294,11 +300,11 @@ def test_vit(model_id, model_dir, recipe, *, quantize=False, run_only=False):
             f"Unknown model name: {args.model}. Available models: {model_to_model_id_and_test_function.keys()}"
         )

+    model_id, test_fn = model_to_model_id_and_test_function[args.model]
     with tempfile.TemporaryDirectory() as tmp_dir:
-        model_id, test_fn = model_to_model_id_and_test_function[args.model]
         test_fn(
             model_id=model_id,
-            model_dir=tmp_dir,
+            model_dir=tmp_dir if args.model_dir is None else args.model_dir,
             recipe=args.recipe,
             quantize=args.quantize,
         )
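The new flag lets CI (or a developer) keep the exported .pte artifact instead of losing it with the temporary directory. A hypothetical invocation, with the model and recipe names taken from the trunk.yml matrix below and the output path invented for illustration:

    import subprocess

    # Persist the exported .pte by passing --model_dir; omitting the flag
    # preserves the old behavior of exporting into a TemporaryDirectory.
    subprocess.run(
        [
            "python", ".ci/scripts/test_huggingface_optimum_model.py",
            "--model", "bert",
            "--recipe", "coreml_fp32_gpu",
            "--quantize",
            "--model_dir", "./exported_models",  # hypothetical output path
        ],
        check=True,
    )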

.github/workflows/trunk.yml

Lines changed: 3 additions & 0 deletions
@@ -815,6 +815,9 @@ jobs:
           smollm|coreml_fp32_gpu|--quantize,
           llama3|coreml_fp32_gpu|--quantize,
           olmo|coreml_fp32_gpu|--quantize,
+          # roberta|coreml_fp32_gpu|--quantize, roberta requires special HF access
+          bert|coreml_fp32_gpu|--quantize,
+          distilbert|coreml_fp32_gpu|--quantize,
         ]
       fail-fast: false
     with:
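Each matrix entry packs model, recipe, and extra flags into one `|`-separated string. A minimal sketch of how such an entry could map onto the test script's CLI (the split shown here is illustrative; the actual plumbing lives in the workflow):

    entry = "bert|coreml_fp32_gpu|--quantize"
    model, recipe, *flags = entry.split("|")
    # -> ["--model", "bert", "--recipe", "coreml_fp32_gpu", "--quantize"]
    cli_args = ["--model", model, "--recipe", recipe, *flags]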

backends/arm/arm_backend.py

Lines changed: 1 addition & 1 deletion
@@ -57,7 +57,7 @@ def vgf_compile_spec(
             f"Invalid TOSA version: {tosa_version}"
         )

-    if not ("FP" or "INT" in tosa_profiles):
+    if "FP" not in tosa_profiles and "INT" not in tosa_profiles:
         raise ValueError(
             "Arm backend only supports converter-backend for FP or INT. "
             f"Invalid TOSA profile: {tosa_profiles}"

backends/arm/quantizer/arm_quantizer.py

Lines changed: 29 additions & 3 deletions
@@ -14,7 +14,7 @@
 from __future__ import annotations

 import functools
-from typing import Any, Callable, Dict, List, Optional
+from typing import Any, Callable, Dict, List, Optional, Union

 import torch
 from executorch.backends.arm._passes import ArmPassManager
@@ -218,9 +218,35 @@ def not_module_type_or_name_filter(n: Node) -> bool:

 class TOSAQuantizer(Quantizer):

-    def __init__(self, tosa_spec: TosaSpecification) -> None:
+    def __init__(
+        self, compile_spec_or_tosa_spec: Union[TosaSpecification, List[CompileSpec]]
+    ) -> None:
+
         super().__init__()
-        self.tosa_spec = tosa_spec
+        if isinstance(compile_spec_or_tosa_spec, TosaSpecification):
+            self.tosa_spec = compile_spec_or_tosa_spec
+            self.compile_spec = None
+        elif isinstance(compile_spec_or_tosa_spec, list):
+            self.compile_spec = compile_spec_or_tosa_spec
+            # find entry that is 'tosa_spec'
+            for cs in compile_spec_or_tosa_spec:
+                if cs.key == "tosa_spec":
+                    spec_val = (
+                        cs.value.decode() if isinstance(cs.value, bytes) else cs.value
+                    )
+                    self.tosa_spec = TosaSpecification.create_from_string(spec_val)
+                    break
+            else:
+                raise ValueError(
+                    "compile_spec list did not contain a 'tosa_spec' entry"
+                )
+        else:
+            raise TypeError(
+                f"TOSAQuantizer constructor expects "
+                f"a TosaSpecification or compile_spec list, "
+                f"got {type(compile_spec_or_tosa_spec)}"
+            )
+
         self.global_config: Optional[QuantizationConfig] = None
         self.io_config: Optional[QuantizationConfig] = None
         self.module_type_config: Dict[Callable, Optional[QuantizationConfig]] = {}
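The constructor now accepts either a bare TosaSpecification (the old signature) or a compile-spec list, from which it recovers the spec through the 'tosa_spec' entry. A sketch of both construction paths, reusing the builder call exercised by the new test below:

    from executorch.backends.arm.arm_backend import ArmCompileSpecBuilder
    from executorch.backends.arm.quantizer.arm_quantizer import TOSAQuantizer
    from executorch.backends.arm.tosa_specification import TosaSpecification

    spec = TosaSpecification.create_from_string("TOSA-1.0+INT")

    # Path 1: pass the TosaSpecification directly, as before.
    quantizer_from_spec = TOSAQuantizer(spec)

    # Path 2: pass a compile-spec list; the constructor finds the
    # 'tosa_spec' entry and rebuilds the specification from its value.
    compile_spec = ArmCompileSpecBuilder().tosa_compile_spec(tosa_spec=spec).build()
    quantizer_from_compile_spec = TOSAQuantizer(compile_spec)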
Lines changed: 96 additions & 0 deletions (new file)

@@ -0,0 +1,96 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import copy
+
+import pytest
+import torch
+from executorch.backends.arm.arm_backend import ArmCompileSpecBuilder
+from executorch.backends.arm.quantizer import VgfQuantizer
+from executorch.backends.arm.quantizer.arm_quantizer import (
+    get_symmetric_quantization_config,
+    TOSAQuantizer,
+)
+
+from executorch.backends.arm.test.common import SkipIfNoModelConverter
+from executorch.backends.arm.tosa_partitioner import TOSAPartitioner
+from executorch.backends.arm.tosa_specification import TosaSpecification
+from executorch.backends.arm.vgf_partitioner import VgfPartitioner
+from executorch.exir import to_edge_transform_and_lower
+from executorch.exir.passes.quantize_io_pass import extract_io_quant_params
+from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e
+
+
+class SimpleAdd(torch.nn.Module):
+    def forward(self, x, y):
+        return x + y
+
+
+@pytest.mark.parametrize(
+    "builder_method, quantizer_cls, partitioner_cls",
+    [
+        ("tosa_compile_spec", TOSAQuantizer, TOSAPartitioner),
+        pytest.param(
+            "vgf_compile_spec",
+            VgfQuantizer,
+            VgfPartitioner,
+            marks=SkipIfNoModelConverter,
+            id="VGF",
+        ),
+    ],
+)
+def test_roundtrip_extracts_io_params(builder_method, quantizer_cls, partitioner_cls):
+    """
+    Validates that IO quantization parameters round-trip for both flows.
+    """
+    example_inputs = (
+        torch.ones(1, 5),
+        torch.full((1, 5), 2.0),
+    )
+    mod = SimpleAdd().eval()
+
+    base_spec = TosaSpecification.create_from_string("TOSA-1.0+INT")
+    compile_spec = getattr(ArmCompileSpecBuilder(), builder_method)(
+        tosa_spec=base_spec
+    ).build()
+
+    quantizer = quantizer_cls(compile_spec)
+    operator_config = get_symmetric_quantization_config(is_qat=True)
+    quantizer.set_global(operator_config)
+
+    exported = torch.export.export_for_training(
+        mod, copy.deepcopy(example_inputs), strict=True
+    )
+    prepared = prepare_pt2e(exported.module(), quantizer)
+    _ = prepared(*example_inputs)
+
+    converted = convert_pt2e(prepared)
+    final_export = torch.export.export_for_training(
+        converted, example_inputs, strict=True
+    )
+    partitioner = partitioner_cls(compile_spec)
+    edge_prog = to_edge_transform_and_lower(final_export, partitioner=[partitioner])
+
+    # Extract IO quantization parameters
+    q = extract_io_quant_params(
+        edge_prog,
+        input_idxs=(0, 1),
+        output_idxs=(0,),
+    )
+
+    assert "inputs" in q
+    assert "outputs" in q
+    assert len(q["inputs"]) == 2
+    assert len(q["outputs"]) == 1
+
+    for name, params in q["inputs"].items():
+        assert isinstance(name, str)
+        assert isinstance(params["scale"], float)
+        assert isinstance(params["zero_point"], int)
+
+    out_name, out_params = next(iter(q["outputs"].items()))
+    assert isinstance(out_name, str)
+    assert isinstance(out_params["scale"], float)
+    assert isinstance(out_params["zero_point"], int)
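For orientation, the assertions above pin down the shape of the result: a dict whose "inputs" and "outputs" entries map tensor names to per-tensor quantization parameters. An illustrative value (names and numbers invented; only the structure is what the test asserts):

    q = {
        "inputs": {
            "x": {"scale": 0.0157, "zero_point": 0},
            "y": {"scale": 0.0157, "zero_point": 0},
        },
        "outputs": {
            "add": {"scale": 0.0314, "zero_point": 0},
        },
    }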

backends/arm/test/models/test_conformer.py

Lines changed: 38 additions & 0 deletions
@@ -15,6 +15,7 @@
     EthosU85PipelineINT,
     TosaPipelineFP,
     TosaPipelineINT,
+    VgfPipeline,
 )

 from torchaudio.models import Conformer
@@ -124,3 +125,40 @@ def test_conformer_u85_INT():
         atol=5.0,
     )
     pipeline.run()
+
+
+@common.SkipIfNoModelConverter
+def test_conformer_vgf_INT():
+    pipeline = VgfPipeline[input_t](
+        TestConformer.conformer,
+        TestConformer.model_example_inputs,
+        aten_op=TestConformer.aten_ops,
+        exir_op=[],
+        tosa_version="TOSA-1.0+INT",
+        use_to_edge_transform_and_lower=True,
+    )
+    pipeline.pop_stage("check_count.exir")
+
+    # TODO: MLETORCH-1167 Create Vulkan backend e2e tests
+    # pipeline.change_args(
+    #     "run_method_and_compare_outputs",
+    #     get_test_inputs(
+    #         TestConformer.dim, TestConformer.lengths, TestConformer.num_examples
+    #     ),
+    #     rtol=1.0,
+    #     atol=3.0,
+    # )
+    pipeline.run()
+
+
+@common.SkipIfNoModelConverter
+def test_conformer_vgf_FP():
+    pipeline = VgfPipeline[input_t](
+        TestConformer.conformer,
+        TestConformer.model_example_inputs,
+        aten_op=TestConformer.aten_ops,
+        exir_op=[],
+        tosa_version="TOSA-1.0+FP",
+        use_to_edge_transform_and_lower=True,
+    )
+    pipeline.run()

backends/arm/test/models/test_deit_tiny_arm.py

Lines changed: 31 additions & 0 deletions
@@ -11,9 +11,12 @@

 import torch

+from executorch.backends.arm.test import common
+
 from executorch.backends.arm.test.tester.test_pipeline import (
     TosaPipelineFP,
     TosaPipelineINT,
+    VgfPipeline,
 )

 from timm.data import IMAGENET_INCEPTION_MEAN, IMAGENET_INCEPTION_STD
@@ -56,3 +59,31 @@ def test_deit_tiny_tosa_INT():
         qtol=1,
     )
     pipeline.run()
+
+
+@common.SkipIfNoModelConverter
+def test_deit_tiny_vgf_INT():
+    pipeline = VgfPipeline[input_t](
+        deit_tiny,
+        model_inputs,
+        aten_op=[],
+        exir_op=[],
+        tosa_version="TOSA-1.0+INT",
+        use_to_edge_transform_and_lower=True,
+        atol=1.5,
+        qtol=1,
+    )
+    pipeline.run()
+
+
+@common.SkipIfNoModelConverter
+def test_deit_tiny_vgf_FP():
+    pipeline = VgfPipeline[input_t](
+        deit_tiny,
+        model_inputs,
+        aten_op=[],
+        exir_op=[],
+        tosa_version="TOSA-1.0+FP",
+        use_to_edge_transform_and_lower=True,
+    )
+    pipeline.run()

backends/arm/test/models/test_dl3_arm.py

Lines changed: 35 additions & 0 deletions
@@ -16,6 +16,7 @@
     EthosU85PipelineINT,
     TosaPipelineFP,
     TosaPipelineINT,
+    VgfPipeline,
 )

 from executorch.examples.models import deeplab_v3
@@ -87,3 +88,37 @@ def test_dl3_u85_INT():
         "run_method_and_compare_outputs", rtol=1.0, atol=1.0
     )  # TODO: MLETORCH-1036 decrease tolerance
     pipeline.run()
+
+
+@common.SkipIfNoModelConverter
+def test_dl3_vgf_INT():
+    pipeline = VgfPipeline[input_t](
+        TestDl3.dl3,
+        TestDl3.model_example_inputs,
+        aten_op=[],
+        exir_op=[],
+        tosa_version="TOSA-1.0+INT",
+        use_to_edge_transform_and_lower=True,
+    )
+    # TODO: MLETORCH-1167 Create Vulkan backend e2e tests
+    # pipeline.change_args(
+    #     "run_method_and_compare_outputs", rtol=1.0, atol=1.0
+    # )
+    pipeline.run()
+
+
+@common.SkipIfNoModelConverter
+def test_dl3_vgf_FP():
+    pipeline = VgfPipeline[input_t](
+        TestDl3.dl3,
+        TestDl3.model_example_inputs,
+        aten_op=[],
+        exir_op=[],
+        tosa_version="TOSA-1.0+FP",
+        use_to_edge_transform_and_lower=True,
+    )
+    # TODO: MLETORCH-1167 Create Vulkan backend e2e tests
+    # pipeline.change_args(
+    #     "run_method_and_compare_outputs", rtol=1.0, atol=1.0
+    # )
+    pipeline.run()

backends/arm/test/models/test_llama.py

Lines changed: 41 additions & 1 deletion
@@ -17,10 +17,11 @@
 import torch
 from executorch.backends.arm._passes import InsertCastForOpsWithInt64InputPass

-from executorch.backends.arm.test import conftest
+from executorch.backends.arm.test import common, conftest
 from executorch.backends.arm.test.tester.test_pipeline import (
     TosaPipelineFP,
     TosaPipelineINT,
+    VgfPipeline,
 )
 from executorch.examples.models.llama.export_llama_lib import (
     build_args_parser,
@@ -131,3 +132,42 @@ def test_llama_tosa_INT():
         use_to_edge_transform_and_lower=True,
     )
     pipeline.run()
+
+
+@common.SkipIfNoModelConverter
+def test_llama_vgf_FP():
+    llama_model, llama_inputs, llama_meta = TestLlama().prepare_model()
+
+    if llama_model is None or llama_inputs is None:
+        pytest.skip("Missing model and/or input files")
+
+    with torch.no_grad():
+        pipeline = VgfPipeline[input_t](
+            llama_model,
+            llama_inputs,
+            aten_op=[],
+            exir_op=[],
+            tosa_version="TOSA-1.0+FP",
+            use_to_edge_transform_and_lower=True,
+        )
+        pipeline.run()
+
+
+@common.SkipIfNoModelConverter
+def test_llama_vgf_INT():
+    llama_model, llama_inputs, llama_meta = TestLlama().prepare_model()
+
+    if llama_model is None or llama_inputs is None:
+        pytest.skip("Missing model and/or input files")
+
+    with torch.no_grad():
+        pipeline = VgfPipeline[input_t](
+            llama_model,
+            llama_inputs,
+            aten_op=[],
+            exir_op=[],
+            tosa_version="TOSA-1.0+INT",
+            use_to_edge_transform_and_lower=True,
+            transform_passes=[InsertCastForOpsWithInt64InputPass()],
+        )
+        pipeline.run()
