@@ -9,22 +9,17 @@
 from typing import Tuple
 
 import pytest
+
 import torch
-from executorch.backends.arm.quantizer.arm_quantizer import (
-    get_symmetric_a16w8_quantization_config,
-    TOSAQuantizer,
-)
-from executorch.backends.arm.test import common, conftest
+from executorch.backends.arm.test import common
 
 from executorch.backends.arm.test.tester.test_pipeline import (
     EthosU55PipelineINT,
     EthosU85PipelineINT,
     TosaPipelineFP,
     TosaPipelineINT,
     VgfPipeline,
 )
-from executorch.backends.arm.tosa_specification import TosaSpecification
-from executorch.backends.xnnpack.test.tester import Quantize
 
 aten_op = "torch.ops.aten.linear.default"
 
@@ -148,6 +143,7 @@ def test_linear_tosa_FP(test_data: torch.Tensor):
     pipeline.run()
 
 
+@pytest.mark.flaky(reruns=5)  # TODO: Investigate flakiness.
 @common.parametrize("test_data", test_data_rank1_INT | test_data_rank4_INT)
 def test_linear_tosa_INT(test_data: torch.Tensor):
     test_data, out_features, has_bias, per_channel_quantization = test_data()
@@ -247,64 +243,3 @@ def test_linear_vgf_INT(test_data: torch.Tensor):
         per_channel_quantization=per_channel_quantization,
     )
     pipeline.run()
-
-
-def get_symmetric_a16w8_linear_quantizer(
-    u55_config=False, per_channel_quantization=False
-):
-    tosa_version = conftest.get_option("tosa_version")
-    tosa_profiles = {
-        "1.0": TosaSpecification.create_from_string("TOSA-1.0+INT+int16"),
-    }
-
-    quantizer = TOSAQuantizer(tosa_profiles[tosa_version])
-    quantizer.set_global(
-        get_symmetric_a16w8_quantization_config(is_per_channel=per_channel_quantization)
-    )
-    quantizer.set_module_type(
-        torch.nn.Linear,
-        get_symmetric_a16w8_quantization_config(
-            is_per_channel=per_channel_quantization
-        ),
-    )
-
-    return Quantize(
-        quantizer,
-        get_symmetric_a16w8_quantization_config(
-            is_per_channel=per_channel_quantization
-        ),
-    )
-
-
-@common.parametrize("test_data", test_data_rank1_INT | test_data_rank4_INT)
-@pytest.mark.xfail(
-    reason="missing int16 linear ops support; fails at TOSA reference model run with Invalid TOSA graph"
-)
-def test_linear_16a8w_tosa_INT(test_data: torch.Tensor):
-    """Test linear operation with 16A8W quantization (16-bit activations, 8-bit weights)"""
-    test_data, out_features, has_bias, per_channel_quantization = test_data()
-    in_features = test_data.shape[-1]
-
-    # Create pipeline with custom 16A8W quantization config
-    pipeline = TosaPipelineINT[input_t1](
-        Linear(
-            in_features=in_features,
-            out_features=out_features,
-            bias=has_bias,
-        ),
-        (test_data,),
-        aten_op,
-        exir_op=[],
-        per_channel_quantization=per_channel_quantization,
-        use_to_edge_transform_and_lower=True,
-        tosa_extensions=["int16"],
-    )
-
-    pipeline.change_args(
-        "quantize",
-        get_symmetric_a16w8_linear_quantizer(
-            per_channel_quantization=per_channel_quantization
-        ),
-    )
-    # Run the pipeline
-    pipeline.run()