Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/trunk.yml
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ jobs:
docker-image: executorch-ubuntu-22.04-arm-sdk
submodules: 'recursive'
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
timeout: 90
timeout: 120
script: |
# The generic Linux job chooses to use base env, not the one setup by the image
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
Expand Down
2 changes: 1 addition & 1 deletion backends/arm/quantizer/arm_quantizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@

@functools.lru_cache
def get_symmetric_quantization_config(
is_per_channel: bool = False,
is_per_channel: bool = True,
is_qat: bool = False,
is_dynamic: bool = False,
act_qmin: int = -128,
Expand Down
4 changes: 3 additions & 1 deletion backends/arm/test/misc/test_bn_relu_folding_qat.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,9 @@ def test_qat_tosa_BI(model: torch.nn.Module):
"quantize",
Quantize(
quantizer=quantizer,
quantization_config=get_symmetric_quantization_config(is_qat=True),
quantization_config=get_symmetric_quantization_config(
is_qat=True, is_per_channel=False
),
is_qat=True,
),
)
Expand Down
13 changes: 12 additions & 1 deletion backends/arm/test/ops/test_multihead_attention.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,14 @@ def test_multihead_attention_tosa_MI(test_data: input_t1):
)
def test_multihead_attention_tosa_BI(test_data):
test_data, module = test_data()
pipeline = TosaPipelineBI(module, (*test_data, *test_data, *test_data), [], [])
pipeline = TosaPipelineBI(
module,
(*test_data, *test_data, *test_data),
[],
[],
# TODO: Per-channel quantization is broken (MLETORCH-1144)
per_channel_quantization=False,
)
pipeline.run()


Expand All @@ -72,6 +79,8 @@ def test_multihead_attention_u55_BI(test_data: input_t1):
[],
use_to_edge_transform_and_lower=True,
run_on_fvp=True,
# TODO: Per-channel quantization is broken (MLETORCH-1144)
per_channel_quantization=False,
)
pipeline.pop_stage("check_count.exir")
pipeline.run()
Expand All @@ -92,5 +101,7 @@ def test_multihead_attention_u85_BI(test_data: input_t1):
[],
use_to_edge_transform_and_lower=True,
run_on_fvp=True,
# TODO: Per-channel quantization is broken (MLETORCH-1144)
per_channel_quantization=False,
)
pipeline.run()
4 changes: 3 additions & 1 deletion backends/arm/test/test_arm_baremetal.sh
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,9 @@ test_models_ethos-u55() { # End to End model tests using model_test.py
python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u55-64 --model=mv3 --extra_flags="-DET_ATOL=5.00 -DET_RTOL=5.00"
python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u55-256 --model=lstm --extra_flags="-DET_ATOL=0.03 -DET_RTOL=0.03"
python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u55-128 --model=resnet18 --extra_flags="-DET_ATOL=0.2 -DET_RTOL=0.2"
python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u55-128 --model=resnet50 --extra_flags="-DET_ATOL=0.2 -DET_RTOL=0.2"
# TODO: Output performance for resnet50 is bad with per-channel quantization (MLETORCH-1149).
# Also we get OOM when running this model. Disable it for now.
#python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u55-128 --model=resnet50 --extra_flags="-DET_ATOL=6.2 -DET_RTOL=6.2"

echo "${TEST_SUITE_NAME}: PASS"
}
Expand Down
58 changes: 25 additions & 33 deletions backends/arm/test/tester/test_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,7 +300,7 @@ def __init__(
run_on_tosa_ref_model: bool = True,
tosa_version: str = "TOSA-0.80+BI",
symmetric_io_quantization: bool = False,
per_channel_quantization: bool = False,
per_channel_quantization: bool = True,
use_to_edge_transform_and_lower: bool = True,
custom_path: str = None,
atol: float = 1e-03,
Expand All @@ -317,16 +317,14 @@ def __init__(
compile_spec = common.get_tosa_compile_spec(
tosa_profiles[tosa_version], custom_path=custom_path
)
if symmetric_io_quantization or per_channel_quantization:
quantizer = TOSAQuantizer(tosa_profiles[tosa_version])
quantization_config = get_symmetric_quantization_config(
is_per_channel=per_channel_quantization
)
if symmetric_io_quantization:
quantizer.set_io(quantization_config)
quant_stage = Quantize(quantizer, quantization_config)
else:
quant_stage = None

quantizer = TOSAQuantizer(tosa_profiles[tosa_version])
quantization_config = get_symmetric_quantization_config(
is_per_channel=per_channel_quantization
)
if symmetric_io_quantization:
quantizer.set_io(quantization_config)
quant_stage = Quantize(quantizer, quantization_config)

super().__init__(
module,
Expand Down Expand Up @@ -475,24 +473,21 @@ def __init__(
exir_ops: Optional[str | List[str]] = None,
run_on_fvp: bool = True,
symmetric_io_quantization: bool = False,
per_channel_quantization: bool = False,
per_channel_quantization: bool = True,
use_to_edge_transform_and_lower: bool = True,
custom_path: str = None,
atol: float = 1e-03,
rtol: float = 1e-03,
qtol: int = 1,
):
compile_spec = common.get_u55_compile_spec(custom_path=custom_path)
if symmetric_io_quantization or per_channel_quantization:
quantizer = EthosUQuantizer(compile_spec)
quantization_config = get_symmetric_quantization_config(
is_per_channel=per_channel_quantization
)
if symmetric_io_quantization:
quantizer.set_io(quantization_config)
quant_stage = Quantize(quantizer, quantization_config)
else:
quant_stage = None
quantizer = EthosUQuantizer(compile_spec)
quantization_config = get_symmetric_quantization_config(
is_per_channel=per_channel_quantization
)
if symmetric_io_quantization:
quantizer.set_io(quantization_config)
quant_stage = Quantize(quantizer, quantization_config)

super().__init__(
module,
Expand Down Expand Up @@ -565,24 +560,21 @@ def __init__(
exir_ops: str | List[str] = None,
run_on_fvp: bool = True,
symmetric_io_quantization: bool = False,
per_channel_quantization: bool = False,
per_channel_quantization: bool = True,
use_to_edge_transform_and_lower: bool = True,
custom_path: str = None,
atol: float = 1e-03,
rtol: float = 1e-03,
qtol: int = 1,
):
compile_spec = common.get_u85_compile_spec(custom_path=custom_path)
if symmetric_io_quantization or per_channel_quantization:
quantizer = EthosUQuantizer(compile_spec)
quantization_config = get_symmetric_quantization_config(
is_per_channel=per_channel_quantization
)
if symmetric_io_quantization:
quantizer.set_io(quantization_config)
quant_stage = Quantize(quantizer, quantization_config)
else:
quant_stage = None
quantizer = EthosUQuantizer(compile_spec)
quantization_config = get_symmetric_quantization_config(
is_per_channel=per_channel_quantization
)
if symmetric_io_quantization:
quantizer.set_io(quantization_config)
quant_stage = Quantize(quantizer, quantization_config)

super().__init__(
module,
Expand Down
3 changes: 1 addition & 2 deletions examples/arm/aot_arm_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,8 +160,7 @@ def quantize(
else:
raise RuntimeError("Unsupported compilespecs for quantization!")

# if we set is_per_channel to True, we also need to add out_variant of quantize_per_channel/dequantize_per_channel
operator_config = get_symmetric_quantization_config(is_per_channel=False)
operator_config = get_symmetric_quantization_config()
quantizer.set_global(operator_config)
m = prepare_pt2e(model, quantizer)

Expand Down
2 changes: 1 addition & 1 deletion examples/arm/ethos_u_minimal_example.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@
"\n",
"# Create and configure quantizer to use a symmetric quantization config globally on all nodes\n",
"quantizer = EthosUQuantizer(compile_spec)\n",
"operator_config = get_symmetric_quantization_config(is_per_channel=False)\n",
"operator_config = get_symmetric_quantization_config()\n",
"quantizer.set_global(operator_config)\n",
"\n",
"# Post training quantization\n",
Expand Down
Loading