Changes from 2 commits
2 changes: 1 addition & 1 deletion .github/workflows/trunk.yml
@@ -197,7 +197,7 @@ jobs:
docker-image: executorch-ubuntu-22.04-arm-sdk
submodules: 'recursive'
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 90
+      timeout: 120
script: |
# The generic Linux job chooses to use base env, not the one setup by the image
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
2 changes: 1 addition & 1 deletion backends/arm/quantizer/arm_quantizer.py
@@ -60,7 +60,7 @@

@functools.lru_cache
def get_symmetric_quantization_config(
-    is_per_channel: bool = False,
+    is_per_channel: bool = True,
is_qat: bool = False,
is_dynamic: bool = False,
act_qmin: int = -128,
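Note: this default flip means every caller of get_symmetric_quantization_config() now gets per-channel weight quantization unless it opts out, which is why the call sites later in this diff pass is_per_channel=False explicitly. A minimal sketch of the two modes; the import path is assumed from the file's location in the tree:

from executorch.backends.arm.quantizer.arm_quantizer import (
    get_symmetric_quantization_config,
)

# New default: weights get one scale per output channel.
per_channel_config = get_symmetric_quantization_config()

# Call sites that need the old per-tensor behaviour now opt out explicitly.
per_tensor_config = get_symmetric_quantization_config(is_per_channel=False)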
4 changes: 3 additions & 1 deletion backends/arm/test/misc/test_bn_relu_folding_qat.py
@@ -59,7 +59,9 @@ def test_qat_tosa_BI(model: torch.nn.Module):
"quantize",
Quantize(
quantizer=quantizer,
-            quantization_config=get_symmetric_quantization_config(is_qat=True),
+            quantization_config=get_symmetric_quantization_config(
+                is_qat=True, is_per_channel=False
+            ),
is_qat=True,
),
)
13 changes: 12 additions & 1 deletion backends/arm/test/ops/test_multihead_attention.py
@@ -53,7 +53,14 @@ def test_multihead_attention_tosa_MI(test_data: input_t1):
)
def test_multihead_attention_tosa_BI(test_data):
test_data, module = test_data()
-    pipeline = TosaPipelineBI(module, (*test_data, *test_data, *test_data), [], [])
+    pipeline = TosaPipelineBI(
+        module,
+        (*test_data, *test_data, *test_data),
+        [],
+        [],
+        # TODO: Per-channel quantization is broken (MLETORCH-1144)
+        per_channel_quantization=False,
+    )
pipeline.run()


@@ -72,6 +79,8 @@ def test_multihead_attention_u55_BI(test_data: input_t1):
[],
use_to_edge_transform_and_lower=True,
run_on_fvp=True,
+        # TODO: Per-channel quantization is broken (MLETORCH-1144)
+        per_channel_quantization=False,
)
pipeline.pop_stage("check_count.exir")
pipeline.run()
@@ -92,5 +101,7 @@ def test_multihead_attention_u85_BI(test_data: input_t1):
[],
use_to_edge_transform_and_lower=True,
run_on_fvp=True,
+        # TODO: Per-channel quantization is broken (MLETORCH-1144)
+        per_channel_quantization=False,
)
pipeline.run()
4 changes: 3 additions & 1 deletion backends/arm/test/test_arm_baremetal.sh
@@ -210,7 +210,9 @@ test_models_ethos-u55() { # End to End model tests using model_test.py
python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u55-64 --model=mv3 --extra_flags="-DET_ATOL=5.00 -DET_RTOL=5.00"
python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u55-256 --model=lstm --extra_flags="-DET_ATOL=0.03 -DET_RTOL=0.03"
python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u55-128 --model=resnet18 --extra_flags="-DET_ATOL=0.2 -DET_RTOL=0.2"
-    python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u55-128 --model=resnet50 --extra_flags="-DET_ATOL=0.2 -DET_RTOL=0.2"
+    # TODO: resnet50 output accuracy is poor with per-channel quantization (MLETORCH-1149),
+    # and the model also hits OOM when run, so it is disabled for now.
+    #python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u55-128 --model=resnet50 --extra_flags="-DET_ATOL=6.2 -DET_RTOL=6.2"

echo "${TEST_SUITE_NAME}: PASS"
}
58 changes: 25 additions & 33 deletions backends/arm/test/tester/test_pipeline.py
@@ -300,7 +300,7 @@ def __init__(
run_on_tosa_ref_model: bool = True,
tosa_version: str = "TOSA-0.80+BI",
symmetric_io_quantization: bool = False,
-        per_channel_quantization: bool = False,
+        per_channel_quantization: bool = True,
use_to_edge_transform_and_lower: bool = True,
custom_path: str = None,
atol: float = 1e-03,
@@ -317,16 +317,14 @@ def __init__(
compile_spec = common.get_tosa_compile_spec(
tosa_profiles[tosa_version], custom_path=custom_path
)
-        if symmetric_io_quantization or per_channel_quantization:
-            quantizer = TOSAQuantizer(tosa_profiles[tosa_version])
-            quantization_config = get_symmetric_quantization_config(
-                is_per_channel=per_channel_quantization
-            )
-            if symmetric_io_quantization:
-                quantizer.set_io(quantization_config)
-            quant_stage = Quantize(quantizer, quantization_config)
-        else:
-            quant_stage = None
+        quantizer = TOSAQuantizer(tosa_profiles[tosa_version])
+        quantization_config = get_symmetric_quantization_config(
+            is_per_channel=per_channel_quantization
+        )
+        if symmetric_io_quantization:
+            quantizer.set_io(quantization_config)
+        quant_stage = Quantize(quantizer, quantization_config)

super().__init__(
module,
@@ -475,24 +473,21 @@ def __init__(
exir_ops: Optional[str | List[str]] = None,
run_on_fvp: bool = True,
symmetric_io_quantization: bool = False,
-        per_channel_quantization: bool = False,
+        per_channel_quantization: bool = True,
use_to_edge_transform_and_lower: bool = True,
custom_path: str = None,
atol: float = 1e-03,
rtol: float = 1e-03,
qtol: int = 1,
):
compile_spec = common.get_u55_compile_spec(custom_path=custom_path)
-        if symmetric_io_quantization or per_channel_quantization:
-            quantizer = EthosUQuantizer(compile_spec)
-            quantization_config = get_symmetric_quantization_config(
-                is_per_channel=per_channel_quantization
-            )
-            if symmetric_io_quantization:
-                quantizer.set_io(quantization_config)
-            quant_stage = Quantize(quantizer, quantization_config)
-        else:
-            quant_stage = None
+        quantizer = EthosUQuantizer(compile_spec)
+        quantization_config = get_symmetric_quantization_config(
+            is_per_channel=per_channel_quantization
+        )
+        if symmetric_io_quantization:
+            quantizer.set_io(quantization_config)
+        quant_stage = Quantize(quantizer, quantization_config)

super().__init__(
module,
@@ -565,24 +560,21 @@ def __init__(
exir_ops: str | List[str] = None,
run_on_fvp: bool = True,
symmetric_io_quantization: bool = False,
-        per_channel_quantization: bool = False,
+        per_channel_quantization: bool = True,
use_to_edge_transform_and_lower: bool = True,
custom_path: str = None,
atol: float = 1e-03,
rtol: float = 1e-03,
qtol: int = 1,
):
compile_spec = common.get_u85_compile_spec(custom_path=custom_path)
-        if symmetric_io_quantization or per_channel_quantization:
-            quantizer = EthosUQuantizer(compile_spec)
-            quantization_config = get_symmetric_quantization_config(
-                is_per_channel=per_channel_quantization
-            )
-            if symmetric_io_quantization:
-                quantizer.set_io(quantization_config)
-            quant_stage = Quantize(quantizer, quantization_config)
-        else:
-            quant_stage = None
+        quantizer = EthosUQuantizer(compile_spec)
+        quantization_config = get_symmetric_quantization_config(
+            is_per_channel=per_channel_quantization
+        )
+        if symmetric_io_quantization:
+            quantizer.set_io(quantization_config)
+        quant_stage = Quantize(quantizer, quantization_config)

super().__init__(
module,
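Note: with the else branch gone, each BI pipeline now builds its Quantize stage unconditionally; previously quant_stage stayed None when neither flag was set, leaving the quantize step to whatever default the base pipeline applies. Combined with the new per_channel_quantization=True default, a hedged sketch of the resulting test-side usage, where module and example_inputs stand in for a test's model and inputs:

# Per-channel quantization is now on by default for BI pipelines.
pipeline = TosaPipelineBI(module, example_inputs, [], [])
pipeline.run()

# Tests hitting MLETORCH-1144 opt back into per-tensor quantization.
pipeline = TosaPipelineBI(
    module,
    example_inputs,
    [],
    [],
    per_channel_quantization=False,
)
pipeline.run()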
3 changes: 1 addition & 2 deletions examples/arm/aot_arm_compiler.py
@@ -160,8 +160,7 @@ def quantize(
else:
raise RuntimeError("Unsupported compilespecs for quantization!")

-    # if we set is_per_channel to True, we also need to add out_variant of quantize_per_channel/dequantize_per_channel
-    operator_config = get_symmetric_quantization_config(is_per_channel=False)
+    operator_config = get_symmetric_quantization_config()
quantizer.set_global(operator_config)
m = prepare_pt2e(model, quantizer)

2 changes: 1 addition & 1 deletion examples/arm/ethos_u_minimal_example.ipynb
@@ -101,7 +101,7 @@
"\n",
"# Create and configure quantizer to use a symmetric quantization config globally on all nodes\n",
"quantizer = EthosUQuantizer(compile_spec)\n",
"operator_config = get_symmetric_quantization_config(is_per_channel=False)\n",
"operator_config = get_symmetric_quantization_config()\n",
"quantizer.set_global(operator_config)\n",
"\n",
"# Post training quantization\n",
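For context, both of these examples/ call sites feed the standard PT2E post-training quantization flow. A rough sketch under the new default, assuming the import paths match this repository's layout and with compile_spec, model, and example_inputs as placeholders:

from executorch.backends.arm.quantizer.arm_quantizer import (
    EthosUQuantizer,
    get_symmetric_quantization_config,
)
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e

quantizer = EthosUQuantizer(compile_spec)  # compile_spec built for the target NPU
quantizer.set_global(get_symmetric_quantization_config())  # per-channel by default now

prepared = prepare_pt2e(model, quantizer)  # model: an exported graph module
prepared(*example_inputs)                  # calibrate on representative inputs
quantized = convert_pt2e(prepared)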