4 changes: 2 additions & 2 deletions backends/arm/operator_support/ethos_u55_support.py
@@ -114,9 +114,9 @@ def is_node_supported( # noqa: C901
                 return False
 
         if node.target in self.target_ops_i8:
-            if dtype not in (torch.int8,):
+            if dtype not in (torch.int8, torch.int16):
                 self.reporter.report_reject(
-                    node, f"Unsupported dtype {dtype} (Supports i8)."
+                    node, f"Unsupported dtype {dtype} (Supports i8, i16)."
                 )
                 return False
 
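Taken on its own, the widened gate amounts to the following check. This is a minimal stand-alone sketch; `SUPPORTED_TABLE_DTYPES` and `table_op_dtype_ok` are illustrative names, not identifiers from the diff:

```python
import torch

# The ops in target_ops_i8 previously admitted only int8 activations;
# this change also admits int16.
SUPPORTED_TABLE_DTYPES = (torch.int8, torch.int16)

def table_op_dtype_ok(dtype: torch.dtype) -> bool:
    """Mirror of the widened dtype gate for Ethos-U55 target_ops_i8 ops."""
    return dtype in SUPPORTED_TABLE_DTYPES

assert table_op_dtype_ok(torch.int16)      # newly admitted by this change
assert not table_op_dtype_ok(torch.int32)  # still rejected
```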
4 changes: 3 additions & 1 deletion backends/arm/quantizer/arm_quantizer.py
@@ -161,6 +161,7 @@ def get_symmetric_a16w8_quantization_config(
     is_dynamic: bool = False,
     weight_qmin: int = -127,
     weight_qmax: int = 127,
+    epsilon: float = 2**-12,
Contributor:
OK, so in order for sig/tanh to be able to partition, we need to set this to 2**-16, right?

Collaborator (Author):
It can be partitioned without it; this is more for numerical behavior. If the epsilon is set too high, the quantization parameters can be inflated, resulting in an incorrect output. Thank you for the review!

 ):
     """
     16A8W quantization config: 16-bit activations, 8-bit weights.
@@ -174,11 +175,12 @@ def get_symmetric_a16w8_quantization_config(
         is_dynamic: Whether to use dynamic quantization
         weight_qmin: Minimum quantization value for weights
         weight_qmax: Maximum quantization value for weights
+        epsilon: Value used to pad observed [qmin, qmax] before initial zero point and scale calculation
 
     Returns:
         QuantizationConfig with 16-bit activations and 8-bit weights
     """
-    extra_args: Dict[str, Any] = {"eps": 2**-12}
+    extra_args: Dict[str, Any] = {"eps": epsilon}
 
     # Setup observer/fake-quant for 16-bit activations
     if is_qat:
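The thread above ("the quantization parameters can be inflated") comes down to simple arithmetic. Below is a back-of-envelope sketch, assuming the observer clamps its computed scale from below by `eps` (as `MinMaxObserver` in `torch.ao.quantization` does); the numbers are illustrative, not taken from the diff:

```python
# Why eps = 2**-12 inflates the quantization scale for a 16-bit sigmoid output.
# Assumption: the observer applies scale = max(scale, eps).
qmin, qmax = -32768, 32767    # int16 activation range
lo, hi = 0.0, 1.0             # observed output range of sigmoid

ideal_scale = (hi - lo) / (qmax - qmin)  # ~1.526e-05, i.e. roughly 2**-16

for name, eps in (("2**-12", 2**-12), ("2**-16", 2**-16)):
    scale = max(ideal_scale, eps)
    utilization = (hi - lo) / scale / (qmax - qmin)  # fraction of int16 codes used
    print(f"eps={name}: scale={scale:.3e}, range utilization={utilization:.1%}")
# eps=2**-12: scale=2.441e-04, range utilization=6.2%   -> 16x too coarse
# eps=2**-16: scale=1.526e-05, range utilization=100.0% -> full int16 resolution
```

With `eps = 2**-12` the clamp wins: the scale comes out sixteen times too coarse, so the dequantized sigmoid resolves only about 4096 of the 65536 available codes, which is what surfaces as an incorrect output.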
17 changes: 8 additions & 9 deletions backends/arm/test/ops/test_sigmoid.py
@@ -34,6 +34,7 @@
"zeros": lambda: torch.zeros(10, 10, 10, 10),
"ones": lambda: torch.ones(10, 10, 10),
"rand": lambda: torch.rand(10, 10) - 0.5,
"rand_4d": lambda: torch.rand(1, 1, 5, 10),
"randn_pos": lambda: torch.randn(10) + 10,
"randn_neg": lambda: torch.randn(10) - 10,
"ramp": lambda: torch.arange(-16, 16, 0.2),
@@ -269,22 +270,23 @@ def get_symmetric_a16w8_sigmoid_quantizer(per_channel_quantization=False):
     }
 
     quantizer = TOSAQuantizer(tosa_profiles[tosa_version])
+
+    # Use a smaller epsilon value to avoid greatly inflating [qmin, qmax]
     quantizer.set_global(
-        get_symmetric_a16w8_quantization_config(is_per_channel=per_channel_quantization)
+        get_symmetric_a16w8_quantization_config(
+            is_per_channel=per_channel_quantization, epsilon=2**-16
Contributor:
How does this change impact other ops?

Collaborator (Author):
Thanks for the review! There could be other Table operators that may need this change; I will update those as I go through all operators. But this change will not impact other ops as it stands, because the epsilon is only set in the unit tests.

Contributor:
It's set in the unit tests so that sig/tanh can be partitioned to U55/U85, right?

Collaborator (Author):
As mentioned below, this is just to ensure we get the correct values on the output. Thanks for the review!

+        )
     )
 
     return Quantize(
         quantizer,
         get_symmetric_a16w8_quantization_config(
-            is_per_channel=per_channel_quantization
+            is_per_channel=per_channel_quantization, epsilon=2**-16
         ),
     )
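For context, this is roughly how the tests below consume the quantizer stage. This is a hypothetical sketch: `TosaPipelineINT`, `input_t`, `Sigmoid`, `aten_op`, and the exact argument list are assumptions based on the surrounding test files, not shown in this diff:

```python
# Hypothetical wiring, mirroring the test pattern: build the INT pipeline,
# then swap in the custom 16A8W quantize stage defined above.
pipeline = TosaPipelineINT[input_t](
    Sigmoid(),
    (test_data(),),
    aten_op,
    tosa_extensions=["int16"],  # assumed flag enabling int16 TOSA support
)
pipeline.change_args(
    "quantize",
    get_symmetric_a16w8_sigmoid_quantizer(per_channel_quantization=False),
)
pipeline.run()
```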


 @common.parametrize("test_data", test_data_suite)
-@pytest.mark.xfail(
-    reason="missing int16 sigmoid ops support; fails at TOSA reference model with Unsupported operation type or rank. See: https://github.com/pytorch/executorch/issues/13974"
-)
 def test_sigmoid_16a8w_tosa_INT(test_data: torch.Tensor):
     """Test sigmoid operation with 16A8W quantization (16-bit activations, 8-bit weights)"""
     per_channel_quantization = False
@@ -311,7 +313,7 @@ def test_sigmoid_16a8w_tosa_INT(test_data: torch.Tensor):
 @common.parametrize("test_data", test_data_suite)
 @common.XfailIfNoCorstone300
 @pytest.mark.xfail(
-    reason="Vela compilation fails with 'Invalid arguments' for int16 sigmoid operations"
+    reason="MLETORCH-707: AssertionError: Output 0 does not match reference output."
Contributor:
This indicates we don't yet have full support for U55. Can you comment on what's remaining? cc: @3l1

Collaborator (Author):
This is more a limitation of U55 support than an int16x8-specific issue. Thanks for the review!

 )
 def test_sigmoid_16a8w_u55_INT16(test_data: torch.Tensor):
     """Test sigmoid operation with 16A8W quantization on U55 (16-bit activations, 8-bit weights)"""
@@ -337,9 +339,6 @@ def test_sigmoid_16a8w_u55_INT16(test_data: torch.Tensor):

 @common.parametrize("test_data", test_data_suite)
 @common.XfailIfNoCorstone320
-@pytest.mark.xfail(
-    reason="Vela compilation fails with 'Invalid arguments' for int16 sigmoid operations"
-)
 def test_sigmoid_16a8w_u85_INT16(test_data: torch.Tensor):
     """Test sigmoid operation with 16A8W quantization on U85 (16-bit activations, 8-bit weights)"""
     per_channel_quantization = False
190 changes: 0 additions & 190 deletions backends/arm/test/ops/test_sigmoid_16bit.py

This file was deleted.

16 changes: 7 additions & 9 deletions backends/arm/test/ops/test_tanh.py
@@ -121,22 +121,23 @@ def get_symmetric_a16w8_tanh_quantizer(per_channel_quantization=False):
     }
 
     quantizer = TOSAQuantizer(tosa_profiles[tosa_version])
+
+    # Use a smaller epsilon value to avoid greatly inflating [qmin, qmax]
     quantizer.set_global(
-        get_symmetric_a16w8_quantization_config(is_per_channel=per_channel_quantization)
+        get_symmetric_a16w8_quantization_config(
+            is_per_channel=per_channel_quantization, epsilon=2**-16
+        )
     )
 
     return Quantize(
         quantizer,
         get_symmetric_a16w8_quantization_config(
-            is_per_channel=per_channel_quantization
+            is_per_channel=per_channel_quantization, epsilon=2**-16
         ),
     )


 @common.parametrize("test_data", test_data_suite)
-@pytest.mark.xfail(
-    reason="missing int16 tanh ops support; fails at TOSA reference model with Unsupported operation type or rank. See: https://github.com/pytorch/executorch/issues/13975"
-)
 def test_tanh_16a8w_tosa_INT(test_data: torch.Tensor):
     """Test tanh operation with 16A8W quantization (16-bit activations, 8-bit weights)"""
     per_channel_quantization = False
@@ -163,7 +164,7 @@ def test_tanh_16a8w_tosa_INT(test_data: torch.Tensor):
 @common.parametrize("test_data", test_data_suite)
 @common.XfailIfNoCorstone300
 @pytest.mark.xfail(
-    reason="Vela compilation fails with 'Invalid arguments' for int16 tanh operations"
+    reason="MLETORCH-707: AssertionError: Output 0 does not match reference output."
 )
 def test_tanh_16a8w_u55_INT16(test_data: torch.Tensor):
     """Test tanh operation with 16A8W quantization on U55 (16-bit activations, 8-bit weights)"""
@@ -189,9 +190,6 @@ def test_tanh_16a8w_u55_INT16(test_data: torch.Tensor):

 @common.parametrize("test_data", test_data_suite)
 @common.XfailIfNoCorstone320
-@pytest.mark.xfail(
-    reason="Vela compilation fails with 'Invalid arguments' for int16 tanh operations"
-)
 def test_tanh_16a8w_u85_INT16(test_data: torch.Tensor):
     """Test tanh operation with 16A8W quantization on U85 (16-bit activations, 8-bit weights)"""
     per_channel_quantization = False