
Commit bc0a47b

feat: QSYMM8_PER_CHANNEL support in NEQuantizationLayer.
* With this patch we add support for F32 -> QSYMM8_PER_CHANNEL in NEQuantizationLayer
* Resolves: ARMCL-1198

Change-Id: I0579f23ef38f716930401d2cdbeaa8cdd5d70d40
Signed-off-by: Pablo Marquez Tello <[email protected]>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/14692
Comments-Addressed: Arm Jenkins <[email protected]>
Tested-by: Arm Jenkins <[email protected]>
Reviewed-by: Gunes Bayir <[email protected]>
Benchmark: Arm Jenkins <[email protected]>
1 parent 35cd5a7 commit bc0a47b

10 files changed, +267 −37 lines changed


arm_compute/core/QuantizationInfo.h

Lines changed: 9 additions & 5 deletions
@@ -394,15 +394,19 @@ inline int8_t quantize_qsymm8(float value, const QuantizationInfo &qinfo)
 
 /** Quantize a value given a 8-bit symmetric per channel quantization scheme
  *
- * @param[in] value      Value to quantize
- * @param[in] qinfo      Quantization information to use for quantizing
- * @param[in] channel_id channel index into the scale vector of quantization info
+ * @param[in] value           Value to quantize
+ * @param[in] qinfo           Quantization information to use for quantizing
+ * @param[in] channel_id      channel index into the scale vector of quantization info
+ * @param[in] rounding_policy (Optional) Rounding policy to use. Default: nearest up
  *
  * @return Quantized value
  */
-inline int8_t quantize_qsymm8_per_channel(float value, const QuantizationInfo &qinfo, size_t channel_id = 0)
+inline int8_t quantize_qsymm8_per_channel(float value,
+                                          const QuantizationInfo &qinfo,
+                                          size_t                  channel_id      = 0,
+                                          RoundingPolicy          rounding_policy = RoundingPolicy::TO_NEAREST_UP)
 {
-    int quantized = arm_compute::round(value / qinfo.scale()[channel_id], RoundingPolicy::TO_NEAREST_UP);
+    int quantized = arm_compute::round(value / qinfo.scale()[channel_id], rounding_policy);
     quantized = std::max(-128, std::min(quantized, 127));
     return quantized;
 }
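
For reference, a minimal sketch of how the extended helper could be called; the headers, scale values and the example() wrapper below are illustrative assumptions, not part of this patch:

#include "arm_compute/core/QuantizationInfo.h"
#include "arm_compute/core/Rounding.h"

using namespace arm_compute;

void example() // hypothetical caller, for illustration only
{
    // One (made-up) scale per channel.
    const QuantizationInfo qinfo(std::vector<float>{0.25f, 0.5f, 0.125f});
    // Default rounding (TO_NEAREST_UP): round(1.3f / 0.5f) = 3 for channel 1.
    const int8_t q0 = quantize_qsymm8_per_channel(1.3f, qinfo, 1);
    // The new parameter allows picking a different policy, e.g. truncation: 1.3f / 0.5f -> 2.
    const int8_t q1 = quantize_qsymm8_per_channel(1.3f, qinfo, 1, RoundingPolicy::TO_ZERO);
    (void)q0;
    (void)q1;
}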

arm_compute/runtime/NEON/functions/NEQuantizationLayer.h

Lines changed: 8 additions & 8 deletions
@@ -60,21 +60,21 @@ class NEQuantizationLayer : public IFunction
  * - All
  *
  * Valid data type configurations:
- * |src            |dst                                    |
- * |:--------------|:--------------------------------------|
- * |QASYMM8        |QASYMM8, QASYMM8_SIGNED, QASYMM16      |
- * |QASYMM8_SIGNED |QASYMM8, QASYMM8_SIGNED, QASYMM16      |
- * |F16            |QASYMM8, QASYMM8_SIGNED, QASYMM16      |
- * |F32            |QASYMM8, QASYMM8_SIGNED, QASYMM16      |
+ * |src            |dst                                                   |
+ * |:--------------|:-----------------------------------------------------|
+ * |QASYMM8        |QASYMM8, QASYMM8_SIGNED, QASYMM16                     |
+ * |QASYMM8_SIGNED |QASYMM8, QASYMM8_SIGNED, QASYMM16                     |
+ * |F16            |QASYMM8, QASYMM8_SIGNED, QASYMM16                     |
+ * |F32            |QASYMM8, QASYMM8_SIGNED, QSYMM8_PER_CHANNEL, QASYMM16 |
  *
  * @param[in]  input  Source tensor. The dimensions over the third will be interpreted as batches. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16.
- * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16
+ * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QASYMM16
  */
 void configure(const ITensor *input, ITensor *output);
 /** Static function to check if given info will lead to a valid configuration of @ref NEQuantizationLayer
  *
  * @param[in] input  Input tensor info. The dimensions over the third will be interpreted as batches. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16.
- * @param[in] output Output tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16
+ * @param[in] output Output tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QASYMM16
  *
  * @return a status
  */
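
As a sketch of how the new F32 -> QSYMM8_PER_CHANNEL path could be exercised through NEQuantizationLayer (shapes, scale values and the allocation boilerplate below are illustrative assumptions, following the usual NEON function usage pattern):

Tensor src;
Tensor dst;
src.allocator()->init(TensorInfo(TensorShape(8U, 8U, 4U), 1, DataType::F32));
// The destination QuantizationInfo must carry one scale per channel (4 here).
const std::vector<float> scales{0.1f, 0.2f, 0.05f, 0.5f};
dst.allocator()->init(TensorInfo(TensorShape(8U, 8U, 4U), 1, DataType::QSYMM8_PER_CHANNEL, QuantizationInfo(scales)));

NEQuantizationLayer quantization;
ARM_COMPUTE_ERROR_THROW_ON(NEQuantizationLayer::validate(src.info(), dst.info()));
quantization.configure(&src, &dst);

src.allocator()->allocate();
dst.allocator()->allocate();
// ... fill src with F32 data ...
quantization.run();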

docs/user_guide/operator_list.dox

Lines changed: 1 addition & 1 deletion
@@ -2534,7 +2534,7 @@ where N = batches, C = channels, H = height, W = width, D = depth
 <tr><td>QASYMM8<td>QASYMM8, QASYMM8_SIGNED, QASYMM16
 <tr><td>QASYMM8_SIGNED<td>QASYMM8, QASYMM8_SIGNED, QASYMM16
 <tr><td>F16<td>QASYMM8, QASYMM8_SIGNED, QASYMM16
-<tr><td>F32<td>QASYMM8, QASYMM8_SIGNED, QASYMM16
+<tr><td>F32<td>QASYMM8, QASYMM8_SIGNED, QSYMM8_PER_CHANNEL, QASYMM16
 </table>
 <tr>
 <td>CLQuantizationLayer

src/cpu/kernels/CpuQuantizeKernel.cpp

Lines changed: 30 additions & 6 deletions
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2022, 2024 Arm Limited.
+ * Copyright (c) 2017-2022, 2024-2025 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -55,8 +55,24 @@ Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst)
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED,
                                                          DataType::F16, DataType::F32);
     ARM_COMPUTE_RETURN_ERROR_ON(dst->tensor_shape().total_size() == 0);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(dst, 1, DataType::QSYMM8, DataType::QASYMM8,
-                                                         DataType::QASYMM8_SIGNED, DataType::QASYMM16);
+    if (src->data_type() == DataType::F32)
+    {
+        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(dst, 1, DataType::QSYMM8, DataType::QASYMM8,
+                                                             DataType::QASYMM8_SIGNED, DataType::QASYMM16,
+                                                             DataType::QSYMM8_PER_CHANNEL);
+
+        if (dst->data_type() == DataType::QSYMM8_PER_CHANNEL)
+        {
+            ARM_COMPUTE_RETURN_ERROR_ON(
+                dst->quantization_info().scale().size() !=
+                dst->tensor_shape()[get_data_layout_dimension_index(dst->data_layout(), DataLayoutDimension::CHANNEL)]);
+        }
+    }
+    else
+    {
+        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(dst, 1, DataType::QSYMM8, DataType::QASYMM8,
+                                                             DataType::QASYMM8_SIGNED, DataType::QASYMM16);
+    }
     ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(src, dst);
 
     return Status{};
@@ -94,7 +110,7 @@ void CpuQuantizeKernel::configure(const ITensorInfo *src, ITensorInfo *dst)
         {"op_F32_QASYMM8", REGISTER_FP32_NEON(fp32_u8_run_quantize_qasymm8)},
         {"op_F32_QASYMM8_SIGNED", REGISTER_FP32_NEON(fp32_i8_run_quantize_qasymm8)},
         {"op_F32_QASYMM16", REGISTER_FP32_NEON(fp32_run_quantize_qasymm16)},
-
+        {"op_F32_QSYMM8_PER_CHANNEL", REGISTER_FP32_NEON(fp32_i8_run_quantize_qsymm8_per_channel)},
 #ifdef ARM_COMPUTE_ENABLE_FP16
         {"op_F16_QASYMM8", REGISTER_FP16_NEON(fp16_u8_run_quantize_qasymm8)},
         {"op_F16_QASYMM8_SIGNED", REGISTER_FP16_NEON(fp16_i8_run_quantize_qasymm8)},
@@ -125,7 +141,6 @@ void CpuQuantizeKernel::configure(const ITensorInfo *src, ITensorInfo *dst)
     // Specify datatype for function
     function_to_call += string_from_data_type(src->data_type()) + "_";
     function_to_call += string_from_data_type(dst->data_type());
-
     auto it = quant_map.find(function_to_call);
 
     if (it == quant_map.end())
@@ -136,7 +151,16 @@ void CpuQuantizeKernel::configure(const ITensorInfo *src, ITensorInfo *dst)
 
     // Calculate window. Squash if possible.
     Window win;
-    std::tie(win, _split_dimension) = calculate_squashed_or_max_window(*src);
+    if (dst->data_type() == DataType::QSYMM8_PER_CHANNEL)
+    {
+        // Bring back a full N-dimensional iteration (so channel coord actually goes 0…C-1):
+        win              = calculate_max_window(*src);
+        _split_dimension = Window::DimY;
+    }
+    else
+    {
+        std::tie(win, _split_dimension) = calculate_squashed_or_max_window(*src);
+    }
 
     ICpuKernel::configure(win);
 }
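
Conversely, the new scale-count check in validate_arguments() rejects a QSYMM8_PER_CHANNEL destination whose QuantizationInfo does not carry exactly one scale per channel; an illustrative (made-up) failing case:

const TensorInfo src_info(TensorShape(8U, 8U, 4U), 1, DataType::F32);
// Only 2 scales for a 4-channel tensor: scale().size() != channel dimension.
const TensorInfo dst_info(TensorShape(8U, 8U, 4U), 1, DataType::QSYMM8_PER_CHANNEL,
                          QuantizationInfo(std::vector<float>{0.1f, 0.2f}));
const Status status = NEQuantizationLayer::validate(&src_info, &dst_info);
// bool(status) is expected to be false here.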

src/cpu/kernels/quantize/generic/neon/fp32.cpp

Lines changed: 6 additions & 1 deletion
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024 Arm Limited.
+ * Copyright (c) 2024-2025 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -44,5 +44,10 @@ void fp32_i8_run_quantize_qsymm8(const ITensor *src, ITensor *dst, const Window
 {
     run_quantize_qsymm8<float, int8_t>(src, dst, window);
 }
+
+void fp32_i8_run_quantize_qsymm8_per_channel(const ITensor *src, ITensor *dst, const Window &window)
+{
+    run_quantize_qsymm8_per_channel<float, int8_t>(src, dst, window);
+}
 } // namespace cpu
 } // namespace arm_compute

src/cpu/kernels/quantize/generic/neon/impl.h

Lines changed: 129 additions & 1 deletion
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024 Arm Limited.
+ * Copyright (c) 2024-2025 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -140,6 +140,134 @@ void run_quantize_qsymm8(const ITensor *src, ITensor *dst, const Window &window)
         input, output);
 }
 
+inline float32x4x4_t vquantize_qsymm8_per_channel_scalevalues(const float32x4x4_t &vin, float scale)
+{
+    // pre-compute reciprocal of scale
+    const float32x4_t inv_s = vdupq_n_f32(1.f / scale);
+
+    return {vmulq_f32(vin.val[0], inv_s), vmulq_f32(vin.val[1], inv_s), vmulq_f32(vin.val[2], inv_s),
+            vmulq_f32(vin.val[3], inv_s)};
+}
+
+inline int8x16_t vconvert_to_int8(int32x4x4_t vals)
+{
+    // clamp bounds
+    const int32_t qmin = std::numeric_limits<int8_t>::min(); // -128
+    const int32_t qmax = std::numeric_limits<int8_t>::max(); // +127
+
+    const int32x4_t v_qmin = vdupq_n_s32(qmin);
+    const int32x4_t v_qmax = vdupq_n_s32(qmax);
+    vals.val[0] = vmaxq_s32(v_qmin, vminq_s32(vals.val[0], v_qmax));
+    vals.val[1] = vmaxq_s32(v_qmin, vminq_s32(vals.val[1], v_qmax));
+    vals.val[2] = vmaxq_s32(v_qmin, vminq_s32(vals.val[2], v_qmax));
+    vals.val[3] = vmaxq_s32(v_qmin, vminq_s32(vals.val[3], v_qmax));
+
+    // 3) narrow 32->16 for each
+    const int16x4_t v_s16_0 = vqmovn_s32(vals.val[0]);
+    const int16x4_t v_s16_1 = vqmovn_s32(vals.val[1]);
+    const int16x4_t v_s16_2 = vqmovn_s32(vals.val[2]);
+    const int16x4_t v_s16_3 = vqmovn_s32(vals.val[3]);
+
+    // 4) combine into two int16x8 vectors
+    const int16x8_t v_s16x8_0 = vcombine_s16(v_s16_0, v_s16_1);
+    const int16x8_t v_s16x8_1 = vcombine_s16(v_s16_2, v_s16_3);
+
+    // 5) saturating narrow 16->8
+    const int8x8_t v_s8_0 = vqmovn_s16(v_s16x8_0);
+    const int8x8_t v_s8_1 = vqmovn_s16(v_s16x8_1);
+
+    // 6) combine into one int8x16
+    return vcombine_s8(v_s8_0, v_s8_1);
+}
+
+#ifdef __aarch64__
+inline int32x4x4_t vconvert_to_int32(const float32x4x4_t &vals, arm_compute::RoundingPolicy rp)
+{
+    if (rp == RoundingPolicy::TO_NEAREST_EVEN)
+    {
+        return {vcvtaq_s32_f32(vals.val[0]), vcvtaq_s32_f32(vals.val[1]), vcvtaq_s32_f32(vals.val[2]),
+                vcvtaq_s32_f32(vals.val[3])};
+    }
+    else
+    {
+        return {vcvtq_s32_f32(vals.val[0]), vcvtq_s32_f32(vals.val[1]), vcvtq_s32_f32(vals.val[2]),
+                vcvtq_s32_f32(vals.val[3])};
+    }
+}
+
+inline int8x16_t vquantize_qsymm8_per_channel(const float32x4x4_t vin,
+                                              float               scale, // per-channel scale
+                                              arm_compute::RoundingPolicy rp)
+{
+    auto vscaled_vals = vquantize_qsymm8_per_channel_scalevalues(vin, scale);
+    return vconvert_to_int8(vconvert_to_int32(vscaled_vals, rp));
+}
+#else //__aarch64__
+
+inline int32x4x4_t vconvert_to_int32(const float32x4x4_t &vals)
+{
+    return {// on AArch32 only truncating vcvtq is available
+            vcvtq_s32_f32(vals.val[0]), vcvtq_s32_f32(vals.val[1]), vcvtq_s32_f32(vals.val[2]),
+            vcvtq_s32_f32(vals.val[3])};
+}
+
+inline int8x16_t vquantize_qsymm8_per_channel(const float32x4x4_t vin, float scale)
+{
+    auto vscaled_vals = vquantize_qsymm8_per_channel_scalevalues(vin, scale);
+    return vconvert_to_int8(vconvert_to_int32(vscaled_vals));
+}
+#endif //__aarch64__
+
+template <typename TIn, typename TOut>
+void run_quantize_qsymm8_per_channel(const ITensor *src, ITensor *dst, const Window &window)
+{
+    const auto window_start_x = static_cast<int>(window.x().start());
+
+    const unsigned int channel_idx =
+        get_data_layout_dimension_index(dst->info()->data_layout(), DataLayoutDimension::CHANNEL);
+
+    Window win_collapsed = window.collapse_if_possible(window, Window::DimX, Window::DimZ);
+    const auto window_end_x = static_cast<int>(win_collapsed.x().end());
+
+    win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));
+    Iterator input(src, win_collapsed);
+    Iterator output(dst, win_collapsed);
+    const auto &qinfo = dst->info()->quantization_info();
+
+    execute_window_loop(
+        win_collapsed,
+        [&](const Coordinates &coord)
+        {
+            auto         input_ptr  = reinterpret_cast<const TIn *>(input.ptr());
+            auto         output_ptr = reinterpret_cast<TOut *>(output.ptr());
+            int          x          = window_start_x;
+            const size_t ch         = coord[channel_idx];
+            const float  scale      = qinfo.scale()[ch];
+            for (; x <= (window_end_x - window_step); x += window_step)
+            {
+                const auto vin = load_value(&input_ptr[x]);
+#ifdef __aarch64__
+                const auto vout = vquantize_qsymm8_per_channel(vin, scale, RoundingPolicy::TO_NEAREST_EVEN);
+#else  //__aarch64__
+                const auto vout = vquantize_qsymm8_per_channel(vin, scale /* RoundingPolicy::TO_ZERO */);
#endif //__aarch64__
+                wrapper::vstore(&output_ptr[x], vout);
+            }
+            // Compute left-over elements
+            for (; x < window_end_x; ++x)
+            {
+#ifdef __aarch64__
+                output_ptr[x] = quantize_qsymm8_per_channel(input_ptr[x], dst->info()->quantization_info(), ch,
+                                                            RoundingPolicy::TO_NEAREST_EVEN);
+#else  //__aarch64__
+                output_ptr[x] = quantize_qsymm8_per_channel(input_ptr[x], dst->info()->quantization_info(), ch,
+                                                            RoundingPolicy::TO_ZERO);
+#endif //__aarch64__
+            }
+        },
+        input, output);
+}
+
 template <typename TIn, typename TOut>
 void run_requantize_offset_only_convert(const ITensor *src, ITensor *dst, const Window &window)
 {
src/cpu/kernels/quantize/generic/neon/list.h

Lines changed: 2 additions & 2 deletions
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024 Arm Limited.
+ * Copyright (c) 2024-2025 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -54,7 +54,7 @@ DECLARE_QUANTIZE_KERNEL(fp32_i8_run_quantize_qasymm8);
 DECLARE_QUANTIZE_KERNEL(fp32_run_quantize_qasymm16);
 
 DECLARE_QUANTIZE_KERNEL(fp32_i8_run_quantize_qsymm8);
-
+DECLARE_QUANTIZE_KERNEL(fp32_i8_run_quantize_qsymm8_per_channel);
 DECLARE_QUANTIZE_KERNEL(fp16_u8_run_quantize_qasymm8);
 DECLARE_QUANTIZE_KERNEL(fp16_i8_run_quantize_qasymm8);
 DECLARE_QUANTIZE_KERNEL(fp16_run_quantize_qasymm16);

tests/validation/NEON/QuantizationLayer.cpp

Lines changed: 39 additions & 2 deletions
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2021, 2024 Arm Limited.
+ * Copyright (c) 2017-2021, 2024-2025 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -90,20 +90,44 @@ TEST_CASE(ProperlyRoundedRequantization, framework::DatasetMode::ALL)
     validate(Accessor(output), ref, zero_tolerance_s8);
 }
 
+TEST_CASE(QSymm8_per_channel_validate_scales, framework::DatasetMode::ALL)
+{
+    // In this test we make sure validate does not raise an error when we pass a properly initialized vector of scales matching
+    // the number of channels
+    const auto input_info  = TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::F32);
+    auto       output_info = TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::QSYMM8_PER_CHANNEL);
+    Tensor input = create_tensor<Tensor>(input_info);
+    std::vector<float> scale(16,0.5f);
+    Tensor output = create_tensor<Tensor>(output_info.tensor_shape(), DataType::QSYMM8_PER_CHANNEL, 1, QuantizationInfo(scale));
+    ARM_COMPUTE_EXPECT(bool(NEQuantizationLayer::validate(
+        & input.info()->clone()->set_is_resizable(false),
+        & output.info()->clone()->set_is_resizable(false))) == true, framework::LogLevel::ERRORS);
+}
+
 // *INDENT-OFF*
 // clang-format off
 DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(
     framework::dataset::make("InputInfo", { TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::QASYMM8), // Wrong output data type
                                             TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::F32),     // Wrong output data type
                                             TensorInfo(TensorShape(16U, 16U, 2U, 5U), 1, DataType::F32),      // Missmatching shapes
                                             TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::F32),     // Valid
+                                            TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::QASYMM8), // PER_CHANNEL only supported for F32
+                                            TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::QSYMM8),  // PER_CHANNEL only supported for F32
+                                            TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::QSYMM16), // PER_CHANNEL only supported for F32
+                                            TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::F16),     // PER_CHANNEL only supported for F32
+                                            TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::F32),     // Quantization info's scales not initialized
    }),
    framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::F32),
                                            TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::U16),
                                            TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::QASYMM8),
                                            TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::QASYMM8),
+                                           TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::QSYMM8_PER_CHANNEL),
+                                           TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::QSYMM8_PER_CHANNEL),
+                                           TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::QSYMM8_PER_CHANNEL),
+                                           TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::QSYMM8_PER_CHANNEL),
+                                           TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::QSYMM8_PER_CHANNEL),
    })),
-   framework::dataset::make("Expected", { false, false, false, true})),
+   framework::dataset::make("Expected", { false, false, false, true,false,false,false,false,false})),
    input_info, output_info, expected)
 {
     ARM_COMPUTE_EXPECT(bool(NEQuantizationLayer::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false))) == expected, framework::LogLevel::ERRORS);
@@ -117,6 +141,8 @@ template <typename T>
 using NEQuantizationLayerQASYMM8SignedFixture = QuantizationValidationFixture<Tensor, Accessor, NEQuantizationLayer, T, int8_t>;
 template <typename T>
 using NEQuantizationLayerQASYMM16Fixture = QuantizationValidationFixture<Tensor, Accessor, NEQuantizationLayer, T, uint16_t>;
+template <typename T>
+using NEQuantizationLayerQSYMM8_PER_CHANNEL_Fixture = QuantizationValidationFixture<Tensor, Accessor, NEQuantizationLayer, T, int8_t>;
 
 TEST_SUITE(Float)
 TEST_SUITE(FP32)
@@ -160,6 +186,17 @@ FIXTURE_DATA_TEST_CASE(RunLargeQASYMM16, NEQuantizationLayerQASYMM16Fixture<floa
     // Validate output
     validate(Accessor(_target), _reference, tolerance_u16);
 }
+
+FIXTURE_DATA_TEST_CASE(RunSmallQSYMM8_PER_CHANNEL, NEQuantizationLayerQSYMM8_PER_CHANNEL_Fixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(QuantizationSmallShapes,
+                       framework::dataset::make("DataType", DataType::F32)),
+                       framework::dataset::make("DataTypeOut", { DataType::QSYMM8_PER_CHANNEL })),
+                       framework::dataset::make("QuantizationInfoIgnored", { QuantizationInfo() })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_s8);
+}
+
 TEST_SUITE_END() // FP32
 #ifdef ARM_COMPUTE_ENABLE_FP16
 TEST_SUITE(FP16)
