
Commit bc0a47b

feat: QSYMM8_PER_CHANNEL support in NEQuantizationLayer.
* With this patch we add support for F32 -> QSYMM8_PER_CHANNEL in NEQuantizationLayer
* Resolves: ARMCL-1198

Change-Id: I0579f23ef38f716930401d2cdbeaa8cdd5d70d40
Signed-off-by: Pablo Marquez Tello <[email protected]>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/14692
Comments-Addressed: Arm Jenkins <[email protected]>
Tested-by: Arm Jenkins <[email protected]>
Reviewed-by: Gunes Bayir <[email protected]>
Benchmark: Arm Jenkins <[email protected]>
1 parent 35cd5a7 commit bc0a47b

10 files changed, +267 −37 lines changed


arm_compute/core/QuantizationInfo.h

Lines changed: 9 additions & 5 deletions
@@ -394,15 +394,19 @@ inline int8_t quantize_qsymm8(float value, const QuantizationInfo &qinfo)
 
 /** Quantize a value given a 8-bit symmetric per channel quantization scheme
  *
- * @param[in] value      Value to quantize
- * @param[in] qinfo      Quantization information to use for quantizing
- * @param[in] channel_id channel index into the scale vector of quantization info
+ * @param[in] value           Value to quantize
+ * @param[in] qinfo           Quantization information to use for quantizing
+ * @param[in] channel_id      channel index into the scale vector of quantization info
+ * @param[in] rounding_policy (Optional) Rounding policy to use. Default: nearest up
  *
  * @return Quantized value
  */
-inline int8_t quantize_qsymm8_per_channel(float value, const QuantizationInfo &qinfo, size_t channel_id = 0)
+inline int8_t quantize_qsymm8_per_channel(float value,
+                                          const QuantizationInfo &qinfo,
+                                          size_t                  channel_id      = 0,
+                                          RoundingPolicy          rounding_policy = RoundingPolicy::TO_NEAREST_UP)
 {
-    int quantized = arm_compute::round(value / qinfo.scale()[channel_id], RoundingPolicy::TO_NEAREST_UP);
+    int quantized = arm_compute::round(value / qinfo.scale()[channel_id], rounding_policy);
     quantized = std::max(-128, std::min(quantized, 127));
     return quantized;
 }
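
For reference, a minimal sketch of how the extended helper could be called; the headers, scale values and the example() wrapper below are illustrative assumptions, not part of this patch:

#include "arm_compute/core/QuantizationInfo.h"
#include "arm_compute/core/Rounding.h"

using namespace arm_compute;

void example() // hypothetical caller, for illustration only
{
    // One (made-up) scale per channel.
    const QuantizationInfo qinfo(std::vector<float>{0.25f, 0.5f, 0.125f});
    // Default rounding (TO_NEAREST_UP): round(1.3f / 0.5f) = 3 for channel 1.
    const int8_t q0 = quantize_qsymm8_per_channel(1.3f, qinfo, 1);
    // The new parameter allows picking a different policy, e.g. truncation: 1.3f / 0.5f -> 2.
    const int8_t q1 = quantize_qsymm8_per_channel(1.3f, qinfo, 1, RoundingPolicy::TO_ZERO);
    (void)q0;
    (void)q1;
}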

arm_compute/runtime/NEON/functions/NEQuantizationLayer.h

Lines changed: 8 additions & 8 deletions
@@ -60,21 +60,21 @@ class NEQuantizationLayer : public IFunction
  * - All
  *
  * Valid data type configurations:
- * |src            |dst                                    |
- * |:--------------|:--------------------------------------|
- * |QASYMM8        |QASYMM8, QASYMM8_SIGNED, QASYMM16      |
- * |QASYMM8_SIGNED |QASYMM8, QASYMM8_SIGNED, QASYMM16      |
- * |F16            |QASYMM8, QASYMM8_SIGNED, QASYMM16      |
- * |F32            |QASYMM8, QASYMM8_SIGNED, QASYMM16      |
+ * |src            |dst                                                   |
+ * |:--------------|:-----------------------------------------------------|
+ * |QASYMM8        |QASYMM8, QASYMM8_SIGNED, QASYMM16                     |
+ * |QASYMM8_SIGNED |QASYMM8, QASYMM8_SIGNED, QASYMM16                     |
+ * |F16            |QASYMM8, QASYMM8_SIGNED, QASYMM16                     |
+ * |F32            |QASYMM8, QASYMM8_SIGNED, QSYMM8_PER_CHANNEL, QASYMM16 |
  *
  * @param[in]  input  Source tensor. The dimensions over the third will be interpreted as batches. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16.
- * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16
+ * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QASYMM16
  */
 void configure(const ITensor *input, ITensor *output);
 /** Static function to check if given info will lead to a valid configuration of @ref NEQuantizationLayer
  *
  * @param[in] input  Input tensor info. The dimensions over the third will be interpreted as batches. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16.
- * @param[in] output Output tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16
+ * @param[in] output Output tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QASYMM16
  *
  * @return a status
  */
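
As a sketch of how the new F32 -> QSYMM8_PER_CHANNEL path could be exercised through NEQuantizationLayer (shapes, scale values and the allocation boilerplate below are illustrative assumptions, following the usual NEON function usage pattern):

Tensor src;
Tensor dst;
src.allocator()->init(TensorInfo(TensorShape(8U, 8U, 4U), 1, DataType::F32));
// The destination QuantizationInfo must carry one scale per channel (4 here).
const std::vector<float> scales{0.1f, 0.2f, 0.05f, 0.5f};
dst.allocator()->init(TensorInfo(TensorShape(8U, 8U, 4U), 1, DataType::QSYMM8_PER_CHANNEL, QuantizationInfo(scales)));

NEQuantizationLayer quantization;
ARM_COMPUTE_ERROR_THROW_ON(NEQuantizationLayer::validate(src.info(), dst.info()));
quantization.configure(&src, &dst);

src.allocator()->allocate();
dst.allocator()->allocate();
// ... fill src with F32 data ...
quantization.run();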

docs/user_guide/operator_list.dox

Lines changed: 1 addition & 1 deletion
@@ -2534,7 +2534,7 @@ where N = batches, C = channels, H = height, W = width, D = depth
 <tr><td>QASYMM8<td>QASYMM8, QASYMM8_SIGNED, QASYMM16
 <tr><td>QASYMM8_SIGNED<td>QASYMM8, QASYMM8_SIGNED, QASYMM16
 <tr><td>F16<td>QASYMM8, QASYMM8_SIGNED, QASYMM16
-<tr><td>F32<td>QASYMM8, QASYMM8_SIGNED, QASYMM16
+<tr><td>F32<td>QASYMM8, QASYMM8_SIGNED, QSYMM8_PER_CHANNEL, QASYMM16
 </table>
 <tr>
 <td>CLQuantizationLayer

src/cpu/kernels/CpuQuantizeKernel.cpp

Lines changed: 30 additions & 6 deletions
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2022, 2024 Arm Limited.
+ * Copyright (c) 2017-2022, 2024-2025 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -55,8 +55,24 @@ Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst)
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED,
                                                          DataType::F16, DataType::F32);
     ARM_COMPUTE_RETURN_ERROR_ON(dst->tensor_shape().total_size() == 0);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(dst, 1, DataType::QSYMM8, DataType::QASYMM8,
-                                                         DataType::QASYMM8_SIGNED, DataType::QASYMM16);
+    if (src->data_type() == DataType::F32)
+    {
+        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(dst, 1, DataType::QSYMM8, DataType::QASYMM8,
+                                                             DataType::QASYMM8_SIGNED, DataType::QASYMM16,
+                                                             DataType::QSYMM8_PER_CHANNEL);
+
+        if (dst->data_type() == DataType::QSYMM8_PER_CHANNEL)
+        {
+            ARM_COMPUTE_RETURN_ERROR_ON(
+                dst->quantization_info().scale().size() !=
+                dst->tensor_shape()[get_data_layout_dimension_index(dst->data_layout(), DataLayoutDimension::CHANNEL)]);
+        }
+    }
+    else
+    {
+        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(dst, 1, DataType::QSYMM8, DataType::QASYMM8,
+                                                             DataType::QASYMM8_SIGNED, DataType::QASYMM16);
+    }
     ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(src, dst);
 
     return Status{};
@@ -94,7 +110,7 @@ void CpuQuantizeKernel::configure(const ITensorInfo *src, ITensorInfo *dst)
         {"op_F32_QASYMM8", REGISTER_FP32_NEON(fp32_u8_run_quantize_qasymm8)},
         {"op_F32_QASYMM8_SIGNED", REGISTER_FP32_NEON(fp32_i8_run_quantize_qasymm8)},
         {"op_F32_QASYMM16", REGISTER_FP32_NEON(fp32_run_quantize_qasymm16)},
-
+        {"op_F32_QSYMM8_PER_CHANNEL", REGISTER_FP32_NEON(fp32_i8_run_quantize_qsymm8_per_channel)},
 #ifdef ARM_COMPUTE_ENABLE_FP16
         {"op_F16_QASYMM8", REGISTER_FP16_NEON(fp16_u8_run_quantize_qasymm8)},
         {"op_F16_QASYMM8_SIGNED", REGISTER_FP16_NEON(fp16_i8_run_quantize_qasymm8)},
@@ -125,7 +141,6 @@ void CpuQuantizeKernel::configure(const ITensorInfo *src, ITensorInfo *dst)
     // Specify datatype for function
     function_to_call += string_from_data_type(src->data_type()) + "_";
     function_to_call += string_from_data_type(dst->data_type());
-
     auto it = quant_map.find(function_to_call);
 
     if (it == quant_map.end())
@@ -136,7 +151,16 @@ void CpuQuantizeKernel::configure(const ITensorInfo *src, ITensorInfo *dst)
 
     // Calculate window. Squash if possible.
     Window win;
-    std::tie(win, _split_dimension) = calculate_squashed_or_max_window(*src);
+    if (dst->data_type() == DataType::QSYMM8_PER_CHANNEL)
+    {
+        // Bring back a full N-dimensional iteration (so channel coord actually goes 0…C-1):
+        win              = calculate_max_window(*src);
+        _split_dimension = Window::DimY;
+    }
+    else
+    {
+        std::tie(win, _split_dimension) = calculate_squashed_or_max_window(*src);
+    }
 
     ICpuKernel::configure(win);
 }
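
Conversely, the new scale-count check in validate_arguments() rejects a QSYMM8_PER_CHANNEL destination whose QuantizationInfo does not carry exactly one scale per channel; an illustrative (made-up) failing case:

const TensorInfo src_info(TensorShape(8U, 8U, 4U), 1, DataType::F32);
// Only 2 scales for a 4-channel tensor: scale().size() != channel dimension.
const TensorInfo dst_info(TensorShape(8U, 8U, 4U), 1, DataType::QSYMM8_PER_CHANNEL,
                          QuantizationInfo(std::vector<float>{0.1f, 0.2f}));
const Status status = NEQuantizationLayer::validate(&src_info, &dst_info);
// bool(status) is expected to be false here.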

src/cpu/kernels/quantize/generic/neon/fp32.cpp

Lines changed: 6 additions & 1 deletion
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024 Arm Limited.
+ * Copyright (c) 2024-2025 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -44,5 +44,10 @@ void fp32_i8_run_quantize_qsymm8(const ITensor *src, ITensor *dst, const Window
 {
     run_quantize_qsymm8<float, int8_t>(src, dst, window);
 }
+
+void fp32_i8_run_quantize_qsymm8_per_channel(const ITensor *src, ITensor *dst, const Window &window)
+{
+    run_quantize_qsymm8_per_channel<float, int8_t>(src, dst, window);
+}
 } // namespace cpu
 } // namespace arm_compute

src/cpu/kernels/quantize/generic/neon/impl.h

Lines changed: 129 additions & 1 deletion
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024 Arm Limited.
+ * Copyright (c) 2024-2025 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -140,6 +140,134 @@ void run_quantize_qsymm8(const ITensor *src, ITensor *dst, const Window &window)
         input, output);
 }
 
+inline float32x4x4_t vquantize_qsymm8_per_channel_scalevalues(const float32x4x4_t &vin, float scale)
+{
+    // pre-compute reciprocal of scale
+    const float32x4_t inv_s = vdupq_n_f32(1.f / scale);
+
+    return {vmulq_f32(vin.val[0], inv_s), vmulq_f32(vin.val[1], inv_s), vmulq_f32(vin.val[2], inv_s),
+            vmulq_f32(vin.val[3], inv_s)};
+}
+
+inline int8x16_t vconvert_to_int8(int32x4x4_t vals)
+{
+    // clamp bounds
+    const int32_t qmin = std::numeric_limits<int8_t>::min(); // -128
+    const int32_t qmax = std::numeric_limits<int8_t>::max(); // +127
+
+    const int32x4_t v_qmin = vdupq_n_s32(qmin);
+    const int32x4_t v_qmax = vdupq_n_s32(qmax);
+    vals.val[0] = vmaxq_s32(v_qmin, vminq_s32(vals.val[0], v_qmax));
+    vals.val[1] = vmaxq_s32(v_qmin, vminq_s32(vals.val[1], v_qmax));
+    vals.val[2] = vmaxq_s32(v_qmin, vminq_s32(vals.val[2], v_qmax));
+    vals.val[3] = vmaxq_s32(v_qmin, vminq_s32(vals.val[3], v_qmax));
+
+    // 3) narrow 32->16 for each
+    const int16x4_t v_s16_0 = vqmovn_s32(vals.val[0]);
+    const int16x4_t v_s16_1 = vqmovn_s32(vals.val[1]);
+    const int16x4_t v_s16_2 = vqmovn_s32(vals.val[2]);
+    const int16x4_t v_s16_3 = vqmovn_s32(vals.val[3]);
+
+    // 4) combine into two int16x8 vectors
+    const int16x8_t v_s16x8_0 = vcombine_s16(v_s16_0, v_s16_1);
+    const int16x8_t v_s16x8_1 = vcombine_s16(v_s16_2, v_s16_3);
+
+    // 5) saturating narrow 16->8
+    const int8x8_t v_s8_0 = vqmovn_s16(v_s16x8_0);
+    const int8x8_t v_s8_1 = vqmovn_s16(v_s16x8_1);
+
+    // 6) combine into one int8x16
+    return vcombine_s8(v_s8_0, v_s8_1);
+}
+
+#ifdef __aarch64__
+inline int32x4x4_t vconvert_to_int32(const float32x4x4_t &vals, arm_compute::RoundingPolicy rp)
+{
+    if (rp == RoundingPolicy::TO_NEAREST_EVEN)
+    {
+        return {vcvtaq_s32_f32(vals.val[0]), vcvtaq_s32_f32(vals.val[1]), vcvtaq_s32_f32(vals.val[2]),
+                vcvtaq_s32_f32(vals.val[3])};
+    }
+    else
+    {
+        return {vcvtq_s32_f32(vals.val[0]), vcvtq_s32_f32(vals.val[1]), vcvtq_s32_f32(vals.val[2]),
+                vcvtq_s32_f32(vals.val[3])};
+    }
+}
+
+inline int8x16_t vquantize_qsymm8_per_channel(const float32x4x4_t vin,
+                                              float               scale, // per-channel scale
+                                              arm_compute::RoundingPolicy rp)
+{
+    auto vscaled_vals = vquantize_qsymm8_per_channel_scalevalues(vin, scale);
+    return vconvert_to_int8(vconvert_to_int32(vscaled_vals, rp));
+}
+#else //__aarch64__
+
+inline int32x4x4_t vconvert_to_int32(const float32x4x4_t &vals)
+{
+    return {// on AArch32 only truncating vcvtq is available
+            vcvtq_s32_f32(vals.val[0]), vcvtq_s32_f32(vals.val[1]), vcvtq_s32_f32(vals.val[2]),
+            vcvtq_s32_f32(vals.val[3])};
+}
+
+inline int8x16_t vquantize_qsymm8_per_channel(const float32x4x4_t vin, float scale)
+{
+    auto vscaled_vals = vquantize_qsymm8_per_channel_scalevalues(vin, scale);
+    return vconvert_to_int8(vconvert_to_int32(vscaled_vals));
+}
+#endif //__aarch64__
+
+template <typename TIn, typename TOut>
+void run_quantize_qsymm8_per_channel(const ITensor *src, ITensor *dst, const Window &window)
+{
+    const auto window_start_x = static_cast<int>(window.x().start());
+
+    const unsigned int channel_idx =
+        get_data_layout_dimension_index(dst->info()->data_layout(), DataLayoutDimension::CHANNEL);
+
+    Window win_collapsed = window.collapse_if_possible(window, Window::DimX, Window::DimZ);
+    const auto window_end_x = static_cast<int>(win_collapsed.x().end());
+
+    win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));
+    Iterator input(src, win_collapsed);
+    Iterator output(dst, win_collapsed);
+    const auto &qinfo = dst->info()->quantization_info();
+
+    execute_window_loop(
+        win_collapsed,
+        [&](const Coordinates &coord)
+        {
+            auto         input_ptr  = reinterpret_cast<const TIn *>(input.ptr());
+            auto         output_ptr = reinterpret_cast<TOut *>(output.ptr());
+            int          x          = window_start_x;
+            const size_t ch         = coord[channel_idx];
+            const float  scale      = qinfo.scale()[ch];
+            for (; x <= (window_end_x - window_step); x += window_step)
+            {
+                const auto vin = load_value(&input_ptr[x]);
+#ifdef __aarch64__
+                const auto vout = vquantize_qsymm8_per_channel(vin, scale, RoundingPolicy::TO_NEAREST_EVEN);
+#else  //__aarch64__
+                const auto vout = vquantize_qsymm8_per_channel(vin, scale /* RoundingPolicy::TO_ZERO */);
#endif //__aarch64__
+                wrapper::vstore(&output_ptr[x], vout);
+            }
+            // Compute left-over elements
+            for (; x < window_end_x; ++x)
+            {
+#ifdef __aarch64__
+                output_ptr[x] = quantize_qsymm8_per_channel(input_ptr[x], dst->info()->quantization_info(), ch,
+                                                            RoundingPolicy::TO_NEAREST_EVEN);
+#else  //__aarch64__
+                output_ptr[x] = quantize_qsymm8_per_channel(input_ptr[x], dst->info()->quantization_info(), ch,
+                                                            RoundingPolicy::TO_ZERO);
+#endif //__aarch64__
+            }
+        },
+        input, output);
+}
+
 template <typename TIn, typename TOut>
 void run_requantize_offset_only_convert(const ITensor *src, ITensor *dst, const Window &window)
 {
src/cpu/kernels/quantize/generic/neon/list.h

Lines changed: 2 additions & 2 deletions
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024 Arm Limited.
+ * Copyright (c) 2024-2025 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -54,7 +54,7 @@ DECLARE_QUANTIZE_KERNEL(fp32_i8_run_quantize_qasymm8);
 DECLARE_QUANTIZE_KERNEL(fp32_run_quantize_qasymm16);
 
 DECLARE_QUANTIZE_KERNEL(fp32_i8_run_quantize_qsymm8);
-
+DECLARE_QUANTIZE_KERNEL(fp32_i8_run_quantize_qsymm8_per_channel);
 DECLARE_QUANTIZE_KERNEL(fp16_u8_run_quantize_qasymm8);
 DECLARE_QUANTIZE_KERNEL(fp16_i8_run_quantize_qasymm8);
 DECLARE_QUANTIZE_KERNEL(fp16_run_quantize_qasymm16);

tests/validation/NEON/QuantizationLayer.cpp

Lines changed: 39 additions & 2 deletions
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2021, 2024 Arm Limited.
+ * Copyright (c) 2017-2021, 2024-2025 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -90,20 +90,44 @@ TEST_CASE(ProperlyRoundedRequantization, framework::DatasetMode::ALL)
     validate(Accessor(output), ref, zero_tolerance_s8);
 }
 
+TEST_CASE(QSymm8_per_channel_validate_scales, framework::DatasetMode::ALL)
+{
+    // In this test we make sure validate does not raise an error when we pass a properly initialized vector of scales matching
+    // the number of channels
+    const auto input_info  = TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::F32);
+    auto       output_info = TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::QSYMM8_PER_CHANNEL);
+    Tensor input = create_tensor<Tensor>(input_info);
+    std::vector<float> scale(16,0.5f);
+    Tensor output = create_tensor<Tensor>(output_info.tensor_shape(), DataType::QSYMM8_PER_CHANNEL, 1, QuantizationInfo(scale));
+    ARM_COMPUTE_EXPECT(bool(NEQuantizationLayer::validate(
+        & input.info()->clone()->set_is_resizable(false),
+        & output.info()->clone()->set_is_resizable(false))) == true, framework::LogLevel::ERRORS);
+}
+
 // *INDENT-OFF*
 // clang-format off
 DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(
     framework::dataset::make("InputInfo", { TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::QASYMM8), // Wrong output data type
                                             TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::F32),     // Wrong output data type
                                             TensorInfo(TensorShape(16U, 16U, 2U, 5U), 1, DataType::F32),      // Missmatching shapes
                                             TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::F32),     // Valid
+                                            TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::QASYMM8), // PER_CHANNEL only supported for F32
+                                            TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::QSYMM8),  // PER_CHANNEL only supported for F32
+                                            TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::QSYMM16), // PER_CHANNEL only supported for F32
+                                            TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::F16),     // PER_CHANNEL only supported for F32
+                                            TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::F32),     // Quantization info's scales not initialized
    }),
    framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::F32),
                                            TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::U16),
                                            TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::QASYMM8),
                                            TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::QASYMM8),
+                                           TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::QSYMM8_PER_CHANNEL),
+                                           TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::QSYMM8_PER_CHANNEL),
+                                           TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::QSYMM8_PER_CHANNEL),
+                                           TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::QSYMM8_PER_CHANNEL),
+                                           TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::QSYMM8_PER_CHANNEL),
    })),
-   framework::dataset::make("Expected", { false, false, false, true})),
+   framework::dataset::make("Expected", { false, false, false, true,false,false,false,false,false})),
    input_info, output_info, expected)
 {
     ARM_COMPUTE_EXPECT(bool(NEQuantizationLayer::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false))) == expected, framework::LogLevel::ERRORS);
@@ -117,6 +141,8 @@ template <typename T>
 using NEQuantizationLayerQASYMM8SignedFixture = QuantizationValidationFixture<Tensor, Accessor, NEQuantizationLayer, T, int8_t>;
 template <typename T>
 using NEQuantizationLayerQASYMM16Fixture = QuantizationValidationFixture<Tensor, Accessor, NEQuantizationLayer, T, uint16_t>;
+template <typename T>
+using NEQuantizationLayerQSYMM8_PER_CHANNEL_Fixture = QuantizationValidationFixture<Tensor, Accessor, NEQuantizationLayer, T, int8_t>;
 
 TEST_SUITE(Float)
 TEST_SUITE(FP32)
@@ -160,6 +186,17 @@ FIXTURE_DATA_TEST_CASE(RunLargeQASYMM16, NEQuantizationLayerQASYMM16Fixture<floa
     // Validate output
     validate(Accessor(_target), _reference, tolerance_u16);
 }
+
+FIXTURE_DATA_TEST_CASE(RunSmallQSYMM8_PER_CHANNEL, NEQuantizationLayerQSYMM8_PER_CHANNEL_Fixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(QuantizationSmallShapes,
+                       framework::dataset::make("DataType", DataType::F32)),
+                       framework::dataset::make("DataTypeOut", { DataType::QSYMM8_PER_CHANNEL })),
+                       framework::dataset::make("QuantizationInfoIgnored", { QuantizationInfo() })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_s8);
+}
+
 TEST_SUITE_END() // FP32
 #ifdef ARM_COMPUTE_ENABLE_FP16
 TEST_SUITE(FP16)
