Skip to content

Commit b8b4c8d

Browse files
author
morelos
committed
Update on "[ET] correcting cpu ref quantize_per_channel logic to align with ATen"
# Context The quantize_per_channel was not perfectly aligned with the ATen implementation, and demonstrated errors when specifying different axes. This bug wasn't distinctly acknowledged given that the test suite only had one test for the whole operator. In order to align more closely with ATen, this change simply does a single-loop implementation with direct channel index calculation instead of the old `apply_over_dim_list` approach. # Changes We change the core logic for quantize_per_channel to more properly align with ATen's implementation, and we also change it from the `apply_over_dim_list` approach to a single-loop implementation with direct channel index calculation. This also adds more comprehensive testing for quantize_per_channel so that a bug isn't missed again. Differential Revision: [D77746130](https://our.internmc.facebook.com/intern/diff/D77746130/) [ghstack-poisoned]
2 parents 3ed3dd8 + 4949bba commit b8b4c8d

File tree

2 files changed

+19
-19
lines changed

2 files changed

+19
-19
lines changed

backends/vulkan/runtime/VulkanBackend.cpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -616,22 +616,26 @@ class VulkanBackend final : public ::executorch::runtime::BackendInterface {
616616
// Handle dtype conversion between Vulkan and ExecutorTorch (in-place)
617617
if (vulkan_dtype == vkapi::kFloat &&
618618
et_dtype == executorch::aten::ScalarType::Double) {
619-
// Convert float32 to float64 in-place (backwards to avoid overwriting)
619+
// Convert float32 to float64 in-place (backwards to avoid
620+
// overwriting)
620621
double* data_64 = args[o]->toTensor().mutable_data_ptr<double>();
621622
const float* data_32 = args[o]->toTensor().const_data_ptr<float>();
622623
for (size_t j = args[o]->toTensor().numel() - 1; j >= 0; --j) {
623624
data_64[j] = static_cast<double>(data_32[j]);
624-
if (j == 0) break; // Prevent underflow for size_t
625+
if (j == 0)
626+
break; // Prevent underflow for size_t
625627
}
626628
} else if (
627629
vulkan_dtype == vkapi::kInt &&
628630
et_dtype == executorch::aten::ScalarType::Long) {
629631
// Convert int32 to int64 in-place (backwards to avoid overwriting)
630632
int64_t* data_64 = args[o]->toTensor().mutable_data_ptr<int64_t>();
631-
const int32_t* data_32 = args[o]->toTensor().const_data_ptr<int32_t>();
633+
const int32_t* data_32 =
634+
args[o]->toTensor().const_data_ptr<int32_t>();
632635
for (size_t j = args[o]->toTensor().numel() - 1; j >= 0; --j) {
633636
data_64[j] = static_cast<int64_t>(data_32[j]);
634-
if (j == 0) break; // Prevent underflow for size_t
637+
if (j == 0)
638+
break; // Prevent underflow for size_t
635639
}
636640
}
637641
}

kernels/quantized/cpu/op_quantize.cpp

Lines changed: 11 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -292,29 +292,25 @@ Tensor& quantize_per_channel_out(
292292
const auto* input_data_ptr = input.const_data_ptr<CTYPE_IN>(); \
293293
const int64_t input_numel = input.numel(); \
294294
const int64_t axis_size = input.size(axis); \
295-
\
295+
\
296296
/* Calculate the stride pattern for efficient channel index calculation */ \
297-
int64_t axis_block_size = 1; \
298-
for (int64_t i = axis + 1; i < input.dim(); i++) { \
299-
axis_block_size *= input.size(i); \
297+
int64_t axis_block_size = 1; \
298+
for (int64_t i = axis + 1; i < input.dim(); i++) { \
299+
axis_block_size *= input.size(i); \
300300
} \
301-
\
301+
\
302302
/* Single loop over all elements */ \
303-
for (int64_t i = 0; i < input_numel; i++) { \
303+
for (int64_t i = 0; i < input_numel; i++) { \
304304
/* Calculate which channel this element belongs to */ \
305-
int64_t channel_idx = (i / axis_block_size) % axis_size; \
306-
\
305+
int64_t channel_idx = (i / axis_block_size) % axis_size; \
306+
\
307307
/* Get quantization parameters for this channel */ \
308308
double _scale = scale_data[channel_idx]; \
309309
int64_t _zero_point = zero_point_data[channel_idx]; \
310-
\
310+
\
311311
/* Apply quantization */ \
312-
out_data_ptr[i] = quantize_val<CTYPE_OUT, CTYPE_IN>( \
313-
_scale, \
314-
_zero_point, \
315-
input_data_ptr[i], \
316-
quant_min, \
317-
quant_max); \
312+
out_data_ptr[i] = quantize_val<CTYPE_OUT, CTYPE_IN>( \
313+
_scale, _zero_point, input_data_ptr[i], quant_min, quant_max); \
318314
} \
319315
} break;
320316

0 commit comments

Comments
 (0)