Update on "[ET-VK][Ops] dequantize_per_channel reference impl and testing"

morelos · morelos · commit a253b4d8c37c · 2025-07-11T13:56:07.000-07:00
# Context In order to properly enable dynamic quantization, we create the dequantize_per_channel operator, as it's seemingly useful to have for the pipeline. # Changes This creates the wrapper for the CPU reference implementation, and also a dummy reference implementation I created just to test against it. Differential Revision: [D77746138](https://our.internmc.facebook.com/intern/diff/D77746138/) [ghstack-poisoned]
diff --git a/backends/vulkan/test/op_tests/dequantize_test.cpp b/backends/vulkan/test/op_tests/dequantize_test.cpp
@@ -557,18 +557,6 @@ void test_vulkan_dequantize_per_token_impl(
     const vkcompute::utils::StorageType in_storage,
     const vkcompute::utils::StorageType out_storage);
 
-void test_vulkan_dequantize_per_channel_impl(
-    const std::vector<int>& input_sizes,
-    const std::vector<float>& scales,
-    const std::vector<int>& zero_points,
-    int64_t axis,
-    int64_t quant_min,
-    int64_t quant_max,
-    at::ScalarType dtype,
-    at::ScalarType out_dtype,
-    const vkcompute::utils::StorageType in_storage,
-    const vkcompute::utils::StorageType out_storage);
-
 // Wrapper function to test both buffer and texture storage types
 void test_vulkan_dequantize_per_tensor(
     const std::vector<int>& input_sizes,
@@ -649,49 +637,6 @@ void test_vulkan_dequantize_per_token(
       vkcompute::utils::kTexture3D);
 }
 
-// Wrapper function to test both buffer and texture storage types
-void test_vulkan_dequantize_per_channel(
-    const std::vector<int>& input_sizes,
-    const std::vector<float>& scales,
-    const std::vector<int>& zero_points,
-    int64_t axis,
-    int64_t quant_min,
-    int64_t quant_max,
-    at::ScalarType dtype,
-    at::ScalarType out_dtype) {
-  // Test with buffer storage
-  test_vulkan_dequantize_per_channel_impl(
-      input_sizes,
-      scales,
-      zero_points,
-      axis,
-      quant_min,
-      quant_max,
-      dtype,
-      out_dtype,
-      vkcompute::utils::kBuffer,
-      vkcompute::utils::kBuffer);
-
-  // Telling the system to expect a float instead of a double
-  // since the shader can only return 32bit anyways
-  if (out_dtype == at::kDouble) {
-    out_dtype = at::kFloat;
-  }
-
-  // Test with texture storage
-  test_vulkan_dequantize_per_channel_impl(
-      input_sizes,
-      scales,
-      zero_points,
-      axis,
-      quant_min,
-      quant_max,
-      dtype,
-      out_dtype,
-      vkcompute::utils::kTexture3D,
-      vkcompute::utils::kTexture3D);
-}
-
 void test_reference_dequantize_per_tensor(
     const std::vector<int>& input_sizes,
     float scale,