Update on "[ET-VK][testing] Q/DQ/CQP op comprehensive delegate dynamic quantization testing"

morelos · morelos · commit 8e0c6677cf61 · 2025-07-11T12:53:58.000-07:00
# Context We need to ensure that most of the operators that were created work in tandem for dynamic quantization. # Changes This creates two test cases that exercise the per_token and per_tensor pipelines to ensure that the full quantization workflow works as intended. Differential Revision: [D77746139](https://our.internmc.facebook.com/intern/diff/D77746139/) [ghstack-poisoned]
diff --git a/backends/vulkan/test/op_tests/dequantize_test.cpp b/backends/vulkan/test/op_tests/dequantize_test.cpp
@@ -587,17 +587,6 @@ at::Tensor dequantize_per_channel_reference_impl(
 }
 
 // Forward declaration of implementation functions
-void test_vulkan_dequantize_per_tensor_impl(
-    const std::vector<int>& input_sizes,
-    float scale,
-    int zero_point,
-    int64_t quant_min,
-    int64_t quant_max,
-    at::ScalarType dtype,
-    at::ScalarType out_dtype,
-    const vkcompute::utils::StorageType in_storage,
-    const vkcompute::utils::StorageType out_storage);
-
 void test_vulkan_dequantize_per_token_impl(
     const std::vector<int>& input_sizes,
     const std::vector<float>& scales,
@@ -632,46 +621,6 @@ void test_vulkan_dequantize_per_tensor_tensor_impl(
     const vkcompute::utils::StorageType in_storage,
     const vkcompute::utils::StorageType out_storage);
 
-// Wrapper function to test both buffer and texture storage types
-void test_vulkan_dequantize_per_tensor(
-    const std::vector<int>& input_sizes,
-    float scale,
-    int zero_point,
-    int64_t quant_min,
-    int64_t quant_max,
-    at::ScalarType dtype,
-    at::ScalarType out_dtype) {
-  // Test with buffer storage
-  test_vulkan_dequantize_per_tensor_impl(
-      input_sizes,
-      scale,
-      zero_point,
-      quant_min,
-      quant_max,
-      dtype,
-      out_dtype,
-      vkcompute::utils::kBuffer,
-      vkcompute::utils::kBuffer);
-
-  // Telling the system to expect a float instead of a double
-  // since the shader can only return 32bit anyways
-  if (out_dtype == at::kDouble) {
-    out_dtype = at::kFloat;
-  }
-
-  // Test with texture storage
-  test_vulkan_dequantize_per_tensor_impl(
-      input_sizes,
-      scale,
-      zero_point,
-      quant_min,
-      quant_max,
-      dtype,
-      out_dtype,
-      vkcompute::utils::kTexture3D,
-      vkcompute::utils::kTexture3D);
-}
-
 // Wrapper function to test both buffer and texture storage types
 void test_vulkan_dequantize_per_token(
     const std::vector<int>& input_sizes,
diff --git a/backends/vulkan/test/op_tests/quantize_test.cpp b/backends/vulkan/test/op_tests/quantize_test.cpp
@@ -489,17 +489,6 @@ at::Tensor quantize_per_channel_reference_impl(
 }
 
 // Forward declaration of implementation functions
-void test_vulkan_quantize_per_tensor_impl(
-    const std::vector<int>& input_sizes,
-    float scale,
-    int zero_point,
-    int64_t quant_min,
-    int64_t quant_max,
-    at::ScalarType in_dtype,
-    at::ScalarType dtype,
-    const vkcompute::utils::StorageType in_storage,
-    const vkcompute::utils::StorageType out_storage);
-
 void test_vulkan_quantize_per_token_impl(
     const std::vector<int>& input_sizes,
     const std::vector<float>& scales,
@@ -534,46 +523,6 @@ void test_vulkan_quantize_per_tensor_tensor_impl(
     const vkcompute::utils::StorageType in_storage,
     const vkcompute::utils::StorageType out_storage);
 
-// Wrapper function to test both buffer and texture storage types
-void test_vulkan_quantize_per_tensor(
-    const std::vector<int>& input_sizes,
-    float scale,
-    int zero_point,
-    int64_t quant_min,
-    int64_t quant_max,
-    at::ScalarType in_dtype = at::kFloat,
-    at::ScalarType dtype = at::kInt) {
-  // Test with buffer storage
-  test_vulkan_quantize_per_tensor_impl(
-      input_sizes,
-      scale,
-      zero_point,
-      quant_min,
-      quant_max,
-      in_dtype,
-      dtype,
-      vkcompute::utils::kBuffer,
-      vkcompute::utils::kBuffer);
-
-  // If the in_dtype is a double, convert to float for texture implementation
-  // since they don't support 64bit as inputs
-  if (in_dtype == at::kDouble) {
-    in_dtype = at::kFloat;
-  }
-
-  // Test with texture storage
-  test_vulkan_quantize_per_tensor_impl(
-      input_sizes,
-      scale,
-      zero_point,
-      quant_min,
-      quant_max,
-      in_dtype,
-      dtype,
-      vkcompute::utils::kTexture3D,
-      vkcompute::utils::kTexture3D);
-}
-
 // Wrapper function to test both buffer and texture storage types
 void test_vulkan_quantize_per_token(
     const std::vector<int>& input_sizes,
diff --git a/extension/aten_util/test/make_aten_functor_from_et_functor_test.cpp b/extension/aten_util/test/make_aten_functor_from_et_functor_test.cpp
@@ -424,10 +424,12 @@ TEST_F(MakeATenFunctorFromETFunctorTest, TestWrap_ArrayRefOptional) {
 
 TEST_F(MakeATenFunctorFromETFunctorTest, TestConvert_ConstRefOptionals) {
   // Test const optional scalar conversion
-  const std::optional<int64_t> const_optional_at_in = std::optional<int64_t>(42);
+  const std::optional<int64_t> const_optional_at_in =
+      std::optional<int64_t>(42);
   auto const_optional_et =
-      type_convert<const std::optional<int64_t>, torch::executor::optional<int64_t>>(
-          const_optional_at_in)
+      type_convert<
+          const std::optional<int64_t>,
+          torch::executor::optional<int64_t>>(const_optional_at_in)
           .call();
   EXPECT_TRUE(const_optional_et.has_value());
   EXPECT_EQ(const_optional_et.value(), 42);
@@ -442,60 +444,69 @@ TEST_F(MakeATenFunctorFromETFunctorTest, TestConvert_ConstRefOptionals) {
   EXPECT_EQ(optional_et_from_ref.value(), 24);
 
   // Test const optional scalar reference conversion
-  const std::optional<int64_t> const_optional_at_ref_in = std::optional<int64_t>(84);
+  const std::optional<int64_t> const_optional_at_ref_in =
+      std::optional<int64_t>(84);
   auto const_optional_et_from_ref =
-      type_convert<const std::optional<int64_t>&, torch::executor::optional<int64_t>>(
-          const_optional_at_ref_in)
+      type_convert<
+          const std::optional<int64_t>&,
+          torch::executor::optional<int64_t>>(const_optional_at_ref_in)
           .call();
   EXPECT_TRUE(const_optional_et_from_ref.has_value());
   EXPECT_EQ(const_optional_et_from_ref.value(), 84);
 
   // Test const optional tensor conversion
   const std::optional<at::Tensor> const_optional_tensor_at_in =
       std::optional<at::Tensor>(torch::tensor({5}));
-  auto const_optional_tensor_converter =
-      type_convert<
-          const std::optional<at::Tensor>,
-          torch::executor::optional<torch::executor::Tensor>>(const_optional_tensor_at_in);
+  auto const_optional_tensor_converter = type_convert<
+      const std::optional<at::Tensor>,
+      torch::executor::optional<torch::executor::Tensor>>(
+      const_optional_tensor_at_in);
   auto const_optional_tensor_et = const_optional_tensor_converter.call();
   EXPECT_TRUE(const_optional_tensor_et.has_value());
   EXPECT_EQ(const_optional_tensor_et.value().const_data_ptr<int64_t>()[0], 5);
 
   // Test optional tensor reference conversion
   std::optional<at::Tensor> optional_tensor_at_ref_in =
       std::optional<at::Tensor>(torch::tensor({7}));
-  auto optional_tensor_converter_from_ref =
-      type_convert<
-          std::optional<at::Tensor>&,
-          torch::executor::optional<torch::executor::Tensor>>(optional_tensor_at_ref_in);
+  auto optional_tensor_converter_from_ref = type_convert<
+      std::optional<at::Tensor>&,
+      torch::executor::optional<torch::executor::Tensor>>(
+      optional_tensor_at_ref_in);
   auto optional_tensor_et_from_ref = optional_tensor_converter_from_ref.call();
   EXPECT_TRUE(optional_tensor_et_from_ref.has_value());
-  EXPECT_EQ(optional_tensor_et_from_ref.value().const_data_ptr<int64_t>()[0], 7);
+  EXPECT_EQ(
+      optional_tensor_et_from_ref.value().const_data_ptr<int64_t>()[0], 7);
 
   // Test const optional tensor reference conversion
   const std::optional<at::Tensor> const_optional_tensor_at_ref_in =
       std::optional<at::Tensor>(torch::tensor({9}));
-  auto const_optional_tensor_converter_from_ref =
-      type_convert<
-          const std::optional<at::Tensor>&,
-          torch::executor::optional<torch::executor::Tensor>>(const_optional_tensor_at_ref_in);
-  auto const_optional_tensor_et_from_ref = const_optional_tensor_converter_from_ref.call();
+  auto const_optional_tensor_converter_from_ref = type_convert<
+      const std::optional<at::Tensor>&,
+      torch::executor::optional<torch::executor::Tensor>>(
+      const_optional_tensor_at_ref_in);
+  auto const_optional_tensor_et_from_ref =
+      const_optional_tensor_converter_from_ref.call();
   EXPECT_TRUE(const_optional_tensor_et_from_ref.has_value());
-  EXPECT_EQ(const_optional_tensor_et_from_ref.value().const_data_ptr<int64_t>()[0], 9);
+  EXPECT_EQ(
+      const_optional_tensor_et_from_ref.value().const_data_ptr<int64_t>()[0],
+      9);
 
   // Test empty const optional conversions
   const std::optional<int64_t> empty_const_optional_at_in = std::nullopt;
   auto empty_const_optional_et =
-      type_convert<const std::optional<int64_t>, torch::executor::optional<int64_t>>(
-          empty_const_optional_at_in)
+      type_convert<
+          const std::optional<int64_t>,
+          torch::executor::optional<int64_t>>(empty_const_optional_at_in)
           .call();
   EXPECT_FALSE(empty_const_optional_et.has_value());
 
-  const std::optional<at::Tensor> empty_const_optional_tensor_at_in = std::nullopt;
+  const std::optional<at::Tensor> empty_const_optional_tensor_at_in =
+      std::nullopt;
   auto empty_const_optional_tensor_et =
       type_convert<
           const std::optional<at::Tensor>,
-          torch::executor::optional<torch::executor::Tensor>>(empty_const_optional_tensor_at_in)
+          torch::executor::optional<torch::executor::Tensor>>(
+          empty_const_optional_tensor_at_in)
           .call();
   EXPECT_FALSE(empty_const_optional_tensor_et.has_value());
 }