Update base for Update on "[ET-VK][Ops] choose_qparams op shaders and impl"

morelos · morelos · commit 1ed63257553d · 2025-06-12T12:49:10.000-07:00
Creates the choose_qparams per_tensor and per_token shaders and their implementation, which are linked with the testing framework. Differential Revision: [D76436933](https://our.internmc.facebook.com/intern/diff/D76436933/) [ghstack-poisoned]
diff --git a/backends/vulkan/runtime/graph/ops/glsl/dequantize_buffer.yaml b/backends/vulkan/runtime/graph/ops/glsl/dequantize_buffer.yaml
@@ -11,7 +11,6 @@ dequantize_buffer:
     OUT_DTYPE:
       - VALUE: half
       - VALUE: float
-      - VALUE: double
   shader_variants:
     - NAME: dequantize_per_tensor_buffer
       MODE: per_tensor
diff --git a/backends/vulkan/runtime/graph/ops/glsl/dequantize_texture.glsl b/backends/vulkan/runtime/graph/ops/glsl/dequantize_texture.glsl
@@ -67,10 +67,7 @@ $if MODE == "per_tensor":
   [[unroll]] for (int i = 0; i < 4; ++i) {
     IN_T qvalue = IN_T(intex[i]);
     OUT_T value = dequantize_val(qvalue, scale, zero_point);
-    $if OUT_DTYPE == "double":
-      outtex[i] = float(value);
-    $else:
-      outtex[i] = value;
+    outtex[i] = value;
   }
   write_texel(t_out, pos, outtex);
 
@@ -113,10 +110,7 @@ $if MODE == "per_token":
   [[unroll]] for (int i = 0; i < 4; ++i) {
     IN_T qvalue = IN_T(intex[i]);
     OUT_T value = dequantize_val(qvalue, scale_val, zero_point_val);
-    $if OUT_DTYPE == "double":
-      outtex[i] = float(value);
-    $else:
-      outtex[i] = value;
+    outtex[i] = value;
   }
 
   write_texel(t_out, pos, outtex);
diff --git a/backends/vulkan/runtime/graph/ops/glsl/dequantize_texture.yaml b/backends/vulkan/runtime/graph/ops/glsl/dequantize_texture.yaml
@@ -11,7 +11,6 @@ dequantize_texture:
     OUT_DTYPE:
       - VALUE: half
       - VALUE: float
-      - VALUE: double
   shader_variants:
     - NAME: dequantize_per_tensor_texture3d
       MODE: per_tensor
diff --git a/backends/vulkan/runtime/graph/ops/glsl/quantize_buffer.yaml b/backends/vulkan/runtime/graph/ops/glsl/quantize_buffer.yaml
@@ -7,7 +7,6 @@ quantize_buffer:
     IN_DTYPE:
       - VALUE: half
       - VALUE: float
-      - VALUE: double
     OUT_DTYPE:
       - VALUE: uint8
       - VALUE: int8
diff --git a/backends/vulkan/runtime/graph/ops/glsl/quantize_texture.yaml b/backends/vulkan/runtime/graph/ops/glsl/quantize_texture.yaml
@@ -7,7 +7,6 @@ quantize_texture:
     IN_DTYPE:
       - VALUE: half
       - VALUE: float
-      - VALUE: double
     OUT_DTYPE:
       - VALUE: uint8
       - VALUE: int8
diff --git a/backends/vulkan/runtime/graph/ops/impl/Quantize.cpp b/backends/vulkan/runtime/graph/ops/impl/Quantize.cpp
@@ -162,7 +162,6 @@ void quantize_per_tensor_impl(
 
   // Verify input is a floating point type
   VK_CHECK_COND(
-      graph.dtype_of(input) == vkapi::kDouble ||
       graph.dtype_of(input) == vkapi::kFloat ||
       graph.dtype_of(input) == vkapi::kHalf);
 
@@ -186,7 +185,6 @@ void quantize_per_token_impl(
 
   // Verify input is a floating point type
   VK_CHECK_COND(
-      graph.dtype_of(input) == vkapi::kDouble ||
       graph.dtype_of(input) == vkapi::kFloat ||
       graph.dtype_of(input) == vkapi::kHalf);
 
diff --git a/backends/vulkan/test/op_tests/dequantize_test.cpp b/backends/vulkan/test/op_tests/dequantize_test.cpp
@@ -364,12 +364,6 @@ void test_vulkan_dequantize_per_tensor(
       vkcompute::utils::kBuffer,
       vkcompute::utils::kBuffer);
 
-  // Telling the system to expect a float instead of a double
-  // since the shader can only return 32bit anyways
-  if (out_dtype == at::kDouble) {
-    out_dtype = at::kFloat;
-  }
-
   // Test with texture storage
   test_vulkan_dequantize_per_tensor_impl(
       input_sizes,
@@ -404,12 +398,6 @@ void test_vulkan_dequantize_per_token(
       vkcompute::utils::kBuffer,
       vkcompute::utils::kBuffer);
 
-  // Telling the system to expect a float instead of a double
-  // since the shader can only return 32bit anyways
-  if (out_dtype == at::kDouble) {
-    out_dtype = at::kFloat;
-  }
-
   // Test with texture storage
   test_vulkan_dequantize_per_token_impl(
       input_sizes,
@@ -779,19 +767,6 @@ TEST(
       at::kHalf); // output dtype
 }
 
-TEST(
-    VulkanDequantizePerTensorTest,
-    test_vulkan_dequantize_per_tensor_int32_to_double) {
-  test_vulkan_dequantize_per_tensor(
-      {2, 4, 3}, // input sizes
-      0.0001, // scale
-      100, // zero_point
-      -2147483648, // quant_min
-      2147483647, // quant_max
-      at::kInt, // input dtype
-      at::kDouble); // output dtype
-}
-
 void test_reference_dequantize_per_token(
     const std::vector<int>& input_sizes,
     const std::vector<float>& scales,
@@ -1257,19 +1232,3 @@ TEST(
       at::kInt, // input dtype
       at::kHalf); // output dtype
 }
-
-TEST(
-    VulkanDequantizePerTokenTest,
-    test_vulkan_dequantize_per_token_int32_to_double) {
-  std::vector<float> scales = {0.0001, 0.0002, 0.0003, 0.0};
-  std::vector<int> zero_points = {100, -100, 50, -50};
-
-  test_vulkan_dequantize_per_token(
-      {2, 2, 8}, // input sizes (2*2=4 tokens)
-      scales,
-      zero_points,
-      -2147483648, // quant_min
-      2147483647, // quant_max
-      at::kInt, // input dtype
-      at::kDouble); // output dtype
-}
diff --git a/backends/vulkan/test/op_tests/quantize_test.cpp b/backends/vulkan/test/op_tests/quantize_test.cpp
@@ -314,12 +314,6 @@ void test_vulkan_quantize_per_tensor(
       vkcompute::utils::kBuffer,
       vkcompute::utils::kBuffer);
 
-  // If the in_dtype is a double, convert to float for texture implementation
-  // since they don't support 64bit as inputs
-  if (in_dtype == at::kDouble) {
-    in_dtype = at::kFloat;
-  }
-
   // Test with texture storage
   test_vulkan_quantize_per_tensor_impl(
       input_sizes,
@@ -354,12 +348,6 @@ void test_vulkan_quantize_per_token(
       vkcompute::utils::kBuffer,
       vkcompute::utils::kBuffer);
 
-  // If the in_dtype is a double, convert to float for texture implementation
-  // since they don't support 64bit as inputs
-  if (in_dtype == at::kDouble) {
-    in_dtype = at::kFloat;
-  }
-
   // Test with texture storage
   test_vulkan_quantize_per_token_impl(
       input_sizes,
@@ -651,19 +639,6 @@ TEST(
       at::kChar); // output dtype
 }
 
-TEST(
-    VulkanQuantizePerTensorTest,
-    test_vulkan_quantize_per_tensor_double_to_int8) {
-  test_vulkan_quantize_per_tensor(
-      {2, 3}, // input sizes
-      0.01, // scale
-      1, // zero_point
-      -128, // quant_min
-      127, // quant_max
-      at::kDouble, // input dtype
-      at::kChar); // output dtype
-}
-
 void test_reference_quantize_per_token(
     const std::vector<int>& input_sizes,
     const std::vector<float>& pre_scales,
@@ -1058,19 +1033,3 @@ TEST(VulkanQuantizePerTensorTest, test_vulkan_quantize_per_token_half_to_int8) {
       at::kHalf, // input dtype
       at::kChar); // output dtype
 }
-
-TEST(
-    VulkanQuantizePerTensorTest,
-    test_vulkan_quantize_per_token_double_to_int8) {
-  std::vector<float> scales = {0.1, 0.2};
-  std::vector<int> zero_points = {0, 5};
-
-  test_vulkan_quantize_per_token(
-      {2, 2}, // input sizes (2*2=4 tokens)
-      scales,
-      zero_points,
-      -128, // quant_min
-      127, // quant_max
-      at::kDouble, // input dtype
-      at::kChar); // output dtype
-}