Skip to content

Commit 1ed6325

Browse files
author
morelos
committed
Update base for Update on "[ET-VK][Ops] choose_qparams op shaders and impl"
Creates the choose_qparams per_tensor and per_token shaders and their implementation, which are linked with the testing framework. Differential Revision: [D76436933](https://our.internmc.facebook.com/intern/diff/D76436933/) [ghstack-poisoned]
1 parent 7ea98df commit 1ed6325

File tree

8 files changed

+2
-96
lines changed

8 files changed

+2
-96
lines changed

backends/vulkan/runtime/graph/ops/glsl/dequantize_buffer.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ dequantize_buffer:
1111
OUT_DTYPE:
1212
- VALUE: half
1313
- VALUE: float
14-
- VALUE: double
1514
shader_variants:
1615
- NAME: dequantize_per_tensor_buffer
1716
MODE: per_tensor

backends/vulkan/runtime/graph/ops/glsl/dequantize_texture.glsl

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -67,10 +67,7 @@ $if MODE == "per_tensor":
6767
[[unroll]] for (int i = 0; i < 4; ++i) {
6868
IN_T qvalue = IN_T(intex[i]);
6969
OUT_T value = dequantize_val(qvalue, scale, zero_point);
70-
$if OUT_DTYPE == "double":
71-
outtex[i] = float(value);
72-
$else:
73-
outtex[i] = value;
70+
outtex[i] = value;
7471
}
7572
write_texel(t_out, pos, outtex);
7673

@@ -113,10 +110,7 @@ $if MODE == "per_token":
113110
[[unroll]] for (int i = 0; i < 4; ++i) {
114111
IN_T qvalue = IN_T(intex[i]);
115112
OUT_T value = dequantize_val(qvalue, scale_val, zero_point_val);
116-
$if OUT_DTYPE == "double":
117-
outtex[i] = float(value);
118-
$else:
119-
outtex[i] = value;
113+
outtex[i] = value;
120114
}
121115

122116
write_texel(t_out, pos, outtex);

backends/vulkan/runtime/graph/ops/glsl/dequantize_texture.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ dequantize_texture:
1111
OUT_DTYPE:
1212
- VALUE: half
1313
- VALUE: float
14-
- VALUE: double
1514
shader_variants:
1615
- NAME: dequantize_per_tensor_texture3d
1716
MODE: per_tensor

backends/vulkan/runtime/graph/ops/glsl/quantize_buffer.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ quantize_buffer:
77
IN_DTYPE:
88
- VALUE: half
99
- VALUE: float
10-
- VALUE: double
1110
OUT_DTYPE:
1211
- VALUE: uint8
1312
- VALUE: int8

backends/vulkan/runtime/graph/ops/glsl/quantize_texture.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ quantize_texture:
77
IN_DTYPE:
88
- VALUE: half
99
- VALUE: float
10-
- VALUE: double
1110
OUT_DTYPE:
1211
- VALUE: uint8
1312
- VALUE: int8

backends/vulkan/runtime/graph/ops/impl/Quantize.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,6 @@ void quantize_per_tensor_impl(
162162

163163
// Verify input is a floating point type
164164
VK_CHECK_COND(
165-
graph.dtype_of(input) == vkapi::kDouble ||
166165
graph.dtype_of(input) == vkapi::kFloat ||
167166
graph.dtype_of(input) == vkapi::kHalf);
168167

@@ -186,7 +185,6 @@ void quantize_per_token_impl(
186185

187186
// Verify input is a floating point type
188187
VK_CHECK_COND(
189-
graph.dtype_of(input) == vkapi::kDouble ||
190188
graph.dtype_of(input) == vkapi::kFloat ||
191189
graph.dtype_of(input) == vkapi::kHalf);
192190

backends/vulkan/test/op_tests/dequantize_test.cpp

Lines changed: 0 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -364,12 +364,6 @@ void test_vulkan_dequantize_per_tensor(
364364
vkcompute::utils::kBuffer,
365365
vkcompute::utils::kBuffer);
366366

367-
// Telling the system to expect a float instead of a double
368-
// since the shader can only return 32bit anyways
369-
if (out_dtype == at::kDouble) {
370-
out_dtype = at::kFloat;
371-
}
372-
373367
// Test with texture storage
374368
test_vulkan_dequantize_per_tensor_impl(
375369
input_sizes,
@@ -404,12 +398,6 @@ void test_vulkan_dequantize_per_token(
404398
vkcompute::utils::kBuffer,
405399
vkcompute::utils::kBuffer);
406400

407-
// Telling the system to expect a float instead of a double
408-
// since the shader can only return 32bit anyways
409-
if (out_dtype == at::kDouble) {
410-
out_dtype = at::kFloat;
411-
}
412-
413401
// Test with texture storage
414402
test_vulkan_dequantize_per_token_impl(
415403
input_sizes,
@@ -779,19 +767,6 @@ TEST(
779767
at::kHalf); // output dtype
780768
}
781769

782-
TEST(
783-
VulkanDequantizePerTensorTest,
784-
test_vulkan_dequantize_per_tensor_int32_to_double) {
785-
test_vulkan_dequantize_per_tensor(
786-
{2, 4, 3}, // input sizes
787-
0.0001, // scale
788-
100, // zero_point
789-
-2147483648, // quant_min
790-
2147483647, // quant_max
791-
at::kInt, // input dtype
792-
at::kDouble); // output dtype
793-
}
794-
795770
void test_reference_dequantize_per_token(
796771
const std::vector<int>& input_sizes,
797772
const std::vector<float>& scales,
@@ -1257,19 +1232,3 @@ TEST(
12571232
at::kInt, // input dtype
12581233
at::kHalf); // output dtype
12591234
}
1260-
1261-
TEST(
1262-
VulkanDequantizePerTokenTest,
1263-
test_vulkan_dequantize_per_token_int32_to_double) {
1264-
std::vector<float> scales = {0.0001, 0.0002, 0.0003, 0.0};
1265-
std::vector<int> zero_points = {100, -100, 50, -50};
1266-
1267-
test_vulkan_dequantize_per_token(
1268-
{2, 2, 8}, // input sizes (2*2=4 tokens)
1269-
scales,
1270-
zero_points,
1271-
-2147483648, // quant_min
1272-
2147483647, // quant_max
1273-
at::kInt, // input dtype
1274-
at::kDouble); // output dtype
1275-
}

backends/vulkan/test/op_tests/quantize_test.cpp

Lines changed: 0 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -314,12 +314,6 @@ void test_vulkan_quantize_per_tensor(
314314
vkcompute::utils::kBuffer,
315315
vkcompute::utils::kBuffer);
316316

317-
// If the in_dtype is a double, convert to float for texture implementation
318-
// since they don't support 64bit as inputs
319-
if (in_dtype == at::kDouble) {
320-
in_dtype = at::kFloat;
321-
}
322-
323317
// Test with texture storage
324318
test_vulkan_quantize_per_tensor_impl(
325319
input_sizes,
@@ -354,12 +348,6 @@ void test_vulkan_quantize_per_token(
354348
vkcompute::utils::kBuffer,
355349
vkcompute::utils::kBuffer);
356350

357-
// If the in_dtype is a double, convert to float for texture implementation
358-
// since they don't support 64bit as inputs
359-
if (in_dtype == at::kDouble) {
360-
in_dtype = at::kFloat;
361-
}
362-
363351
// Test with texture storage
364352
test_vulkan_quantize_per_token_impl(
365353
input_sizes,
@@ -651,19 +639,6 @@ TEST(
651639
at::kChar); // output dtype
652640
}
653641

654-
TEST(
655-
VulkanQuantizePerTensorTest,
656-
test_vulkan_quantize_per_tensor_double_to_int8) {
657-
test_vulkan_quantize_per_tensor(
658-
{2, 3}, // input sizes
659-
0.01, // scale
660-
1, // zero_point
661-
-128, // quant_min
662-
127, // quant_max
663-
at::kDouble, // input dtype
664-
at::kChar); // output dtype
665-
}
666-
667642
void test_reference_quantize_per_token(
668643
const std::vector<int>& input_sizes,
669644
const std::vector<float>& pre_scales,
@@ -1058,19 +1033,3 @@ TEST(VulkanQuantizePerTensorTest, test_vulkan_quantize_per_token_half_to_int8) {
10581033
at::kHalf, // input dtype
10591034
at::kChar); // output dtype
10601035
}
1061-
1062-
TEST(
1063-
VulkanQuantizePerTensorTest,
1064-
test_vulkan_quantize_per_token_double_to_int8) {
1065-
std::vector<float> scales = {0.1, 0.2};
1066-
std::vector<int> zero_points = {0, 5};
1067-
1068-
test_vulkan_quantize_per_token(
1069-
{2, 2}, // input sizes (2*2=4 tokens)
1070-
scales,
1071-
zero_points,
1072-
-128, // quant_min
1073-
127, // quant_max
1074-
at::kDouble, // input dtype
1075-
at::kChar); // output dtype
1076-
}

0 commit comments

Comments
 (0)