Update on "[ET-VK] Add custom VkInt4WeightOnlyQuantizer for vulkan"

SS-JIA · SS-JIA · commit 0d56e9a9e809 · 2024-10-15T09:49:40.000-07:00
## Context

This diff adds the `VkInt4WeightOnlyQuantizer` class, which enables 4-bit quantization of linear layers via source transformation. This quantizer class is copied from `torchao.quantization.GPTQ.WeightOnlyInt4Linear` with some minor changes, as annotated in the implementation.

Note that the pt2e quantization flow does not yet support groupwise quantization, so source transformation is the only way to perform groupwise quantization at the moment.

Differential Revision: [D64406457](https://our.internmc.facebook.com/intern/diff/D64406457/)

[ghstack-poisoned]
diff --git a/backends/vulkan/test/op_tests/linear_weight_int4_test.cpp b/backends/vulkan/test/op_tests/linear_weight_int4_test.cpp
@@ -202,15 +202,15 @@ void test_vulkan_linear_int4(
   ASSERT_TRUE(at::allclose(vk_out, out_ref, 1e-4, 1e-4));
 }
 
-TEST(VulkanSDPATest, test_reference_impl) {
+TEST(VulkanInt4LinearTest, test_reference_impl) {
   test_reference_linear_int4(
       /*B = */ 1,
       /*M = */ 4,
       /*K = */ 128,
       /*N = */ 32);
 }
 
-TEST(VulkanSDPATest, test_vulkan_impl) {
+TEST(VulkanInt4LinearTest, test_vulkan_impl) {
   if (!vkcompute::api::context()
            ->adapter_ptr()
            ->has_full_int8_buffers_support()) {