
Commit 7e75e5f

ssjia committed
Update on "[ET-VK] Implement linear_dq8ta_q4gsw"

Title says it all! Builds upon the support for quantized linear introduced in the previous diffs to enable dynamically quantized linear. Also included in this diff is a cleanup of the glslh files used across the quantized linear implementations.

Differential Revision: [D81931060](https://our.internmc.facebook.com/intern/diff/D81931060/)

[ghstack-poisoned]
1 parent 06ffc8c · commit 7e75e5f
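To unpack the op name: linear_dq8ta_q4gsw appears to denote a linear layer with dynamically quantized 8-bit activations (dq8ta) multiplying 4-bit group-symmetric-quantized weights (q4gsw). Below is a rough CPU reference of the arithmetic involved, as a sketch only: it assumes row-major tensors, weights already unpacked to int8 values in [-8, 7], and one dynamic scale per activation row. The function name and signature are illustrative, not the actual Vulkan kernel.

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <vector>

// Hypothetical CPU reference for a dynamically-quantized-activation,
// 4-bit group-symmetric-weight linear layer. Layout, scale granularity,
// and group handling are assumptions, not the kernel from this diff.
std::vector<float> linear_dq8ta_q4gsw_ref(
    const std::vector<float>& x, // [M, K] fp32 activations
    const std::vector<int8_t>& w_q4, // [N, K], 4-bit values in [-8, 7]
    const std::vector<float>& w_scales, // [N, K / group_size]
    int M, int K, int N, int group_size) {
  const int num_groups = K / group_size;
  std::vector<float> out(M * N, 0.0f);
  std::vector<int8_t> x_q(K);
  for (int m = 0; m < M; ++m) {
    // Dynamic symmetric quantization of one activation row to int8
    float amax = 0.0f;
    for (int k = 0; k < K; ++k) {
      amax = std::max(amax, std::fabs(x[m * K + k]));
    }
    const float x_scale = std::max(amax, 1e-8f) / 127.0f;
    for (int k = 0; k < K; ++k) {
      x_q[k] = static_cast<int8_t>(std::lround(x[m * K + k] / x_scale));
    }
    for (int n = 0; n < N; ++n) {
      float acc = 0.0f;
      for (int g = 0; g < num_groups; ++g) {
        // Integer dot product within one weight group; on the GPU this
        // inner loop is what maps onto the int8 dot product hardware path
        int32_t idot = 0;
        for (int k = g * group_size; k < (g + 1) * group_size; ++k) {
          idot += int32_t(x_q[k]) * int32_t(w_q4[n * K + k]);
        }
        acc += float(idot) * x_scale * w_scales[n * num_groups + g];
      }
      out[m * N + n] = acc;
    }
  }
  return out;
}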

File tree

3 files changed: +14 -8 lines changed

backends/vulkan/targets.bzl

Lines changed: 2 additions & 1 deletion

@@ -330,7 +330,8 @@ def define_common_targets(is_fbcode = False):
             "//executorch/exir:tensor",
             "//executorch/exir/backend/canonical_partitioners:config_partitioner_lib",
             "//executorch/backends/vulkan/serialization:lib",
-        ]
+        ],
+        typing = True,
     )
 
     runtime.python_library(

backends/vulkan/test/custom_ops/q4gsw_linear.cpp

Lines changed: 8 additions & 3 deletions

@@ -267,9 +267,14 @@ std::vector<TestCase> generate_quantized_linear_test_cases() {
     config.test_case_name = generated_test_case_name;
 
     for (const auto& storage_type : storage_types) {
-      // Test both activation+weight quantized and weight only quantized
-      test_cases.push_back(
-          create_test_case_from_config(config, storage_type, vkapi::kFloat));
+      // Test both activation+weight quantized and weight only quantized, but
+      // only if the current device supports int8 dot product
+      if (vkcompute::api::context()
+              ->adapter_ptr()
+              ->supports_int8_dot_product()) {
+        test_cases.push_back(
+            create_test_case_from_config(config, storage_type, vkapi::kFloat));
+      }
 
       LinearConfig wo_quant_config = config;
       wo_quant_config.op_name = "linear_q4gsw";
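The new guard matters because the activation-quantized path depends on the device's packed int8 dot product capability (exposed in Vulkan via extensions such as VK_KHR_shader_integer_dot_product); without it, only the weight-only-quantized test cases are generated. As a scalar illustration of the gated operation (not code from this diff), a packed 4x int8 dot product with int32 accumulation looks like:

#include <cstdint>

// Scalar equivalent of a packed 4x int8 dot product with int32
// accumulation. Illustration only; on supported devices this is a
// single hardware instruction inside the shader.
int32_t dot_4x8_acc(uint32_t a_packed, uint32_t b_packed, int32_t acc) {
  for (int i = 0; i < 4; ++i) {
    const int8_t a = static_cast<int8_t>((a_packed >> (8 * i)) & 0xFF);
    const int8_t b = static_cast<int8_t>((b_packed >> (8 * i)) & 0xFF);
    acc += int32_t(a) * int32_t(b);
  }
  return acc;
}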

backends/vulkan/utils.py

Lines changed: 4 additions & 4 deletions

@@ -1142,17 +1142,17 @@ def maybe_skip_q_dq_arg_chain(
 
     # If the arg is a view copy node, check if the original node is a dequant node
     if is_dequant_node(arg) or (
-        is_view_copy_node(arg) and is_dequant_node(arg.args[0])
+        is_view_copy_node(arg) and is_dequant_node(arg.args[0])  # pyre-ignore[6]
     ):
+        dequant_node = arg
         if is_view_copy_node(arg):
             dequant_node = arg.args[0]
-        else:
-            dequant_node = arg
 
-        quant_node = dequant_node.args[0]
+        quant_node = dequant_node.args[0]  # pyre-ignore[16]
         assert isinstance(quant_node, torch.fx.Node)
        source_arg = quant_node.args[0]
         assert isinstance(source_arg, torch.fx.Node)
+        assert isinstance(dequant_node, torch.fx.Node)
         return source_arg, quant_node, dequant_node
     else:
         return arg, None, None
