
Commit 97d037e

Author: ssjia
Update on "[ET-VK] Quantized Int8 Convolution"
This diff implements int8 quantized conv2d using the quantized linear layer introduced below. Note that the current implementation does not yet support depthwise convs; a specialized implementation will need to be added for that.

Differential Revision: [D81330809](https://our.internmc.facebook.com/intern/diff/D81330809/)

[ghstack-poisoned]
2 parents: ec9b15e + 4981a7c
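
For context, the sketch below shows one way an int8 conv2d can be lowered onto a quantized linear (matmul) kernel: an im2col transform gathers each convolution window into a row, and the convolution then reduces to patches x weight. This is only a conceptual illustration assuming symmetric quantization (zero points of 0) and a batch size of 1; the names im2col and q8csw_linear and the layouts chosen here are hypothetical, not the APIs used in QuantizedConvolution.cpp.

#include <cstddef>
#include <cstdint>
#include <vector>

// Gather each conv window into one row so that conv2d becomes a matmul:
//   output[OH*OW, OC] = patches[OH*OW, IC*KH*KW] x weight[OC, IC*KH*KW]^T
// Zero padding is valid here because an activation zero point of 0 is assumed.
std::vector<std::int8_t> im2col(
    const std::vector<std::int8_t>& input, // [IC, H, W], i.e. NCHW with N = 1
    int IC, int H, int W, int KH, int KW, int stride, int pad) {
  const int OH = (H + 2 * pad - KH) / stride + 1;
  const int OW = (W + 2 * pad - KW) / stride + 1;
  std::vector<std::int8_t> patches(
      static_cast<std::size_t>(OH) * OW * IC * KH * KW, 0);
  for (int oh = 0; oh < OH; ++oh) {
    for (int ow = 0; ow < OW; ++ow) {
      std::int8_t* row =
          &patches[static_cast<std::size_t>(oh * OW + ow) * IC * KH * KW];
      for (int ic = 0; ic < IC; ++ic) {
        for (int kh = 0; kh < KH; ++kh) {
          for (int kw = 0; kw < KW; ++kw) {
            const int ih = oh * stride - pad + kh;
            const int iw = ow * stride - pad + kw;
            if (ih >= 0 && ih < H && iw >= 0 && iw < W) {
              row[(ic * KH + kh) * KW + kw] = input[(ic * H + ih) * W + iw];
            }
          }
        }
      }
    }
  }
  return patches;
}

// int8 x int8 matmul with per-output-channel weight scales: the "quantized
// linear" that the convolution is expressed in terms of (symmetric
// quantization assumed, so no zero-point correction terms).
std::vector<float> q8csw_linear(
    const std::vector<std::int8_t>& patches,  // [M, K], M = OH*OW
    const std::vector<std::int8_t>& weight,   // [OC, K], K = IC*KH*KW
    const std::vector<float>& weight_scales,  // [OC]
    float input_scale, int M, int K, int OC) {
  std::vector<float> out(static_cast<std::size_t>(M) * OC);
  for (int m = 0; m < M; ++m) {
    for (int oc = 0; oc < OC; ++oc) {
      std::int32_t acc = 0;
      for (int k = 0; k < K; ++k) {
        acc += static_cast<std::int32_t>(patches[m * K + k]) *
               static_cast<std::int32_t>(weight[oc * K + k]);
      }
      out[static_cast<std::size_t>(m) * OC + oc] =
          static_cast<float>(acc) * input_scale * weight_scales[oc];
    }
  }
  return out;
}

The resulting [OH*OW, OC] matrix only needs a transpose/reshape back to [OC, OH, OW]. A depthwise conv does not fit this scheme directly, since each output channel sees only its own input channel, which is why the commit message calls out that a specialized implementation is still needed.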

File tree

1 file changed, 4 insertions(+), 0 deletions(-)

backends/vulkan/runtime/graph/ops/impl/QuantizedConvolution.cpp

Lines changed: 4 additions & 0 deletions
@@ -478,6 +478,8 @@ void conv2d_q8csw_linear_tiled_impl(
   const ValueRef padding = args.at(idx++);
   const ValueRef dilation = args.at(idx++);
   const ValueRef groups = args.at(idx++);
+  const ValueRef orig_OC = args.at(idx++);
+  (void)orig_OC;
   const ValueRef output = args.at(idx++);
 
   const ValueRef packed_weight = prepack_q8_linear_weight(graph, weight);
@@ -552,6 +554,8 @@ void conv2d_q8ta_q8csw_linear_tiled_impl(
   const ValueRef padding = args.at(idx++);
   const ValueRef dilation = args.at(idx++);
   const ValueRef groups = args.at(idx++);
+  const ValueRef orig_OC = args.at(idx++);
+  (void)orig_OC;
   const ValueRef output = args.at(idx++);
 
   const ValueRef packed_weight = prepack_q8_linear_weight(graph, weight);
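
Both hunks follow the same pattern: the newly appended orig_OC argument is read so that idx stays aligned with the caller's argument order, and the (void) cast keeps unused-variable warnings quiet until the value is actually consumed. A minimal, self-contained illustration of the idiom, with ValueRef and the argument list as stand-ins for the real graph types:

#include <cstddef>
#include <cstdint>
#include <vector>

using ValueRef = std::int32_t; // stand-in for the real graph value handle

void parse_conv_args(const std::vector<ValueRef>& args) {
  std::size_t idx = 0;
  const ValueRef groups = args.at(idx++);
  // Newly appended argument: it must still be read so that idx lines up
  // with the arguments that follow it.
  const ValueRef orig_OC = args.at(idx++);
  (void)orig_OC; // suppresses unused-variable warnings until orig_OC is used
  const ValueRef output = args.at(idx++);
  (void)groups; // likewise unused in this stripped-down sketch
  (void)output;
}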
