Compute num_nonbatch_dims from the dynamic-quant input shape; remove the runtime check requiring num_nonbatch_dims == 1

keyprocedure · keyprocedure · commit 62e30e574cff · 2025-04-16T15:15:24.000-07:00
diff --git a/backends/xnnpack/operators/quant_params.py b/backends/xnnpack/operators/quant_params.py
@@ -145,9 +145,14 @@ def quantize_tensor(self, tensor: torch.Tensor) -> torch.Tensor:
     def _from_dynamic_input_node(cls, quant_node: torch.fx.Node) -> QuantParams:
         q_input = quant_node.args[0]  # fp32 input
         assert isinstance(q_input, torch.fx.Node)
-        # TODO - materialize this from the quant_node scale count and val shape
         num_nonbatch_dims = 1
 
+        # Compute non-batch dimensions (shape length - 1), defaulting to 1
+        q_input_val = q_input.meta.get("val", None)
+        q_input_shape = getattr(q_input_val, "shape", None)
+        if q_input_shape is not None:
+            num_nonbatch_dims = max(len(q_input_shape) - 1, 1)
+
         return cls(
             per_channel=False,  # True is not valid
             q_input=q_input,
diff --git a/backends/xnnpack/runtime/XNNCompiler.cpp b/backends/xnnpack/runtime/XNNCompiler.cpp
@@ -512,11 +512,6 @@ Error defineTensor(
             buffer_ptr == nullptr,
             Internal,
             "Dynamically quantized tensor should not have constant data but found non-nullptr");
-        // TODO(T179441835): Dynamic Quantization with num_nonbatch_dims > 1
-        ET_CHECK_OR_RETURN_ERROR(
-            qparams->num_nonbatch_dims() == 1,
-            Internal,
-            "Dynamically Quantized Tensors currently only support per token quantization");
         status = xnn_define_dynamically_quantized_tensor_value(
             /*subgraph=*/subgraph_ptr,
             /*datatype=*/getDataType(tensor_value->datatype()),