Skip to content

Commit b2b3ed1

Browse files
authored
[DT] Fix a bug in encoding propagation when there are scalar inputs. (#21596)
A linalg op can take scalars as inputs, and the encoding propagation can ignore it. It is similar to 0-D tensor, but a scalar is used. The failure is from the result of QuantizedMatmulToMatmul pass. We can switch to 0-D tensor as a fix. However, it is a legal linalg op, so supporting the propagation on scalars is a better fix. --------- Signed-off-by: hanhanW <[email protected]>
1 parent 3256168 commit b2b3ed1

File tree

2 files changed

+41
-1
lines changed

2 files changed

+41
-1
lines changed

compiler/src/iree/compiler/DispatchCreation/test/hoist_encoding_ops.mlir

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -327,6 +327,41 @@ util.func public @propagate_unset_encoding_through_generic(%arg0: tensor<?x4096x
327327

328328
// -----
329329

330+
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
331+
#map1 = affine_map<(d0, d1, d2) -> (d1, d2)>
332+
#map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
333+
#map3 = affine_map<(d0, d1) -> (d0, d1)>
334+
#map4 = affine_map<(d0, d1) -> ()>
335+
#encoding = #iree_encoding.encoding<operand_index = 2 : index, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [#map, #map1, #map2]>
336+
util.func public @propagate_unset_encoding_through_generic_with_scalar(%arg0: tensor<4096x?xf32, #encoding>, %arg1: f32, %arg2: index) -> tensor<4096x?xf32> {
337+
%0 = flow.dispatch.region -> (tensor<4096x?xf32>{%arg2}) {
338+
%1 = iree_encoding.unset_encoding %arg0 : tensor<4096x?xf32, #encoding> -> tensor<4096x?xf32>{%arg2}
339+
%2 = tensor.empty(%arg2) : tensor<4096x?xf32>
340+
%3 = linalg.generic {indexing_maps = [#map3, #map4, #map3], iterator_types = ["parallel", "parallel"]} ins(%1, %arg1 : tensor<4096x?xf32>, f32) outs(%2 : tensor<4096x?xf32>) {
341+
^bb0(%in: f32, %in_0: f32, %out: f32):
342+
%4 = arith.mulf %in, %in_0 : f32
343+
linalg.yield %4 : f32
344+
} -> tensor<4096x?xf32>
345+
flow.return %3 : tensor<4096x?xf32>
346+
}
347+
util.return %0 : tensor<4096x?xf32>
348+
}
349+
// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2) -> (d0, d2)>
350+
// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2) -> (d1, d2)>
351+
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2) -> (d0, d1)>
352+
// CHECK-DAG: #[[$ENCODING:.+]] = #iree_encoding.encoding<operand_index = 2 : index, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]]>
353+
// CHECK-LABEL: @propagate_unset_encoding_through_generic_with_scalar(
354+
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]
355+
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]
356+
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]
357+
// CHECK: %{{.+}} = flow.dispatch.region -> (tensor<4096x?xf32>{%[[ARG2]]}
358+
// CHECK: %[[GENERIC:.+]] = linalg.generic
359+
// CHECK-SAME: ins(%[[ARG0]], %[[ARG1]]
360+
// CHECK: %[[UNSET_ENCODING:.+]] = iree_encoding.unset_encoding %[[GENERIC]] : tensor<4096x?xf32, #[[$ENCODING]]> -> tensor<4096x?xf32>{%[[ARG2]]}
361+
// CHECK: return %[[UNSET_ENCODING]]
362+
363+
// -----
364+
330365
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
331366
#map1 = affine_map<(d0, d1, d2) -> (d1, d2)>
332367
#map2 = affine_map<(d0, d1, d2) -> (d0, d1)>

compiler/src/iree/compiler/ExternalInterfaces/EncodingExternalModels.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,12 @@ struct GenericOpPropagationInterface
219219
}
220220

221221
auto operandType =
222-
cast<RankedTensorType>(operand->get().getType());
222+
dyn_cast<RankedTensorType>(operand->get().getType());
223+
if (!operandType) {
224+
// Scalar types do not need encodings.
225+
encodedOperands.push_back(operand->get());
226+
continue;
227+
}
223228
auto resType = RankedTensorType::get(
224229
operandType.getShape(), operandType.getElementType(),
225230
encoding);

0 commit comments

Comments (0)