Skip to content

Commit 84c77f8

Browse files
hanhanW authored and pstarkcdpr committed
[DT] Support partial load/store for identity encoding resolver. (iree-org#22360)
The revision adds the support of partial load/store lowering for identity encoding resolver; it removes the checks from `MaterializeTensorExtDispatchTensorLoadOp` and `MaterializeTensorExtDispatchTensorStoreOp` because they belong to encoding resolver implementation details. The data-tiling encoding resolvers all have the check: https://github.com/iree-org/iree/blob/fcae3fcd1f5032a24ca00d913a6f026cb37edcf1/compiler/src/iree/compiler/Codegen/ExternalInterfaces/Utils.h#L136-L141 The check for padding resolver: https://github.com/iree-org/iree/blob/4127b869cc72b230b56c331e53db7ca71de067b1/compiler/src/iree/compiler/Codegen/ExternalInterfaces/GPUEncodingExternalModels.cpp#L596-L603 Signed-off-by: hanhanW <[email protected]>
1 parent c0e8d3c commit 84c77f8

File tree

3 files changed

+92
-28
lines changed

3 files changed

+92
-28
lines changed

compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingPatterns.cpp

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -229,13 +229,6 @@ struct MaterializeTensorExtDispatchTensorLoadOp
229229
matchAndRewrite(IREE::TensorExt::DispatchTensorLoadOp loadOp,
230230
OpAdaptor adaptor,
231231
ConversionPatternRewriter &rewriter) const override {
232-
// Only handle operations where the load covers the entire
233-
// `!iree_tensor_ext.dispatch.tensor` type.
234-
// TODO(ravishankarm): Relax this for partial loads.
235-
if (!loadOp.isLoadOfWholeSource()) {
236-
return rewriter.notifyMatchFailure(loadOp, "unhandled partial loads");
237-
}
238-
239232
auto sourceType = loadOp.getSourceType();
240233
auto boundTensorType = cast<RankedTensorType>(sourceType.getBoundType());
241234
auto *typeConverter = static_cast<const MaterializeEncodingTypeConverter *>(
@@ -272,13 +265,6 @@ struct MaterializeTensorExtDispatchTensorStoreOp
272265
matchAndRewrite(IREE::TensorExt::DispatchTensorStoreOp storeOp,
273266
OpAdaptor adaptor,
274267
ConversionPatternRewriter &rewriter) const override {
275-
// Only handle operations where the store covers the entire
276-
// `!iree_tensor_ext.dispatch.tensor` type.
277-
// TODO(ravishankarm): Relax this for partial stores.
278-
if (!storeOp.isStoreToWholeTarget()) {
279-
return rewriter.notifyMatchFailure(storeOp, "unhandled partial stores");
280-
}
281-
282268
auto targetType = storeOp.getTargetType();
283269
auto boundTensorType = cast<RankedTensorType>(targetType.getBoundType());
284270
auto *typeConverter = static_cast<const MaterializeEncodingTypeConverter *>(

compiler/src/iree/compiler/Codegen/Common/test/materialize_encoding_for_iree_ops.mlir

Lines changed: 89 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
#encoding_lhs = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [#map, #map1, #map2], iteration_sizes = [?, ?, ?]>
1717
#encoding_rhs = #iree_encoding.encoding<operand_index = 1, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [#map, #map1, #map2], iteration_sizes = [?, ?, ?]>
1818
#encoding_result = #iree_encoding.encoding<operand_index = 2, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [#map, #map1, #map2], iteration_sizes = [?, ?, ?]>
19-
func.func @matmul_lowering_f32f32f32_identity_resolver() attributes {
19+
func.func @matmul_lowering_f32f32f32_identity_resolver_full_slices() attributes {
2020
hal.executable.target = #hal.executable.target<"llvm-cpu", "whatever", {iree.encoding.resolver = #iree_encoding.identity_resolver<>}>
2121
} {
2222
%c0 = arith.constant 0 : index
@@ -48,7 +48,7 @@ func.func @matmul_lowering_f32f32f32_identity_resolver() attributes {
4848
-> !iree_tensor_ext.dispatch.tensor<readwrite:tensor<?x?xf32, #encoding_result>>{%M, %N}
4949
return
5050
}
51-
// CHECK-LABEL: func @matmul_lowering_f32f32f32_identity_resolver()
51+
// CHECK-LABEL: func @matmul_lowering_f32f32f32_identity_resolver_full_slices()
5252
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
5353
// CHECK-DAG: %[[M:.+]] = hal.interface.constant.load layout(#pipeline_layout) ordinal(0)
5454
// CHECK-DAG: %[[N:.+]] = hal.interface.constant.load layout(#pipeline_layout) ordinal(1)
@@ -73,6 +73,93 @@ func.func @matmul_lowering_f32f32f32_identity_resolver() attributes {
7373

7474
// -----
7575

76+
#pipeline_layout = #hal.pipeline.layout<constants = 12, bindings = [
77+
#hal.pipeline.binding<storage_buffer>,
78+
#hal.pipeline.binding<storage_buffer>,
79+
#hal.pipeline.binding<storage_buffer>
80+
]>
81+
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
82+
#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
83+
#map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
84+
#encoding_lhs = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [#map, #map1, #map2], iteration_sizes = [?, ?, ?]>
85+
#encoding_rhs = #iree_encoding.encoding<operand_index = 1, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [#map, #map1, #map2], iteration_sizes = [?, ?, ?]>
86+
#encoding_result = #iree_encoding.encoding<operand_index = 2, op_type = matmul, element_types = [f32, f32, f32], user_indexing_maps = [#map, #map1, #map2], iteration_sizes = [?, ?, ?]>
87+
func.func @matmul_lowering_f32f32f32_identity_resolver_partial_slices() attributes {
88+
hal.executable.target = #hal.executable.target<"llvm-cpu", "whatever", {iree.encoding.resolver = #iree_encoding.identity_resolver<>}>
89+
} {
90+
%c0 = arith.constant 0 : index
91+
%M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index
92+
%N = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : index
93+
%K = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : index
94+
%sizeM = hal.interface.constant.load layout(#pipeline_layout) ordinal(3) : index
95+
%sizeN = hal.interface.constant.load layout(#pipeline_layout) ordinal(4) : index
96+
%sizeK = hal.interface.constant.load layout(#pipeline_layout) ordinal(5) : index
97+
%offsetM = hal.interface.constant.load layout(#pipeline_layout) ordinal(6) : index
98+
%offsetN = hal.interface.constant.load layout(#pipeline_layout) ordinal(7) : index
99+
%offsetK = hal.interface.constant.load layout(#pipeline_layout) ordinal(8) : index
100+
%strideM = hal.interface.constant.load layout(#pipeline_layout) ordinal(9) : index
101+
%strideN = hal.interface.constant.load layout(#pipeline_layout) ordinal(10) : index
102+
%strideK = hal.interface.constant.load layout(#pipeline_layout) ordinal(11) : index
103+
%0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0)
104+
: !iree_tensor_ext.dispatch.tensor<readonly:tensor<?x?xf32, #encoding_lhs>>{%M, %K}
105+
%1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0)
106+
: !iree_tensor_ext.dispatch.tensor<readonly:tensor<?x?xf32, #encoding_rhs>>{%K, %N}
107+
%2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0)
108+
: !iree_tensor_ext.dispatch.tensor<readwrite:tensor<?x?xf32, #encoding_result>>{%M, %N}
109+
%3 = iree_tensor_ext.dispatch.tensor.load %0, offsets = [%offsetM, %offsetK], sizes = [%sizeM, %sizeK], strides = [%strideM, %strideK]
110+
: !iree_tensor_ext.dispatch.tensor<readonly:tensor<?x?xf32, #encoding_lhs>>{%M, %K}
111+
-> tensor<?x?xf32, #encoding_lhs>
112+
%4 = iree_tensor_ext.dispatch.tensor.load %1, offsets = [%offsetK, %offsetN], sizes = [%sizeK, %sizeN], strides = [%strideK, %strideN]
113+
: !iree_tensor_ext.dispatch.tensor<readonly:tensor<?x?xf32, #encoding_rhs>>{%K, %N}
114+
-> tensor<?x?xf32, #encoding_rhs>
115+
%5 = iree_tensor_ext.dispatch.tensor.load %2, offsets = [%offsetM, %offsetN], sizes = [%sizeM, %sizeN], strides = [%strideM, %strideN]
116+
: !iree_tensor_ext.dispatch.tensor<readwrite:tensor<?x?xf32, #encoding_result>>{%M, %N}
117+
-> tensor<?x?xf32, #encoding_result>
118+
%6 = linalg.matmul
119+
ins(%3, %4 : tensor<?x?xf32, #encoding_lhs>,
120+
tensor<?x?xf32, #encoding_rhs>)
121+
outs(%5 : tensor<?x?xf32, #encoding_result>)
122+
-> tensor<?x?xf32, #encoding_result>
123+
iree_tensor_ext.dispatch.tensor.store %6, %2, offsets = [%offsetM, %offsetN], sizes = [%sizeM, %sizeN], strides = [%strideM, %strideN]
124+
: tensor<?x?xf32, #encoding_result>
125+
-> !iree_tensor_ext.dispatch.tensor<readwrite:tensor<?x?xf32, #encoding_result>>{%M, %N}
126+
return
127+
128+
}
129+
// CHECK-LABEL: func @matmul_lowering_f32f32f32_identity_resolver_partial_slices()
130+
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
131+
// CHECK-DAG: %[[M:.+]] = hal.interface.constant.load layout(#pipeline_layout) ordinal(0)
132+
// CHECK-DAG: %[[N:.+]] = hal.interface.constant.load layout(#pipeline_layout) ordinal(1)
133+
// CHECK-DAG: %[[K:.+]] = hal.interface.constant.load layout(#pipeline_layout) ordinal(2)
134+
// CHECK-DAG: %[[SIZE_M:.+]] = hal.interface.constant.load layout(#pipeline_layout) ordinal(3)
135+
// CHECK-DAG: %[[SIZE_N:.+]] = hal.interface.constant.load layout(#pipeline_layout) ordinal(4)
136+
// CHECK-DAG: %[[SIZE_K:.+]] = hal.interface.constant.load layout(#pipeline_layout) ordinal(5)
137+
// CHECK-DAG: %[[OFFSET_M:.+]] = hal.interface.constant.load layout(#pipeline_layout) ordinal(6)
138+
// CHECK-DAG: %[[OFFSET_N:.+]] = hal.interface.constant.load layout(#pipeline_layout) ordinal(7)
139+
// CHECK-DAG: %[[OFFSET_K:.+]] = hal.interface.constant.load layout(#pipeline_layout) ordinal(8)
140+
// CHECK-DAG: %[[STRIDE_M:.+]] = hal.interface.constant.load layout(#pipeline_layout) ordinal(9)
141+
// CHECK-DAG: %[[STRIDE_N:.+]] = hal.interface.constant.load layout(#pipeline_layout) ordinal(10)
142+
// CHECK-DAG: %[[STRIDE_K:.+]] = hal.interface.constant.load layout(#pipeline_layout) ordinal(11)
143+
// CHECK: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(0)
144+
// CHECK-SAME: !iree_tensor_ext.dispatch.tensor<readonly:tensor<?x?xf32>>{%[[M]], %[[K]]}
145+
// CHECK: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1)
146+
// CHECK-SAME: !iree_tensor_ext.dispatch.tensor<readonly:tensor<?x?xf32>>{%[[K]], %[[N]]}
147+
// CHECK: %[[OUTS_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(2)
148+
// CHECK-SAME: !iree_tensor_ext.dispatch.tensor<readwrite:tensor<?x?xf32>>{%[[M]], %[[N]]}
149+
// CHECK: %[[LHS:.+]] = iree_tensor_ext.dispatch.tensor.load %[[LHS_BINDING]]
150+
// CHECK-SAME: offsets = [%[[OFFSET_M]], %[[OFFSET_K]]], sizes = [%[[SIZE_M]], %[[SIZE_K]]], strides = [%[[STRIDE_M]], %[[STRIDE_K]]]
151+
// CHECK: %[[RHS:.+]] = iree_tensor_ext.dispatch.tensor.load %[[RHS_BINDING]]
152+
// CHECK-SAME: offsets = [%[[OFFSET_K]], %[[OFFSET_N]]], sizes = [%[[SIZE_K]], %[[SIZE_N]]], strides = [%[[STRIDE_K]], %[[STRIDE_N]]]
153+
// CHECK: %[[OUTS:.+]] = iree_tensor_ext.dispatch.tensor.load %[[OUTS_BINDING]]
154+
// CHECK-SAME: offsets = [%[[OFFSET_M]], %[[OFFSET_N]]], sizes = [%[[SIZE_M]], %[[SIZE_N]]], strides = [%[[STRIDE_M]], %[[STRIDE_N]]]
155+
// CHECK: %[[RES:.+]] = linalg.matmul
156+
// CHECK-SAME: ins(%[[LHS]], %[[RHS]] :
157+
// CHECK-SAME: outs(%[[OUTS]] :
158+
// CHECK: iree_tensor_ext.dispatch.tensor.store %[[RES]], %[[OUTS_BINDING]]
159+
// CHECK-SAME: offsets = [%[[OFFSET_M]], %[[OFFSET_N]]], sizes = [%[[SIZE_M]], %[[SIZE_N]]], strides = [%[[STRIDE_M]], %[[STRIDE_N]]]
160+
161+
// -----
162+
76163
//----------------------------------------------------------------------------//
77164
// Test suite using CPU encoding resolvers.
78165
//----------------------------------------------------------------------------//

compiler/src/iree/compiler/Dialect/Encoding/IR/EncodingAttrs.cpp

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -550,18 +550,9 @@ LogicalResult IdentityResolverAttr::getOffsetsSizesStrides(
550550
SmallVectorImpl<OpFoldResult> &newOffsets,
551551
SmallVectorImpl<OpFoldResult> &newSizes,
552552
SmallVectorImpl<OpFoldResult> &newStrides) const {
553-
// Only handle cases where the slice spans the whole
554-
// `!iree_tensor_ext.dispatch.tensor` type.
555-
// TODO(hanchung): Enable partial slices. It was copied from pattern's
556-
// implementaion, i.e., the users, and it can be dropped after we move the
557-
// checks to the interface implementations.
558-
if (!type.doesSliceSpanWholeTensor(dynamicDims, offsets, sizes, strides)) {
559-
return failure();
560-
}
561-
auto boundTensorType = cast<RankedTensorType>(type.getBoundType());
562-
newSizes = getMixedValues(boundTensorType.getShape(), dynamicDims, builder);
563-
newOffsets.resize(newSizes.size(), builder.getIndexAttr(0));
564-
newStrides.resize(newSizes.size(), builder.getIndexAttr(1));
553+
newSizes.assign(sizes.begin(), sizes.end());
554+
newOffsets.assign(offsets.begin(), offsets.end());
555+
newStrides.assign(strides.begin(), strides.end());
565556
return success();
566557
}
567558

0 commit comments

Comments
 (0)