Commit f155a52
[Codegen] add transform op for matching attention op (iree-org#22199)
The key difference is that the indexing map is required for attention operations, so `MatchAttentionOp` does not treat it as optional.

Signed-off-by: Bangtian Liu <[email protected]>
1 parent: 670c33c
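For orientation, here is the shape of the new matcher as the tests below exercise it. This is a minimal sketch, not normative: the `#map_*` affine maps are assumed to be declared as in the test file, `@match_attention_example` is a hypothetical name, and the result bindings follow the dimension-constraint test below.

  transform.named_sequence @match_attention_example(
      %op: !transform.any_op {transform.readonly}) -> !transform.any_op {
    // The matcher fails silenceably unless the element types and the
    // indexing maps both match; per the commit message, the indexing_maps
    // argument is required rather than optional for attention.
    %batch, %m, %n, %k1, %k2 =
        transform.iree.match.attention %op,
        query_type = f16, key_type = f16, value_type = f16, output_type = f16,
        indexing_maps = [#map_query, #map_key, #map_value, #map_scale, #map_output] :
        !transform.any_op -> !transform.param<i64>
    // Each result is an i64 param holding the static sizes of one dimension
    // group, so it composes directly with transform.iree.match.dims_equal.
    transform.yield %op : !transform.any_op
  }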

File tree: 5 files changed, +373 −0 lines

compiler/src/iree/compiler/Preprocessing/Common/test/preprocessing_match_ops.mlir

Lines changed: 186 additions & 0 deletions
@@ -922,3 +922,189 @@ module attributes {transform.with_named_sequence} {
     transform.yield
   }
 }
+
+// -----
+
+// Verify that the basic attention matcher works.
+
+#map_query = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d4)>
+#map_key = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d5, d4)>
+#map_value = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d3, d5)>
+#map_scale = affine_map<(d0, d1, d2, d3, d4, d5) -> ()>
+#map_output = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3)>
+
+// CHECK-LABEL: func.func @attention_ops
+func.func @attention_ops(
+    %query: tensor<2x10x6x4xf16>,
+    %key: tensor<2x10x4x4xf16>,
+    %value: tensor<2x10x4x4xf16>,
+    %scale: f16,
+    %output: tensor<2x10x6x4xf16>,
+    %input_mm: tensor<32x64xf32>,
+    %filter_mm: tensor<64x32xf32>,
+    %output_mm: tensor<32x32xf32>) -> (tensor<2x10x6x4xf16>, tensor<32x32xf32>) {
+
+  // CHECK: iree_linalg_ext.attention
+  // CHECK-SAME: match_status = "matched"
+  %res1 = iree_linalg_ext.attention {indexing_maps = [#map_query, #map_key, #map_value, #map_scale, #map_output], match_status = "unmatched"}
+      ins(%query, %key, %value, %scale : tensor<2x10x6x4xf16>, tensor<2x10x4x4xf16>, tensor<2x10x4x4xf16>, f16)
+      outs(%output : tensor<2x10x6x4xf16>) {
+    ^bb0(%arg: f32):
+      iree_linalg_ext.yield %arg : f32
+  } -> tensor<2x10x6x4xf16>
+
+  // CHECK: linalg.matmul
+  // CHECK-SAME: match_status = "unmatched"
+  %res2 = linalg.matmul
+      ins(%input_mm, %filter_mm : tensor<32x64xf32>, tensor<64x32xf32>)
+      outs(%output_mm : tensor<32x32xf32>) {match_status = "unmatched"} -> tensor<32x32xf32>
+
+  return %res1, %res2 : tensor<2x10x6x4xf16>, tensor<32x32xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @match_attention(%op: !transform.any_op {transform.readonly}) -> !transform.any_op {
+    %batch, %m, %k1, %k2, %n =
+        transform.iree.match.attention %op,
+        query_type = f16, key_type = f16, value_type = f16, output_type = f16,
+        indexing_maps = [#map_query, #map_key, #map_value, #map_scale, #map_output] :
+        !transform.any_op -> !transform.param<i64>
+
+    transform.yield %op : !transform.any_op
+  }
+
+  transform.named_sequence @annotate(%op: !transform.any_op {transform.readonly}) {
+    %0 = transform.param.constant "matched" -> !transform.any_param
+    transform.annotate %op "match_status" = %0 : !transform.any_op, !transform.any_param
+    transform.yield
+  }
+
+  transform.named_sequence @__transform_main(%module: !transform.any_op) {
+    transform.foreach_match in %module
+        @match_attention -> @annotate
+        : (!transform.any_op) -> (!transform.any_op)
+    transform.yield
+  }
+}
+
+// -----
+
+// Verify dimension size constraints for the attention op with lowering config.
+
+#map_query = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d4)>
+#map_key = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d5, d4)>
+#map_value = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d3, d5)>
+#map_scale = affine_map<(d0, d1, d2, d3, d4, d5) -> ()>
+#map_output = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3)>
+
+#lowering_config = #iree_gpu.lowering_config<{promote_operands = [0, 1]}>
+
+// CHECK-LABEL: func.func @attention_constraints
+func.func @attention_constraints(
+    %query: tensor<2x10x6x4xf16>,
+    %key: tensor<2x10x4x4xf16>,
+    %value: tensor<2x10x4x4xf16>,
+    %scale: f16,
+    %output: tensor<2x10x6x4xf16>) -> tensor<2x10x6x4xf16> {
+
+  // CHECK: iree_linalg_ext.attention
+  // CHECK-SAME: lowering_config = #iree_gpu.lowering_config<{promote_operands = [0, 1]}>
+  // CHECK-SAME: match_status = "matched"
+  %res = iree_linalg_ext.attention {
+      indexing_maps = [#map_query, #map_key, #map_value, #map_scale, #map_output],
+      lowering_config = #lowering_config,
+      match_status = "unmatched"}
+      ins(%query, %key, %value, %scale : tensor<2x10x6x4xf16>, tensor<2x10x4x4xf16>, tensor<2x10x4x4xf16>, f16)
+      outs(%output : tensor<2x10x6x4xf16>) {
+    ^bb0(%arg: f32):
+      iree_linalg_ext.yield %arg : f32
+  } -> tensor<2x10x6x4xf16>
+
+  return %res : tensor<2x10x6x4xf16>
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @match_attention_all_dims(%op: !transform.any_op {transform.readonly}) -> !transform.any_op {
+    %batch, %m, %n, %k1, %k2 =
+        transform.iree.match.attention %op,
+        query_type = f16, key_type = f16, value_type = f16, output_type = f16,
+        indexing_maps = [#map_query, #map_key, #map_value, #map_scale, #map_output] :
+        !transform.any_op -> !transform.param<i64>
+
+    transform.iree.match.dims_equal %batch, [2, 10] : !transform.param<i64>
+    transform.iree.match.dims_equal %m, [6] : !transform.param<i64>
+    transform.iree.match.dims_equal %k1, [4] : !transform.param<i64>
+    transform.iree.match.dims_equal %k2, [4] : !transform.param<i64>
+    transform.iree.match.dims_equal %n, [4] : !transform.param<i64>
+    transform.yield %op : !transform.any_op
+  }
+
+  transform.named_sequence @annotate(%op: !transform.any_op {transform.readonly}) {
+    %0 = transform.param.constant "matched" -> !transform.any_param
+    transform.annotate %op "match_status" = %0 : !transform.any_op, !transform.any_param
+    transform.yield
+  }
+
+  transform.named_sequence @__transform_main(%module: !transform.any_op) {
+    transform.foreach_match in %module
+        @match_attention_all_dims -> @annotate
+        : (!transform.any_op) -> (!transform.any_op)
+    transform.yield
+  }
+}
+
+// -----
+
+// Verify indexing maps mismatching for the attention op.
+
+#map_query = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d4)>
+#map_key = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d5, d4)>
+#map_value = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d3, d5)>
+#map_scale = affine_map<(d0, d1, d2, d3, d4, d5) -> ()>
+#map_output = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3)>
+
+#map_wrong_key = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d4, d5)>
+
+// CHECK-LABEL: func.func @indexing_maps_test
+func.func @indexing_maps_test(
+    %query: tensor<2x10x6x4xf16>,
+    %key: tensor<2x10x4x4xf16>,
+    %value: tensor<2x10x4x4xf16>,
+    %scale: f16,
+    %output: tensor<2x10x6x4xf16>) -> tensor<2x10x6x4xf16> {
+
+  // CHECK: iree_linalg_ext.attention
+  // CHECK-SAME: maps_match = "unmatched"
+  %res = iree_linalg_ext.attention {indexing_maps = [#map_query, #map_key, #map_value, #map_scale, #map_output], maps_match = "unmatched"}
+      ins(%query, %key, %value, %scale : tensor<2x10x6x4xf16>, tensor<2x10x4x4xf16>, tensor<2x10x4x4xf16>, f16)
+      outs(%output : tensor<2x10x6x4xf16>) {
+    ^bb0(%arg: f32):
+      iree_linalg_ext.yield %arg : f32
+  } -> tensor<2x10x6x4xf16>
+
+  return %res : tensor<2x10x6x4xf16>
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @match_with_wrong_maps(%op: !transform.any_op {transform.readonly}) -> !transform.any_op {
+    %batch, %m, %k1, %k2, %n =
+        transform.iree.match.attention %op,
+        query_type = f16, key_type = f16, value_type = f16, output_type = f16,
+        indexing_maps = [#map_query, #map_wrong_key, #map_value, #map_scale, #map_output] :
+        !transform.any_op -> !transform.param<i64>
+    transform.yield %op : !transform.any_op
+  }
+
+  transform.named_sequence @annotate(%op: !transform.any_op {transform.readonly}) {
+    %0 = transform.param.constant "matched" -> !transform.any_param
+    transform.annotate %op "maps_match" = %0 : !transform.any_op, !transform.any_param
+    transform.yield
+  }
+
+  transform.named_sequence @__transform_main(%module: !transform.any_op) {
+    transform.foreach_match in %module
+        @match_with_wrong_maps -> @annotate
+        : (!transform.any_op) -> (!transform.any_op)
+    transform.yield
+  }
+}
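A note on how the matched dimension groups fall out of these maps: over the iteration space (d0, ..., d5), d0 and d1 index every operand and form the batch dims with sizes [2, 10]; d2 appears in the query and output maps (M, size [6]); d3 in the value and output maps (N, [4]); d4 in the query and key maps (K1, [4]); and d5 in the key and value maps (K2, [4]). These are exactly the values asserted by the dims_equal checks in the dimension-constraint test.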

compiler/src/iree/compiler/Preprocessing/TransformExtensions/BUILD.bazel

Lines changed: 2 additions & 0 deletions
@@ -56,6 +56,8 @@ iree_compiler_cc_library(
     ],
     deps = [
         ":PreprocessingExtensionsOpGen",
+        "//compiler/src/iree/compiler/Dialect/LinalgExt/IR",
+        "//compiler/src/iree/compiler/Dialect/LinalgExt/Utils",
         "//compiler/src/iree/compiler/Utils",
         "//llvm-external-projects/iree-dialects:IREEDialectsTransforms",
         "//llvm-external-projects/iree-dialects:IREELinalgTransformDialect",

compiler/src/iree/compiler/Preprocessing/TransformExtensions/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
@@ -42,6 +42,8 @@ iree_cc_library(
     MLIRTransformDialectInterfaces
     MLIRTransformUtils
     MLIRValueBoundsOpInterface
+    iree::compiler::Dialect::LinalgExt::IR
+    iree::compiler::Dialect::LinalgExt::Utils
     iree::compiler::Utils
   PUBLIC
 )

compiler/src/iree/compiler/Preprocessing/TransformExtensions/PreprocessingExtensions.cpp

Lines changed: 86 additions & 0 deletions
@@ -6,6 +6,8 @@
 
 #include "iree/compiler/Preprocessing/TransformExtensions/PreprocessingExtensions.h"
 
+#include "iree/compiler/Dialect/LinalgExt/IR/LinalgExtOps.h"
+#include "iree/compiler/Dialect/LinalgExt/Utils/IndexingUtils.h"
 #include "iree/compiler/Utils/EquivalenceUtils.h"
 #include "iree/compiler/Utils/ShapeUtils.h"
 #include "mlir/Dialect/Linalg/IR/LinalgInterfaces.h"
@@ -302,6 +304,90 @@ IREE::transform_dialect::MatchContractionOp::matchOperation(
   return DiagnosedSilenceableFailure::success();
 }
 
+DiagnosedSilenceableFailure
+IREE::transform_dialect::MatchAttentionOp::matchOperation(
+    Operation *current, transform::TransformResults &results,
+    transform::TransformState &state) {
+  Location loc = current->getLoc();
+  auto attentionOp = dyn_cast<IREE::LinalgExt::AttentionOp>(current);
+  if (!attentionOp) {
+    return emitSilenceableFailure(loc)
+           << "Operation is not an attention operation.";
+  }
+
+  Type targetQueryType = getQueryType();
+  Type currentQueryType =
+      getElementTypeOrSelf(attentionOp.getQuery().getType());
+  if (currentQueryType != targetQueryType) {
+    return emitSilenceableFailure(loc)
+           << "Query type doesn't match: expected " << targetQueryType
+           << ", got " << currentQueryType;
+  }
+
+  Type targetKeyType = getKeyType();
+  Type currentKeyType = getElementTypeOrSelf(attentionOp.getKey().getType());
+  if (currentKeyType != targetKeyType) {
+    return emitSilenceableFailure(loc)
+           << "Key type doesn't match: expected " << targetKeyType << ", got "
+           << currentKeyType;
+  }
+
+  Type targetValueType = getValueType();
+  Type currentValueType =
+      getElementTypeOrSelf(attentionOp.getValue().getType());
+  if (currentValueType != targetValueType) {
+    return emitSilenceableFailure(loc)
+           << "Value type doesn't match: expected " << targetValueType
+           << ", got " << currentValueType;
+  }
+
+  Type targetOutputType = getOutputType();
+  Type currentOutputType =
+      getElementTypeOrSelf(attentionOp.getOutput().getType());
+  if (currentOutputType != targetOutputType) {
+    return emitSilenceableFailure(loc)
+           << "Output type doesn't match: expected " << targetOutputType
+           << ", got " << currentOutputType;
+  }
+
+  ArrayAttr currentIndexingMaps = attentionOp.getIndexingMaps();
+  ArrayAttr targetIndexingMaps = getIndexingMaps();
+  if (currentIndexingMaps != targetIndexingMaps) {
+    return emitSilenceableFailure(loc) << "indexing maps don't match";
+  }
+
+  FailureOr<IREE::LinalgExt::AttentionOpDetail> maybeOpInfo =
+      IREE::LinalgExt::AttentionOpDetail::get(
+          attentionOp.getQueryMap(), attentionOp.getKeyMap(),
+          attentionOp.getValueMap(), attentionOp.getOutputMap());
+  if (failed(maybeOpInfo)) {
+    return emitSilenceableFailure(loc)
+           << "Failed to infer attention dimensions";
+  }
+  IREE::LinalgExt::AttentionOpDetail opInfo = *maybeOpInfo;
+  SmallVector<int64_t> iterationDomain = attentionOp.getStaticLoopRanges();
+
+  Builder builder(getContext());
+  auto iterationSizes = [&](ArrayRef<int64_t> dimIndices) {
+    return llvm::map_to_vector(dimIndices, [&](int64_t dimIdx) -> Attribute {
+      return builder.getI64IntegerAttr(iterationDomain[dimIdx]);
+    });
+  };
+
+  results.setParams(cast<OpResult>(getBatchDims()),
+                    iterationSizes(opInfo.getBatchDims()));
+  results.setParams(cast<OpResult>(getMDims()),
+                    iterationSizes(opInfo.getMDims()));
+  results.setParams(cast<OpResult>(getNDims()),
+                    iterationSizes(opInfo.getNDims()));
+  results.setParams(cast<OpResult>(getK1Dims()),
+                    iterationSizes(opInfo.getK1Dims()));
+  results.setParams(cast<OpResult>(getK2Dims()),
+                    iterationSizes(opInfo.getK2Dims()));
+
+  return DiagnosedSilenceableFailure::success();
+}
+
 //===----------------------------------------------------------------------===//
 // MatchConvolutionOp
 //===----------------------------------------------------------------------===//
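Note that every rejection above is emitted with emitSilenceableFailure rather than a definite failure, so when the matcher runs under transform.foreach_match a non-attention op, a mismatched element type, or differing indexing maps simply counts as "no match" and the walk continues; the unmatched linalg.matmul and wrong-maps tests above rely on exactly this behavior.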
