
Commit e96e3c0

[VectorLayout] Fix insertion of new constOp for non-dominance issue. (#18894)
The main motivation of this patch is to resolve an issue where the same constOp is used by multiple operations, with a twist: the first op that needs a layout on the constOp topologically comes after other ops that also use it. The analysis then generates a copy of the constOp right before that later op, which is problematic because the copy ends up being used by ops that come before it. Previously, the test added here produced this error:

```
within split at contraction_layout.mlir:1 offset :24:10: note: see current operation: %9 = "arith.addf"(%8, %6) <{fastmath = #arith.fastmath<none>}> : (vector<96x64xf16>, vector<96x64xf16>) -> vector<96x64xf16>
within split at contraction_layout.mlir:1 offset :22:19: error: operand #1 does not dominate this use
%scaled_rhs = arith.mulf %read_1, %cst_1 : vector<96x64xf16>
```

While minor, this is also problematic because the error seems to stop layout analysis (though not fatally), such that vector distribution fails in some cases, which makes it hard to debug.

Signed-off-by: Stanley Winata <[email protected]>
1 parent aef6e1f commit e96e3c0

File tree

3 files changed: +46 -4 lines changed


compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_vector_distribution.mlir

Lines changed: 4 additions & 4 deletions
```diff
@@ -666,10 +666,10 @@ builtin.module attributes { transform.with_named_sequence } {
 }
 // CHECK-LABEL: func.func @resolve_constant_with_multiple_layout_uses
 // CHECK-SAME: (%[[ARG0:.+]]: vector<64x64xf16>, %[[ARG0:.+]]: vector<64x64xf16>)
-// CHECK: %[[V0:.+]] = arith.constant dense<0.000000e+00> : vector<2x2x16xf16>
-// CHECK: %[[V1:.+]] = arith.constant dense<0.000000e+00> : vector<2x2x8xf16>
-// CHECK: %[[ADD0:.+]] = arith.addf %{{.+}}, %[[V1]]{{.*}} : vector<2x2x8xf16>
-// CHECK: %[[ADD1:.+]] = arith.addf %{{.+}}, %[[V0]]{{.*}} : vector<2x2x16xf16>
+// CHECK: %[[V0:.+]] = arith.constant dense<0.000000e+00> : vector<2x2x8xf16>
+// CHECK: %[[V1:.+]] = arith.constant dense<0.000000e+00> : vector<2x2x16xf16>
+// CHECK: %[[ADD0:.+]] = arith.addf %{{.+}}, %[[V0]]{{.*}} : vector<2x2x8xf16>
+// CHECK: %[[ADD1:.+]] = arith.addf %{{.+}}, %[[V1]]{{.*}} : vector<2x2x16xf16>
 // CHECK: arith.addf %{{.+}}, %[[ADD0]]{{.*}} : vector<2x2x8xf16>
 
 transform.named_sequence @__transform_main(%variant_op: !transform.any_op {transform.readonly}) {
```

compiler/src/iree/compiler/Codegen/Common/VectorLayoutAnalysis.cpp

Lines changed: 1 addition & 0 deletions
```diff
@@ -205,6 +205,7 @@ ChangeResult DistributionLayout::resolveWithPossibleConflict(
   if (!opOperand.get().hasOneUse() && !vectorLayout &&
       llvm::dyn_cast_or_null<arith::ConstantOp>(
           opOperand.get().getDefiningOp())) {
+    builder.setInsertionPoint(opOperand.get().getDefiningOp());
     Operation *copiedConstOp = builder.clone(*opOperand.get().getDefiningOp());
     Value copiedConst = copiedConstOp->getResult(0);
     builder.replaceAllUsesExcept(opOperand.get(), copiedConst,
```
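
For context, here is a self-contained sketch of what the cloning logic looks like with the new insertion-point call in place. The helper name, its `RewriterBase &builder` signature, and the choice of `opOperand.getOwner()` as the excepted user are assumptions for illustration (the real check also consults the operand's existing `vectorLayout` state, which is omitted here); only the marked lines mirror the diff above.

```cpp
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/IR/PatternMatch.h"

using namespace mlir;

// Hypothetical helper: when a multi-use arith.constant needs a distinct
// layout for one particular use, clone the constant next to its original
// definition so the clone dominates every other existing user.
static void cloneConstantForLayoutConflict(RewriterBase &builder,
                                           OpOperand &opOperand) {
  Operation *defOp = opOperand.get().getDefiningOp();
  if (!opOperand.get().hasOneUse() &&
      llvm::dyn_cast_or_null<arith::ConstantOp>(defOp)) {
    // Key fix: clone right before the original constant, not at the builder's
    // current (possibly later) insertion point near the conflicting user.
    builder.setInsertionPoint(defOp);
    Operation *copiedConstOp = builder.clone(*defOp);
    Value copiedConst = copiedConstOp->getResult(0);
    // All other users switch to the clone; the operand being resolved keeps
    // the original constant, which will carry the new layout.
    builder.replaceAllUsesExcept(opOperand.get(), copiedConst,
                                 opOperand.getOwner());
  }
}
```

Placing the clone immediately before the original constant guarantees it dominates every pre-existing use, regardless of where in the function the layout conflict was discovered.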

compiler/src/iree/compiler/Codegen/Common/test/vector_layout_analysis.mlir

Lines changed: 41 additions & 0 deletions
```diff
@@ -464,6 +464,47 @@ builtin.module attributes { transform.with_named_sequence } {
 
 // -----
 
+#contract_layout = #iree_vector_ext.nested_layout<
+  subgroup_tile = [1, 1],
+  batch_tile = [3, 2],
+  outer_tile = [4, 1],
+  thread_tile = [2, 32],
+  element_tile = [4, 1],
+
+  subgroup_strides = [0, 0],
+  thread_strides = [32, 1]
+>
+
+// This test ensures that we do not run into ops not dominating constantOp operands after layout analysis.
+// We simulate that by applying an elementwise op to the value with a "layout" (i.e. scaled_lhs) after scaled_rhs.
+// If not handled properly, the analysis will generate a constOp right before "scaled_lhs" that is also used by "scaled_rhs".
+builtin.module attributes { transform.with_named_sequence } {
+  func.func @handle_multiuse_constant(%lhs: vector<96x64xf16>, %rhs: vector<96x64xf16>, %arr: memref<96x64xf16>) -> () {
+    %c0 = arith.constant 0 : index
+    %cst = arith.constant 0.000000e+00 : f16
+    %cst_1 = arith.constant dense<1.562500e-02> : vector<96x64xf16>
+    // expected-remark @above {{thread_strides = [32, 1]}}
+    %lhs_layout = iree_vector_ext.to_layout %lhs to layout(#contract_layout) : vector<96x64xf16>
+
+    %scaled_rhs = arith.mulf %rhs, %cst_1 : vector<96x64xf16>
+    // expected-remark @above {{thread_strides = [32, 1]}}
+    %scaled_lhs = arith.mulf %lhs_layout, %cst_1 : vector<96x64xf16>
+    // expected-remark @above {{thread_strides = [32, 1]}}
+    %add = arith.addf %scaled_lhs, %scaled_rhs : vector<96x64xf16>
+    // expected-remark @above {{thread_strides = [32, 1]}}
+    vector.transfer_write %add, %arr[%c0, %c0] {in_bounds = [true, true]} : vector<96x64xf16>, memref<96x64xf16>
+    func.return
+  }
+
+  transform.named_sequence @__transform_main(%variant_op: !transform.any_op {transform.readonly}) {
+    %top_level_func = transform.structured.match ops{["func.func"]} in %variant_op : (!transform.any_op) -> !transform.any_op
+    transform.iree.test_vector_layout_analysis %top_level_func : !transform.any_op
+    transform.yield
+  }
+}
+
+// -----
+
 #layout = #iree_vector_ext.nested_layout<
   subgroup_tile = [2, 1, 1],
   batch_tile = [1, 2, 4],
```
