
Commit 54cd445

[Codegen][Tuner] Add root_op for matvec and reduction along VectorDistribute pipeline (#22348)
Context: While triaging the BOO tuner, I came across the bug "No root ops found" and submitted this PR to fix it. The `VectorDistribute` pipeline also supports reduction and matvec operations through the `setReductionConfig()` function; this PR ensures that the `root_op` attribute is correctly added along that configuration path. Once the tuner begins supporting matvec and reduction operations, this PR will become directly useful.

Signed-off-by: Bangtian Liu <[email protected]>
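For orientation: judging by the FileCheck patterns in the new test below (e.g. `{lowering_config = #{{.*}}, root_op}`), `root_op` appears to be a plain unit attribute attached to the root operation. A minimal sketch of what a helper like `setRootOpInfo` could look like follows; this is an illustrative assumption (`setRootOpInfoSketch` is a made-up name), not the actual IREE implementation.

#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/IR/Operation.h"

// Illustrative only: mark `op` as the root op by attaching a `root_op`
// unit attribute, mirroring the marker the tests below check for.
static void setRootOpInfoSketch(mlir::Operation *op) {
  op->setAttr("root_op", mlir::UnitAttr::get(op->getContext()));
}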
1 parent 59ce62f

2 files changed: +42, -0 lines

compiler/src/iree/compiler/Codegen/Dialect/GPU/TargetUtils/ReductionConfigUtils.cpp

Lines changed: 3 additions & 0 deletions

@@ -714,6 +714,9 @@ LogicalResult setReductionConfig(IREE::GPU::TargetAttr target,
       context, CodeGenPipeline::LLVMGPUVectorDistribute, SymbolRefAttr(),
       {workgroupSize, 1, 1}, subgroupSize, pipelineConfig);
 
+  if (clSetTunerAttr) {
+    setRootOpInfo(op);
+  }
   return setTranslationInfo(entryPoint, translationInfo);
 }
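Once the attribute is in place, tuner-side code can recover the root op by scanning for it. The following is a hedged sketch of such a lookup; `findRootOp` is a hypothetical name for illustration, not an existing IREE API.

#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/IR/Operation.h"

// Hypothetical helper: walk the function and return the op carrying the
// `root_op` unit attribute set by the configuration logic above.
static mlir::Operation *findRootOp(mlir::func::FuncOp func) {
  mlir::Operation *root = nullptr;
  func.walk([&](mlir::Operation *op) {
    if (op->hasAttr("root_op"))
      root = op;
  });
  return root;
}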

compiler/src/iree/compiler/Codegen/LLVMGPU/test/config_root_op_attribute.mlir

Lines changed: 39 additions & 0 deletions

@@ -10,3 +10,42 @@ func.func @matmul(%lhs: tensor<4x4xf32>, %rhs: tensor<4x4xf32>) -> tensor<4x4xf3
 }
 
 // CHECK: %2 = linalg.matmul {lowering_config = #{{.*}}, root_op} ins(%arg0, %arg1 : tensor<4x4xf32>, tensor<4x4xf32>) outs(%1 : tensor<4x4xf32>) -> tensor<4x4xf32>
+
+// -----
+
+func.func @matvec(%matrix: tensor<32000x4096xf16>, %vector: tensor<4096xf16>, %init: tensor<32000xf16>) {
+  %output = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer>]>) binding(0) : !iree_tensor_ext.dispatch.tensor<writeonly:tensor<32000xf16>>
+  %result = linalg.matvec ins(%matrix, %vector : tensor<32000x4096xf16>, tensor<4096xf16>) outs(%init : tensor<32000xf16>) -> tensor<32000xf16>
+  iree_tensor_ext.dispatch.tensor.store %result, %output, offsets = [0], sizes = [32000], strides = [1] : tensor<32000xf16> -> !iree_tensor_ext.dispatch.tensor<writeonly:tensor<32000xf16>>
+  return
+}
+
+// CHECK: #translation = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute
+// CHECK-LABEL: func.func @matvec
+// CHECK: linalg.matvec
+// CHECK-SAME: lowering_config = #iree_gpu.lowering_config
+// CHECK-SAME: root_op
+
+// -----
+
+#map3 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
+#map4 = affine_map<(d0, d1, d2, d3) -> (d0, d1)>
+
+func.func @reduction_sum(%input: tensor<2x32x128x4096xf32>, %init: tensor<2x32xf32>) {
+  %output = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer>]>) binding(0) : !iree_tensor_ext.dispatch.tensor<writeonly:tensor<2x32xf32>>
+  %result = linalg.generic {indexing_maps = [#map3, #map4], iterator_types = ["parallel", "parallel", "reduction", "reduction"]}
+      ins(%input : tensor<2x32x128x4096xf32>) outs(%init : tensor<2x32xf32>) {
+  ^bb0(%in: f32, %out: f32):
+    %add = arith.addf %in, %out : f32
+    linalg.yield %add : f32
+  } -> tensor<2x32xf32>
+  iree_tensor_ext.dispatch.tensor.store %result, %output, offsets = [0, 0], sizes = [2, 32], strides = [1, 1] : tensor<2x32xf32> -> !iree_tensor_ext.dispatch.tensor<writeonly:tensor<2x32xf32>>
+  return
+}
+
+// CHECK: #translation = #iree_codegen.translation_info<pipeline = LLVMGPUVectorDistribute
+// CHECK-LABEL: func.func @reduction_sum
+// CHECK: %{{.*}} = linalg.generic
+// CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction", "reduction"]
+// CHECK-SAME: lowering_config = #iree_gpu.lowering_config
+// CHECK-SAME: root_op
