Skip to content

Commit 903ab0a

Browse files
bjacob and hanhanW authored
Integrate LLVM at 9fa55ec3 (#18670)
Cherry-picks: 1. llvm/llvm-project#110918 2. llvm/llvm-project#110904 3. llvm/llvm-project#110927 The revision disables the pack/unpack decomposition when any of the inner tiles is dynamic, because it leads to unbounded stack allocation (which is introduced by the tensor.pad op). It's broken by the `Extend the logic to generalise tensor.pack` commits. See llvm/llvm-project@66f84c8 and llvm/llvm-project@1c01bcb. --------- Signed-off-by: Benoit Jacob <[email protected]> Signed-off-by: hanhanW <[email protected]> Co-authored-by: hanhanW <[email protected]>
1 parent cd48b10 commit 903ab0a

File tree

9 files changed

+114
-16
lines changed

9 files changed

+114
-16
lines changed

compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp

Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include "mlir/Dialect/Utils/IndexingUtils.h"
3131
#include "mlir/Dialect/Utils/StaticValueUtils.h"
3232
#include "mlir/IR/Matchers.h"
33+
#include "mlir/IR/OpDefinition.h"
3334
#include "mlir/IR/TypeUtilities.h"
3435
#include "mlir/Interfaces/FunctionInterfaces.h"
3536
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
@@ -272,14 +273,22 @@ getVectorPreProcStrategy(linalg::LinalgOp linalgOp) {
272273
return VectorPreProcStrategy::None;
273274
}
274275

275-
DictionaryAttr getPipelineConfWithPeelingAttr(MLIRContext *context) {
276+
static DictionaryAttr getPipelineConfWithPeelingAttr(MLIRContext *context) {
276277
auto enableLoopPeelingAttrName = getEnableLoopPeelingAttrName(context);
277278
auto unitAttr = UnitAttr::get(context);
278279

279280
return DictionaryAttr::get(
280281
context, ArrayRef<NamedAttribute>({enableLoopPeelingAttrName, unitAttr}));
281282
}
282283

284+
static DictionaryAttr
285+
getPipelineConfWithDecompositionAttr(MLIRContext *context) {
286+
auto attrName = getEnableDecompositionAttrName(context);
287+
auto unitAttr = UnitAttr::get(context);
288+
return DictionaryAttr::get(context,
289+
ArrayRef<NamedAttribute>({attrName, unitAttr}));
290+
}
291+
283292
/// Looks for the `native_vector_size` attribute in the hal.executable.target
284293
/// looked up from this op.
285294
static int64_t
@@ -1690,11 +1699,23 @@ static LogicalResult setRootConfig(mlir::FunctionOpInterface entryPointFn,
16901699
distTileSizes[pos] = std::max<int64_t>(distTileSizes[pos], 1);
16911700
}
16921701

1702+
// Dynamic inner tiles lead to unbounded stack allocation (which is introduced
1703+
// by tensor.pad op), so we do not decompose such cases. The x86 and risc-v
1704+
// backends prefer to not decompose the ops.
1705+
DictionaryAttr pipelineConfig;
1706+
auto target = IREE::HAL::ExecutableTargetAttr::lookup(entryPointFn);
1707+
bool hasDynamicInnerTile = llvm::any_of(
1708+
op.getMixedTiles(), [](OpFoldResult ofr) { return ofr.is<Value>(); });
1709+
if (!hasDynamicInnerTile && !isX86(target) && !isRISCV(target)) {
1710+
pipelineConfig = getPipelineConfWithDecompositionAttr(op.getContext());
1711+
}
1712+
16931713
SmallVector<int64_t> vecTileSizes = getPackVectorTileSizes(entryPointFn, op);
16941714
TileSizesListType tileSizesList = {distTileSizes, vecTileSizes};
16951715
return setOpConfigAndEntryPointFnTranslation(
16961716
entryPointFn, op, tileSizesList,
1697-
DispatchLoweringPassPipeline::CPUDataTiling);
1717+
DispatchLoweringPassPipeline::CPUDataTiling, /*workgroupSize=*/{},
1718+
/*subgroupSize=*/{}, pipelineConfig);
16981719
}
16991720

17001721
static LogicalResult setRootConfig(mlir::FunctionOpInterface entryPointFn,
@@ -1718,10 +1739,22 @@ static LogicalResult setRootConfig(mlir::FunctionOpInterface entryPointFn,
17181739
tileSizes[pos] = ShapedType::isDynamic(size) ? 1 : size;
17191740
}
17201741

1742+
// Dynamic inner tiles lead to unbounded stack allocation (which is introduced
1743+
// by tensor.pad op), so we do not decompose such cases. The x86 and risc-v
1744+
// backends prefer to not decompose the ops.
1745+
DictionaryAttr pipelineConfig;
1746+
auto target = IREE::HAL::ExecutableTargetAttr::lookup(entryPointFn);
1747+
bool hasDynamicInnerTile = llvm::any_of(
1748+
op.getMixedTiles(), [](OpFoldResult ofr) { return ofr.is<Value>(); });
1749+
if (!hasDynamicInnerTile && !isX86(target) && !isRISCV(target)) {
1750+
pipelineConfig = getPipelineConfWithDecompositionAttr(op.getContext());
1751+
}
1752+
17211753
TileSizesListType tileSizesList = {distTileSizes, tileSizes};
17221754
return setOpConfigAndEntryPointFnTranslation(
17231755
entryPointFn, op, tileSizesList,
1724-
DispatchLoweringPassPipeline::CPUDataTiling);
1756+
DispatchLoweringPassPipeline::CPUDataTiling, /*workgroupSize=*/{},
1757+
/*subgroupSize=*/{}, pipelineConfig);
17251758
}
17261759

17271760
static LogicalResult setRootConfig(mlir::FunctionOpInterface entryPointFn,

compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPULowerExecutableTarget.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,8 +100,8 @@ void LLVMCPULowerExecutableTargetPass::runOnOperation() {
100100
LLVMCPUPipelineOptions pipelineOpts;
101101
if (isX86(target) || isRISCV(target)) {
102102
pipelineOpts.useConfiguredVectorSizes = false;
103-
pipelineOpts.decomposePackUnPackOps = false;
104103
}
104+
pipelineOpts.decomposePackUnPackOps = isDecompositionEnabled(funcOp);
105105
pipelineOpts.lowerToAVX2 = hasAVX2Feature(target);
106106
pipelineOpts.enableVectorMasking =
107107
isX86(target) || isRISCV(target) ||

compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_aarch64_lowering_strategy.mlir

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,7 @@ func.func @pack() attributes {hal.executable.target = #executable_target_system_
260260
return
261261
}
262262
// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[1, 16], [1, 1]]>
263-
// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDataTiling>
263+
// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDataTiling, {enable_decomposition}>
264264
// CHECK: func.func @pack()
265265
// CHECK-SAME: translation_info = #[[TRANSLATION]]
266266
// CHECK: tensor.pack
@@ -293,14 +293,48 @@ func.func @unpack_outer_dynamic() attributes {hal.executable.target = #executabl
293293
return
294294
}
295295
// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[64, 64], [32, 16]]>
296-
// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDataTiling>
296+
// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDataTiling, {enable_decomposition}>
297297
// CHECK: func.func @unpack_outer_dynamic()
298298
// CHECK-SAME: translation_info = #[[TRANSLATION]]
299299
// CHECK: tensor.unpack
300300
// CHECK-SAME: lowering_config = #[[CONFIG]]
301301

302302
// -----
303303

304+
#executable_target_system_elf_arm_64_ = #hal.executable.target<"llvm-cpu", "system-elf-arm_64", {data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128", native_vector_size = 16 : index, target_triple = "aarch64-none-linux-android30"}>
305+
#pipeline_layout = #hal.pipeline.layout<constants = 6, bindings = [#hal.pipeline.binding<storage_buffer>, #hal.pipeline.binding<storage_buffer>]>
306+
func.func @unpack_fully_dynamic() attributes {hal.executable.target = #executable_target_system_elf_arm_64_} {
307+
%c131072 = arith.constant 131072 : index
308+
%c0 = arith.constant 0 : index
309+
%0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : i32
310+
%1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : i32
311+
%2 = hal.interface.constant.load layout(#pipeline_layout) ordinal(2) : i32
312+
%3 = hal.interface.constant.load layout(#pipeline_layout) ordinal(3) : i32
313+
%4 = hal.interface.constant.load layout(#pipeline_layout) ordinal(4) : i32
314+
%5 = hal.interface.constant.load layout(#pipeline_layout) ordinal(5) : i32
315+
%6 = arith.index_castui %0 : i32 to index
316+
%7 = arith.index_castui %1 : i32 to index
317+
%8 = arith.index_castui %2 : i32 to index
318+
%9 = arith.index_castui %3 : i32 to index
319+
%10 = arith.index_castui %4 : i32 to index
320+
%11 = arith.index_castui %5 : i32 to index
321+
%12 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<?x?x?x?xi32>>{%6, %7, %10, %11}
322+
%13 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c131072) : !flow.dispatch.tensor<writeonly:tensor<?x?xi32>>{%8, %9}
323+
%14 = flow.dispatch.tensor.load %12, offsets = [0, 0, 0, 0], sizes = [%6, %7, 32, 16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?x?x?xi32>>{%6, %7, %10, %11} -> tensor<?x?x?x?xi32>
324+
%15 = tensor.empty(%8, %9) : tensor<?x?xi32>
325+
%unpack = tensor.unpack %14 inner_dims_pos = [0, 1] inner_tiles = [%10, %11] into %15 : tensor<?x?x?x?xi32> -> tensor<?x?xi32>
326+
flow.dispatch.tensor.store %unpack, %13, offsets = [0, 0], sizes = [%8, %9], strides = [1, 1] : tensor<?x?xi32> -> !flow.dispatch.tensor<writeonly:tensor<?x?xi32>>{%8, %9}
327+
return
328+
}
329+
// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[64, 64], [1, 1]]>
330+
// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDataTiling>
331+
// CHECK: func.func @unpack_fully_dynamic()
332+
// CHECK-SAME: translation_info = #[[TRANSLATION]]
333+
// CHECK: tensor.unpack
334+
// CHECK-SAME: lowering_config = #[[CONFIG]]
335+
336+
// -----
337+
304338
#pipeline_layout = #hal.pipeline.layout<bindings = [
305339
#hal.pipeline.binding<storage_buffer>,
306340
#hal.pipeline.binding<storage_buffer>,

compiler/src/iree/compiler/Codegen/Utils/CPUUtils.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
namespace mlir::iree_compiler {
2121

2222
static const char kLoopPeelingAttrName[] = "enable_loop_peeling";
23+
static const char kDecompositionAttrName[] = "enable_decomposition";
2324

2425
FailureOr<Operation *> getRootOperation(ArrayRef<Operation *> computeOps) {
2526
Operation *rootOperation = nullptr;
@@ -66,6 +67,16 @@ FailureOr<Operation *> getRootOperation(ArrayRef<Operation *> computeOps) {
6667
return rootOperation;
6768
}
6869

70+
StringAttr getEnableDecompositionAttrName(MLIRContext *ctx) {
71+
return StringAttr::get(ctx, kDecompositionAttrName);
72+
}
73+
74+
bool isDecompositionEnabled(FunctionOpInterface funcOp) {
75+
DictionaryAttr config = getTranslationInfo(funcOp).getConfiguration();
76+
77+
return config && config.contains(kDecompositionAttrName);
78+
}
79+
6980
StringAttr getEnableLoopPeelingAttrName(MLIRContext *ctx) {
7081
return StringAttr::get(ctx, kLoopPeelingAttrName);
7182
}

compiler/src/iree/compiler/Codegen/Utils/CPUUtils.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,15 @@ namespace mlir::iree_compiler {
1919
/// to the end of the function is the root op.
2020
FailureOr<Operation *> getRootOperation(ArrayRef<Operation *> computeOps);
2121

22+
/// Creates a string attribute containing the name of the attribute that is
23+
/// used to enable decomposition.
24+
StringAttr getEnableDecompositionAttrName(MLIRContext *ctx);
25+
26+
/// Checks whether decomposition has been enabled for the input function. This
27+
/// is inferred from the config dict. attribute that's part of the
28+
/// translation info corresponding to this function.
29+
bool isDecompositionEnabled(FunctionOpInterface funcOp);
30+
2231
/// Creates a string attribute containing the name of the attribute that is
2332
/// used to enable loop peeling.
2433
StringAttr getEnableLoopPeelingAttrName(MLIRContext *ctx);

compiler/src/iree/compiler/Dialect/HAL/Transforms/Passes.cpp

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,8 @@ static void addExecutableSubstitutionPasses(OpPassManager &passManager,
176176
}
177177
if (!substitutions.empty()) {
178178
SubstituteExecutablesPassOptions substituteOptions;
179-
substituteOptions.substitutions = substitutions;
179+
substituteOptions.substitutions.assign(substitutions.begin(),
180+
substitutions.end());
180181
passManager.addPass(
181182
IREE::HAL::createSubstituteExecutablesPass(substituteOptions));
182183
}
@@ -197,12 +198,19 @@ void buildHALDeviceAssignmentPassPipeline(
197198
// Today we just assign devices from parameters but we should instead be
198199
// performing analysis at the flow level and then doing magic device
199200
// database lookups here.
200-
passManager.addPass(IREE::HAL::createAssignLegacyTargetDevicesPass(
201-
{&targetRegistry, assignmentOptions.legacyTargetBackends}));
201+
AssignLegacyTargetDevicesPassOptions options;
202+
options.targetRegistry = &targetRegistry;
203+
options.targetBackends.assign(
204+
assignmentOptions.legacyTargetBackends.begin(),
205+
assignmentOptions.legacyTargetBackends.end());
206+
passManager.addPass(
207+
IREE::HAL::createAssignLegacyTargetDevicesPass(options));
202208
}
203209
if (!assignmentOptions.targetDevices.empty()) {
204-
passManager.addPass(IREE::HAL::createAssignTargetDevicesPass(
205-
{assignmentOptions.targetDevices}));
210+
AssignTargetDevicesPassOptions options;
211+
options.targetDevices.assign(assignmentOptions.targetDevices.begin(),
212+
assignmentOptions.targetDevices.end());
213+
passManager.addPass(IREE::HAL::createAssignTargetDevicesPass(options));
206214
}
207215

208216
// Create globals for each device (if needed).

compiler/src/iree/compiler/Dialect/VM/Tools/VMOpEncoderGen.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ bool emitEncodeFnDefs(const llvm::RecordKeeper &recordKeeper, raw_ostream &os) {
6262
}
6363

6464
os << " if (";
65-
auto printOneCondition = [&](Record *encodingExpr) {
65+
auto printOneCondition = [&](const Record *encodingExpr) {
6666
StringRef expr = encodingExpr->getValueAsString("expr");
6767
std::vector<StringRef> params =
6868
encodingExpr->getValueAsListOfStrings("params");

compiler/src/iree/compiler/GlobalOptimization/Passes.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,9 +81,12 @@ void buildGlobalOptimizationPassPipeline(
8181
// parameters are available for folding.
8282
if (!transformOptions.options.parameterImportPaths.empty()) {
8383
IREE::IO::Parameters::ImportParametersPassOptions importParametersOptions;
84-
importParametersOptions.scopePaths =
85-
transformOptions.options.parameterImportPaths;
86-
importParametersOptions.keys = transformOptions.options.parameterImportKeys;
84+
importParametersOptions.scopePaths.assign(
85+
transformOptions.options.parameterImportPaths.begin(),
86+
transformOptions.options.parameterImportPaths.end());
87+
importParametersOptions.keys.assign(
88+
transformOptions.options.parameterImportKeys.begin(),
89+
transformOptions.options.parameterImportKeys.end());
8790
importParametersOptions.maximumSize =
8891
transformOptions.options.parameterImportMaximumSize;
8992
mainPassManager.addPass(IREE::IO::Parameters::createImportParametersPass(

third_party/llvm-project

Submodule llvm-project updated 1112 files

0 commit comments

Comments
 (0)