Skip to content

Commit 5492301

Browse files
authored
[LLVMCPU] Set stack allocation limit as a HAL target property (#20209)
As long as the limit is defined arbitrarily as a pass-specific CLI option, tiling/bufferization passes have no way of accounting for it. This change is the first step towards systematically addressing compilation failures in `LLVMCPUCheckIRBeforeLLVMConversionPass` before further tuning of the tiling/fusion logic is done for particular benchmarks. A basic LIT test is also added to make sure that the limit can still be overridden through a CLI option. This can be expanded further to test other default attribute values. Signed-off-by: Artem Gindinson <[email protected]> --------- Signed-off-by: Artem Gindinson <[email protected]>
1 parent 1c10dac commit 5492301

File tree

6 files changed

+75
-10
lines changed

6 files changed

+75
-10
lines changed

compiler/plugins/target/LLVMCPU/LLVMTargetOptions.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,7 @@ void LLVMTarget::storeToConfigAttrs(MLIRContext *context,
139139
if (vectorWidthInBytes != DEFAULT_VECTOR_WIDTH_IN_BYTES) {
140140
addInt64("native_vector_size", vectorWidthInBytes);
141141
}
142+
addInt64("max_stack_allocation_size", maxStackAllocSizeInBytes);
142143
if (linkEmbedded != DEFAULT_LINK_EMBEDDED) {
143144
addBool("link_embedded", linkEmbedded);
144145
}
@@ -581,6 +582,11 @@ void LLVMCPUTargetCLOptions::bindOptions(OptionsBinder &binder) {
581582
targetVectorWidthInBytes, llvm::cl::cat(category),
582583
llvm::cl::desc("Overrides the native vector register "
583584
"width (in bytes) of the target."));
585+
binder.opt<llvm::cl::PowerOf2ByteSize>(
586+
"iree-llvmcpu-stack-allocation-limit", targetMaxStackAllocSizeInBytes,
587+
llvm::cl::cat(category),
588+
llvm::cl::desc(
589+
"Maximum allowed stack allocation size for LLVM CPU in bytes"));
584590
binder.opt<std::string>(
585591
"iree-llvmcpu-enable-ukernels", enableUkernels, llvm::cl::cat(category),
586592
llvm::cl::desc("Enables ukernels in the llvmcpu backend. May be "
@@ -634,6 +640,7 @@ LLVMTargetOptions LLVMCPUTargetCLOptions::getTargetOptions() {
634640
target.llvmTargetOptions.FloatABIType = targetFloatABI;
635641
target.dataLayout = targetDataLayout;
636642
target.vectorWidthInBytes = targetVectorWidthInBytes;
643+
target.maxStackAllocSizeInBytes = targetMaxStackAllocSizeInBytes.value;
637644
target.ukernels = enableUkernels;
638645
target.linkUkernelBitcode = linkUKernelBitcode;
639646

compiler/plugins/target/LLVMCPU/LLVMTargetOptions.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ enum class SanitizerKind {
3333
struct LLVMTarget {
3434
static constexpr const char *DEFAULT_DATA_LAYOUT = "";
3535
static constexpr int64_t DEFAULT_VECTOR_WIDTH_IN_BYTES = 0;
36+
static constexpr int64_t DEFAULT_MAX_STACK_ALLOC_SIZE_IN_BYTES = 32768;
3637
static constexpr bool DEFAULT_LINK_EMBEDDED = true;
3738
static constexpr bool DEFAULT_DEBUG_SYMBOLS = true;
3839
static constexpr SanitizerKind DEFAULT_SANITIZER_KIND = SanitizerKind::kNone;
@@ -88,6 +89,7 @@ struct LLVMTarget {
8889
std::string dataLayout = DEFAULT_DATA_LAYOUT;
8990
// Overrides the vector width (in bytes) of the target.
9091
int64_t vectorWidthInBytes = DEFAULT_VECTOR_WIDTH_IN_BYTES;
92+
int64_t maxStackAllocSizeInBytes = DEFAULT_MAX_STACK_ALLOC_SIZE_IN_BYTES;
9193

9294
llvm::PipelineTuningOptions pipelineTuningOptions;
9395
// Optimization level to be used by the LLVM optimizer (middle-end).
@@ -194,6 +196,8 @@ struct LLVMCPUTargetCLOptions {
194196
llvm::FloatABI::ABIType targetFloatABI = LLVMTarget::DEFAULT_FLOAT_ABI;
195197
std::string targetDataLayout = LLVMTarget::DEFAULT_DATA_LAYOUT;
196198
unsigned targetVectorWidthInBytes = LLVMTarget::DEFAULT_VECTOR_WIDTH_IN_BYTES;
199+
llvm::cl::PowerOf2ByteSize targetMaxStackAllocSizeInBytes =
200+
LLVMTarget::DEFAULT_MAX_STACK_ALLOC_SIZE_IN_BYTES;
197201
std::string enableUkernels = LLVMTarget::DEFAULT_ENABLE_UKERNELS;
198202
bool linkUKernelBitcode = LLVMTarget::DEFAULT_LINK_UKERNEL_BITCODE;
199203
bool listTargets; // Ignored - used with llvm::cl::ValueDisallowed.

compiler/plugins/target/LLVMCPU/test/BUILD.bazel

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ iree_lit_test_suite(
1616
name = "lit",
1717
srcs = enforce_glob(
1818
[
19+
"hal_target_device_attributes.mlir",
1920
"materialize_homogeneous_encodings.mlir",
2021
"smoketest_embedded.mlir",
2122
"smoketest_system.mlir",
@@ -24,8 +25,10 @@ iree_lit_test_suite(
2425
),
2526
cfg = "//compiler:lit.cfg.py",
2627
tools = [
28+
"//tools:iree-compile",
2729
"//tools:iree-opt",
2830
"@llvm-project//lld",
2931
"@llvm-project//llvm:FileCheck",
32+
"@llvm-project//llvm:not",
3033
],
3134
)

compiler/plugins/target/LLVMCPU/test/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,16 @@ iree_lit_test_suite(
1414
NAME
1515
lit
1616
SRCS
17+
"hal_target_device_attributes.mlir"
1718
"materialize_homogeneous_encodings.mlir"
1819
"smoketest_embedded.mlir"
1920
"smoketest_system.mlir"
2021
TOOLS
2122
${IREE_LLD_TARGET}
2223
FileCheck
24+
iree-compile
2325
iree-opt
26+
not
2427
)
2528

2629
### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
// This test aims to check default HAL properties for LLVM CPU target, and
2+
// whether CLI options modify the values correctly.
3+
4+
// TODO: Expand the test for more CLI configurations, e.g. different target triples
5+
6+
// RUN: iree-compile --compile-to=preprocessing --iree-hal-target-backends=llvm-cpu --iree-llvmcpu-target-triple=x86_64-linux-gnu %s \
7+
// RUN: | FileCheck %s --check-prefix=CHECK-X86-DEFAULT
8+
//
9+
// CHECK-X86-DEFAULT: module attributes {stream.affinity.default = #hal.device.affinity<@__device_0>} {
10+
// CHECK-X86-DEFAULT-NEXT: util.global private @__device_0 = #hal.device.target<"local",
11+
// CHECK-X86-DEFAULT-SAME: [#hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "", cpu_features = ""
12+
// CHECK-X86-DEFAULT-SAME: max_stack_allocation_size = 32768 : i64
13+
// CHECK-X86-DEFAULT-SAME: native_vector_size = 16 : i64
14+
// CHECK-X86-DEFAULT-SAME: target_triple = "x86_64-unknown-unknown-eabi-elf"
15+
// CHECK-X86-DEFAULT-SAME: }>]> : !hal.device
16+
17+
// RUN: iree-compile --compile-to=preprocessing --iree-hal-target-backends=llvm-cpu --iree-llvmcpu-target-triple=x86_64-linux-gnu %s \
18+
// RUN: --iree-llvmcpu-stack-allocation-limit=65536 \
19+
// RUN: | FileCheck %s --check-prefix=CHECK-STACK-VALUE
20+
//
21+
// CHECK-STACK-VALUE: module attributes {stream.affinity.default = #hal.device.affinity<@__device_0>} {
22+
// CHECK-STACK-VALUE-NEXT: util.global private @__device_0 = #hal.device.target<"local",
23+
// CHECK-STACK-VALUE-SAME: [#hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "", cpu_features = ""
24+
//
25+
// CHECK-STACK-VALUE-SAME: max_stack_allocation_size = 65536 : i64
26+
//
27+
// CHECK-STACK-VALUE-SAME: }>]> : !hal.device
28+
29+
// RUN: not iree-compile --compile-to=preprocessing --iree-hal-target-backends=llvm-cpu --iree-llvmcpu-target-triple=x86_64-linux-gnu %s \
30+
// RUN: --iree-llvmcpu-stack-allocation-limit=64266 \
31+
// RUN: 2>&1 | FileCheck %s --check-prefix=CHECK-INCORRECT-OPT-STACK-VALUE
32+
//
33+
// CHECK-INCORRECT-OPT-STACK-VALUE: for the --iree-llvmcpu-stack-allocation-limit option: '64266' value not a power-of-two
34+
35+
module {
36+
util.func public @foo(%arg0: tensor<?xf32>) -> tensor<?xf32> {
37+
util.return %arg0 : tensor<?xf32>
38+
}
39+
}

compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUCheckIRBeforeLLVMConversion.cpp

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
66

77
#include "iree/compiler/Codegen/LLVMCPU/Passes.h"
8+
#include "iree/compiler/Codegen/Utils/Utils.h"
89
#include "llvm/Support/CommandLine.h"
910
#include "mlir/Dialect/MemRef/IR/MemRef.h"
1011
#include "mlir/Dialect/Vector/IR/ScalableValueBoundsConstraintSet.h"
@@ -16,11 +17,6 @@ namespace mlir::iree_compiler {
1617
#define GEN_PASS_DEF_LLVMCPUCHECKIRBEFORELLVMCONVERSIONPASS
1718
#include "iree/compiler/Codegen/LLVMCPU/Passes.h.inc"
1819

19-
static llvm::cl::opt<int> clMaxAllocationSizeInBytes(
20-
"iree-llvmcpu-stack-allocation-limit",
21-
llvm::cl::desc("maximum allowed stack allocation size in bytes"),
22-
llvm::cl::init(32768));
23-
2420
static llvm::cl::opt<unsigned> clAssumedVscaleValue(
2521
"iree-llvmcpu-stack-allocation-assumed-vscale",
2622
llvm::cl::desc(
@@ -38,13 +34,26 @@ struct LLVMCPUCheckIRBeforeLLVMConversionPass
3834
};
3935
} // namespace
4036

41-
/// Returns success if the cummulative stack allocation size is less than the
42-
/// limit set by clMaxAllocationSizeInBytes.
37+
/// Returns success if the cumulative stack allocation size does not exceed the
38+
/// limit set through --iree-llvmcpu-stack-allocation-limit (or the default
39+
/// defined for HAL LLVMCPU target).
4340
static LogicalResult
4441
checkStackAllocationSize(mlir::FunctionOpInterface funcOp) {
4542
if (funcOp.getFunctionBody().empty())
4643
return success();
4744

45+
// In rare cases where the attribute is not present in the module, a value of
46+
// 32KB will be taken.
47+
unsigned maxAllocationSizeInBytes = 32 * 1024;
48+
auto targetAttr = IREE::HAL::ExecutableTargetAttr::lookup(funcOp);
49+
if (targetAttr) {
50+
auto nativeAllocationSizeAttr =
51+
getConfigIntegerAttr(targetAttr, "max_stack_allocation_size");
52+
if (nativeAllocationSizeAttr) {
53+
maxAllocationSizeInBytes = nativeAllocationSizeAttr->getInt();
54+
}
55+
}
56+
4857
SmallVector<memref::AllocaOp> allocaOps;
4958
funcOp.walk(
5059
[&](memref::AllocaOp allocaOp) { allocaOps.push_back(allocaOp); });
@@ -91,10 +100,10 @@ checkStackAllocationSize(mlir::FunctionOpInterface funcOp) {
91100
}
92101
cumSize += allocaSize / 8;
93102
}
94-
if (cumSize > clMaxAllocationSizeInBytes) {
103+
if (cumSize > maxAllocationSizeInBytes) {
95104
return funcOp.emitOpError("exceeded stack allocation limit of ")
96-
<< clMaxAllocationSizeInBytes.getValue()
97-
<< " bytes for function. Got " << cumSize << " bytes";
105+
<< maxAllocationSizeInBytes << " bytes for function. Got " << cumSize
106+
<< " bytes";
98107
}
99108
return success();
100109
}

0 commit comments

Comments
 (0)