Skip to content

Commit 761329c

Browse files
committed
Remove the `--iree-llvmgpu-use-direct-load` turn-on option (direct-load DMA is now decided automatically by whether padding is needed).
1 parent cb55700 commit 761329c

File tree

4 files changed

+24
-36
lines changed

4 files changed

+24
-36
lines changed

compiler/src/iree/compiler/Codegen/Dialect/GPU/TargetUtils/ConfigUtils.cpp

Lines changed: 16 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -630,9 +630,9 @@ static bool checkForElementwiseUsersWithNewOperands(linalg::LinalgOp linalgOp) {
630630
static FailureOr<std::pair<LoweringConfigAttr, int64_t>>
631631
getMatmulOrIGEMMLoweringConfigAndWorkgroupSize(
632632
ArrayRef<int64_t> bounds, ArrayRef<AffineMap> maps,
633-
ArrayRef<Value> operands, IREE::GPU::TargetAttr target, bool useDirectLoad,
634-
bool isGemm, bool scaled, int64_t splitReductionTripCnt,
635-
bool CPromoteIfPadding, bool hasExistingAccumulator = false,
633+
ArrayRef<Value> operands, IREE::GPU::TargetAttr target, bool isGemm,
634+
bool scaled, int64_t splitReductionTripCnt, bool CPromoteIfPadding,
635+
bool hasExistingAccumulator = false,
636636
std::optional<ConvToIgemmInfo> convToIgemmInfo = std::nullopt) {
637637
if (target.getWgp().getMma().empty()) {
638638
return failure();
@@ -924,9 +924,9 @@ getMatmulOrIGEMMLoweringConfigAndWorkgroupSize(
924924
}
925925
// Do not use direct load DMA when padding is needed, as the source will
926926
// go through tensor.pad and won't be directly from global memory.
927-
ArrayRef<Attribute> promotionTypes = (useDirectLoad && !couldNeedPadding)
928-
? ArrayRef<Attribute>(promotionArray)
929-
: ArrayRef<Attribute>{};
927+
ArrayRef<Attribute> promotionTypes =
928+
couldNeedPadding ? ArrayRef<Attribute>{}
929+
: ArrayRef<Attribute>(promotionArray);
930930
GPU::appendPromotedOperandsList(context, attrs, promotionList,
931931
promotionTypes);
932932
if (!mustBeAligned || couldNeedPadding) {
@@ -966,9 +966,10 @@ getMatmulOrIGEMMLoweringConfigAndWorkgroupSize(
966966
return std::pair{loweringConfig, flatWorkgroupSize};
967967
}
968968

969-
LogicalResult setIGEMMConvolutionLoweringConfig(
970-
IREE::GPU::TargetAttr target, mlir::FunctionOpInterface entryPoint,
971-
Operation *op, bool useDirectLoad, bool padConv) {
969+
LogicalResult
970+
setIGEMMConvolutionLoweringConfig(IREE::GPU::TargetAttr target,
971+
mlir::FunctionOpInterface entryPoint,
972+
Operation *op, bool padConv) {
972973
auto linalgOp = dyn_cast<linalg::LinalgOp>(op);
973974
if (!linalgOp || !linalg::isaConvolutionOpInterface(linalgOp)) {
974975
return failure();
@@ -1042,7 +1043,7 @@ LogicalResult setIGEMMConvolutionLoweringConfig(
10421043
FailureOr<std::pair<LoweringConfigAttr, int64_t>> configAndWgSize =
10431044
getMatmulOrIGEMMLoweringConfigAndWorkgroupSize(
10441045
igemmLoopBounds, igemmContractionMaps, igemmOperands, target,
1045-
useDirectLoad, /*isGemm=*/false,
1046+
/*isGemm=*/false,
10461047
/*scaled=*/false, splitReductionTripCnt,
10471048
/*CPromoteIfPadding=*/CPromoteIfPadding, hasExistingAccumulator,
10481049
convToIgemmInfo);
@@ -1055,7 +1056,7 @@ LogicalResult setIGEMMConvolutionLoweringConfig(
10551056
SmallVector<NamedAttribute, 1> pipelineAttrs;
10561057
auto pipelineOptions = IREE::GPU::GPUPipelineOptionsAttr::get(
10571058
linalgOp->getContext(), /*prefetchNumStages=*/2,
1058-
/*no_reduce_shared_memory_bank_conflicts=*/useDirectLoad,
1059+
/*no_reduce_shared_memory_bank_conflicts=*/true,
10591060
/*use_igemm_convolution=*/true,
10601061
/*reorder_workgroups_strategy=*/std::nullopt);
10611062
pipelineAttrs.emplace_back(
@@ -1073,7 +1074,7 @@ LogicalResult setIGEMMConvolutionLoweringConfig(
10731074

10741075
LogicalResult setMatmulLoweringConfig(IREE::GPU::TargetAttr target,
10751076
mlir::FunctionOpInterface entryPoint,
1076-
Operation *op, bool useDirectLoad) {
1077+
Operation *op) {
10771078
auto linalgOp = dyn_cast<linalg::LinalgOp>(op);
10781079
if (!linalgOp ||
10791080
(!linalg::isaContractionOpInterface(linalgOp) &&
@@ -1100,18 +1101,15 @@ LogicalResult setMatmulLoweringConfig(IREE::GPU::TargetAttr target,
11001101

11011102
FailureOr<std::pair<LoweringConfigAttr, int64_t>> configAndWgSize =
11021103
getMatmulOrIGEMMLoweringConfigAndWorkgroupSize(
1103-
bounds, maps, operands, target, useDirectLoad, /*isGemm=*/true,
1104+
bounds, maps, operands, target, /*isGemm=*/true,
11041105
/*scaled=*/false, splitReductionTripCnt, CPromoteIfPadding,
11051106
hasExistingAccumulator);
11061107

11071108
// TODO (muzasyed) : add generalization for scaled and nonscaled versions of
11081109
// matmul lowering.
11091110
if (failed(configAndWgSize)) {
1110-
// TODO (muzasyed) : Perform padding appropriately for minimizing bank
1111-
// conflicts when dealing with scaled matmuls. For now it is disabled.
1112-
useDirectLoad = true;
11131111
configAndWgSize = getMatmulOrIGEMMLoweringConfigAndWorkgroupSize(
1114-
bounds, maps, operands, target, useDirectLoad, /*isGemm=*/true,
1112+
bounds, maps, operands, target, /*isGemm=*/true,
11151113
/*scaled=*/true, splitReductionTripCnt, CPromoteIfPadding,
11161114
hasExistingAccumulator);
11171115
}
@@ -1125,7 +1123,7 @@ LogicalResult setMatmulLoweringConfig(IREE::GPU::TargetAttr target,
11251123
SmallVector<NamedAttribute, 1> pipelineAttrs;
11261124
auto pipelineOptions = IREE::GPU::GPUPipelineOptionsAttr::get(
11271125
linalgOp->getContext(), /*prefetchNumStages=*/2,
1128-
/*no_reduce_shared_memory_bank_conflicts=*/useDirectLoad,
1126+
/*no_reduce_shared_memory_bank_conflicts=*/true,
11291127
/*use_igemm_convolution=*/false,
11301128
/*reorder_workgroups_strategy=*/std::nullopt);
11311129
pipelineAttrs.emplace_back(

compiler/src/iree/compiler/Codegen/Dialect/GPU/TargetUtils/ConfigUtils.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,17 +31,17 @@ setDirectConvolutionLoweringConfig(IREE::GPU::TargetAttr target,
3131
/// specified target.
3232
/// TODO: Currently this only succeeds if the target supports an mma
3333
/// kind. Add support for a fallback direct lowering path.
34-
LogicalResult setIGEMMConvolutionLoweringConfig(
35-
IREE::GPU::TargetAttr target, mlir::FunctionOpInterface entryPoint,
36-
Operation *op, bool useDirectLoad = false, bool padConv = false);
34+
LogicalResult
35+
setIGEMMConvolutionLoweringConfig(IREE::GPU::TargetAttr target,
36+
mlir::FunctionOpInterface entryPoint,
37+
Operation *op, bool padConv = false);
3738

3839
/// Helper for setting up a matmul config based on the specified target.
3940
/// TODO: Currently this only succeeds if the target supports an mma
4041
/// kind. Add support for a fallback direct lowering path.
4142
LogicalResult setMatmulLoweringConfig(IREE::GPU::TargetAttr target,
4243
mlir::FunctionOpInterface entryPoint,
43-
Operation *op,
44-
bool useDirectLoad = false);
44+
Operation *op);
4545

4646
/// Helper for setting up a default tile and fuse config for targeting
4747
/// simple thread distribution. Currently restricted to linalg ops.

compiler/src/iree/compiler/Codegen/LLVMGPU/KernelConfig.cpp

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -128,11 +128,6 @@ llvm::cl::opt<bool> clGPUPadConvolution(
128128
llvm::cl::desc("enable pre-padding for convolutions in igemm path"),
129129
llvm::cl::init(true));
130130

131-
static llvm::cl::opt<bool>
132-
clUseDirectLoad("iree-llvmgpu-use-direct-load",
133-
llvm::cl::desc("Use global load DMA for direct load ops."),
134-
llvm::cl::Hidden, llvm::cl::init(false));
135-
136131
static llvm::cl::opt<bool> clDirectConvolution(
137132
"iree-codegen-llvmgpu-use-direct-convolution",
138133
llvm::cl::desc("Use direct convolution in tile and fuse pipeline"),
@@ -2278,8 +2273,8 @@ static LogicalResult setRootConfig(IREE::GPU::TargetAttr target,
22782273
return success();
22792274
}
22802275
if (clGPUUseTileAndFuseMatmul) {
2281-
if (succeeded(IREE::GPU::setMatmulLoweringConfig(
2282-
target, entryPointFn, computeOp, clUseDirectLoad))) {
2276+
if (succeeded(IREE::GPU::setMatmulLoweringConfig(target, entryPointFn,
2277+
computeOp))) {
22832278
LDBG() << "Tile and fuse matmul config";
22842279
return success();
22852280
}
@@ -2293,8 +2288,7 @@ static LogicalResult setRootConfig(IREE::GPU::TargetAttr target,
22932288
}
22942289
if (clLLVMGPUUseIgemm) {
22952290
if (succeeded(IREE::GPU::setIGEMMConvolutionLoweringConfig(
2296-
target, entryPointFn, computeOp, clUseDirectLoad,
2297-
clGPUPadConvolution))) {
2291+
target, entryPointFn, computeOp, clGPUPadConvolution))) {
22982292
LDBG() << "Tile and fuse IGEMM config";
22992293
return success();
23002294
}

tests/e2e/matmul/CMakeLists.txt

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1210,7 +1210,6 @@ iree_generated_e2e_runner_test(
12101210
"hip"
12111211
COMPILER_FLAGS
12121212
${IREE_HIP_TEST_COMPILER_FLAGS}
1213-
"--iree-llvmgpu-use-direct-load"
12141213
LABELS
12151214
"noasan"
12161215
"nomsan"
@@ -1239,7 +1238,6 @@ iree_generated_e2e_runner_test(
12391238
"hip"
12401239
COMPILER_FLAGS
12411240
${IREE_HIP_TEST_COMPILER_FLAGS}
1242-
"--iree-llvmgpu-use-direct-load"
12431241
LABELS
12441242
"noasan"
12451243
"nomsan"
@@ -1268,7 +1266,6 @@ iree_generated_e2e_runner_test(
12681266
"hip"
12691267
COMPILER_FLAGS
12701268
${IREE_HIP_TEST_COMPILER_FLAGS}
1271-
"--iree-llvmgpu-use-direct-load"
12721269
LABELS
12731270
"noasan"
12741271
"nomsan"
@@ -1297,7 +1294,6 @@ iree_generated_e2e_runner_test(
12971294
"hip"
12981295
COMPILER_FLAGS
12991296
${IREE_HIP_TEST_COMPILER_FLAGS}
1300-
"--iree-llvmgpu-use-direct-load"
13011297
LABELS
13021298
"noasan"
13031299
"nomsan"

0 commit comments

Comments (0)