Skip to content

Commit 198bdd3

Browse files
committed
refactor
1 parent 135b39d commit 198bdd3

File tree

3 files changed

+21
-44
lines changed

3 files changed

+21
-44
lines changed

compiler/src/iree/compiler/Codegen/Dialect/GPU/TargetUtils/ConfigUtils.cpp

Lines changed: 15 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,16 @@ namespace mlir::iree_compiler::IREE::GPU {
4646
constexpr int64_t kCacheLineSizeBits = 128 * 8;
4747
constexpr int64_t kPreferredCopyNumBits = 128;
4848

49+
// Reports whether `target` supports global load DMA. Returns true only when
// the target's arch string parses as an AMDGPU chipset that compares
// greater than or equal to gfx950; returns false otherwise.
bool targetSupportsGlobalLoadDMA(IREE::GPU::TargetAttr target) {
50+
StringRef targetArch = target.getArch();
51+
auto maybeChipset = amdgpu::Chipset::parse(targetArch);
52+
// An arch string that does not parse as an AMDGPU chipset is treated as
// "not supported" rather than as an error.
if (failed(maybeChipset)) {
53+
return false;
54+
}
55+
constexpr amdgpu::Chipset kGfx950{9, 5, 0};
56+
// NOTE(review): this is an ordered comparison, so every chipset that orders
// at or above gfx950 is accepted -- presumably including later major
// versions (gfx10+/gfx11+/gfx12+). Confirm that all of those are meant to
// be reported as supporting global load DMA.
return *maybeChipset >= kGfx950;
57+
}
58+
4959
//===----------------------------------------------------------------------===//
5060
// Lowering Config Selection
5161
//===----------------------------------------------------------------------===//
@@ -927,19 +937,10 @@ getMatmulOrIGEMMLoweringConfigAndWorkgroupSize(
927937
{"subgroup", b.getI64ArrayAttr(subgroupTileSizes)},
928938
{"mma_kind", kind}};
929939

930-
// Check if target supports global load DMA (gfx950+).
931-
bool supportsGlobalLoadDMA = false;
932-
StringRef targetArch = target.getArch();
933-
if (auto maybeChipset = amdgpu::Chipset::parse(targetArch);
934-
succeeded(maybeChipset)) {
935-
constexpr amdgpu::Chipset kGfx950{9, 5, 0};
936-
supportsGlobalLoadDMA = (*maybeChipset >= kGfx950);
937-
}
938-
939940
// Use global load DMA attribute (subgroup sizes will be derived from
940941
// translation_info) only on gfx950+.
941942
SmallVector<Attribute> promotionArray;
942-
if (supportsGlobalLoadDMA) {
943+
if (targetSupportsGlobalLoadDMA(target)) {
943944
Attribute useGlobalDma = IREE::GPU::UseGlobalLoadDMAAttr::get(context);
944945
promotionArray = {useGlobalDma, useGlobalDma};
945946
}
@@ -1090,20 +1091,11 @@ setIGEMMConvolutionLoweringConfig(IREE::GPU::TargetAttr target,
10901091
std::array<int64_t, 3> workgroupSize = {configAndWgSize->second, 1, 1};
10911092
LoweringConfigAttr loweringConfig = configAndWgSize->first;
10921093

1093-
// Check if target supports global load DMA (gfx950+). Only disable bank
1094-
// conflict reduction for targets that will use direct load DMA.
1095-
bool supportsGlobalLoadDMA = false;
1096-
StringRef targetArch = target.getArch();
1097-
if (auto maybeChipset = amdgpu::Chipset::parse(targetArch);
1098-
succeeded(maybeChipset)) {
1099-
constexpr amdgpu::Chipset kGfx950{9, 5, 0};
1100-
supportsGlobalLoadDMA = (*maybeChipset >= kGfx950);
1101-
}
1102-
11031094
SmallVector<NamedAttribute, 1> pipelineAttrs;
11041095
auto pipelineOptions = IREE::GPU::GPUPipelineOptionsAttr::get(
11051096
linalgOp->getContext(), /*prefetchNumStages=*/2,
1106-
/*no_reduce_shared_memory_bank_conflicts=*/supportsGlobalLoadDMA,
1097+
/*no_reduce_shared_memory_bank_conflicts=*/
1098+
targetSupportsGlobalLoadDMA(target),
11071099
/*use_igemm_convolution=*/true,
11081100
/*reorder_workgroups_strategy=*/std::nullopt);
11091101
pipelineAttrs.emplace_back(
@@ -1168,20 +1160,11 @@ LogicalResult setMatmulLoweringConfig(IREE::GPU::TargetAttr target,
11681160
std::array<int64_t, 3> workgroupSize = {configAndWgSize->second, 1, 1};
11691161
LoweringConfigAttr loweringConfig = configAndWgSize->first;
11701162

1171-
// Check if target supports global load DMA (gfx950+). Only disable bank
1172-
// conflict reduction for targets that will use direct load DMA.
1173-
bool supportsGlobalLoadDMA = false;
1174-
StringRef targetArch = target.getArch();
1175-
if (auto maybeChipset = amdgpu::Chipset::parse(targetArch);
1176-
succeeded(maybeChipset)) {
1177-
constexpr amdgpu::Chipset kGfx950{9, 5, 0};
1178-
supportsGlobalLoadDMA = (*maybeChipset >= kGfx950);
1179-
}
1180-
11811163
SmallVector<NamedAttribute, 1> pipelineAttrs;
11821164
auto pipelineOptions = IREE::GPU::GPUPipelineOptionsAttr::get(
11831165
linalgOp->getContext(), /*prefetchNumStages=*/2,
1184-
/*no_reduce_shared_memory_bank_conflicts=*/supportsGlobalLoadDMA,
1166+
/*no_reduce_shared_memory_bank_conflicts=*/
1167+
targetSupportsGlobalLoadDMA(target),
11851168
/*use_igemm_convolution=*/false,
11861169
/*reorder_workgroups_strategy=*/std::nullopt);
11871170
pipelineAttrs.emplace_back(

compiler/src/iree/compiler/Codegen/Dialect/GPU/TargetUtils/ConfigUtils.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,10 @@ LogicalResult setSortConfig(IREE::GPU::TargetAttr target,
5858
mlir::FunctionOpInterface entryPoint,
5959
Operation *op);
6060

61+
/// Returns true if the target supports global load DMA operations.
62+
/// Currently this is only supported on gfx950+ (CDNA4+) architectures.
/// Returns false when the target's arch string cannot be parsed as an AMDGPU
/// chipset.
63+
bool targetSupportsGlobalLoadDMA(IREE::GPU::TargetAttr target);
64+
6165
/// Helper for setting up a memory bound reduction configuration, focusing
6266
/// on getting peak global memory bandwidth.
6367
LogicalResult setReductionConfig(IREE::GPU::TargetAttr target,

compiler/src/iree/compiler/Codegen/LLVMGPU/KernelConfig.cpp

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@
3535
#include "llvm/Support/InterleavedRange.h"
3636
#include "llvm/Support/LogicalResult.h"
3737
#include "mlir/Analysis/SliceAnalysis.h"
38-
#include "mlir/Dialect/AMDGPU/Utils/Chipset.h"
3938
#include "mlir/Dialect/Linalg/IR/Linalg.h"
4039
#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
4140
#include "mlir/IR/Attributes.h"
@@ -1507,20 +1506,11 @@ static LogicalResult setContractConfig(IREE::GPU::TargetAttr target,
15071506
auto configDict = b.getDictionaryAttr(attrs);
15081507
auto loweringConfig =
15091508
IREE::GPU::LoweringConfigAttr::get(context, configDict);
1510-
// Check if target supports global load DMA (gfx950+). Only disable bank
1511-
// conflict reduction for targets that will use direct load DMA.
1512-
bool supportsGlobalLoadDMA = false;
1513-
StringRef targetArch = target.getArch();
1514-
if (auto maybeChipset = amdgpu::Chipset::parse(targetArch);
1515-
succeeded(maybeChipset)) {
1516-
constexpr amdgpu::Chipset kGfx950{9, 5, 0};
1517-
supportsGlobalLoadDMA = (*maybeChipset >= kGfx950);
1518-
}
1519-
15201509
SmallVector<NamedAttribute, 1> pipelineAttrs;
15211510
auto pipelineOptions = IREE::GPU::GPUPipelineOptionsAttr::get(
15221511
context, /*prefetch_num_stages=*/0,
1523-
/*no_reduce_shared_memory_bank_conflicts=*/supportsGlobalLoadDMA,
1512+
/*no_reduce_shared_memory_bank_conflicts=*/
1513+
IREE::GPU::targetSupportsGlobalLoadDMA(target),
15241514
/*use_igemm_convolution=*/false,
15251515
/*reorder_workgroups_strategy=*/std::nullopt);
15261516
pipelineAttrs.emplace_back(

0 commit comments

Comments
 (0)