Skip to content

Commit 936d9e1

Browse files
committed
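Another update: pass amdgpu::Chipset into the gpu.subgroup_size lowering so the wavefront-size range uses a lower bound of 64 before gfx10 (32 otherwise) and an exclusive upper bound of upper_bound + 1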
1 parent a29668d commit 936d9e1

File tree

4 files changed

+24
-8
lines changed

4 files changed

+24
-8
lines changed

mlir/include/mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,10 @@ class RewritePatternSet;
2020
template <typename OpT>
2121
class OperationPass;
2222

23+
namespace amdgpu {
24+
struct Chipset;
25+
} // namespace amdgpu
26+
2327
namespace gpu {
2428
class GPUModuleOp;
2529
} // namespace gpu
@@ -32,7 +36,8 @@ class GPUModuleOp;
3236
/// The resulting pattern set should be run over a gpu.module op
3337
void populateGpuToROCDLConversionPatterns(const LLVMTypeConverter &converter,
3438
RewritePatternSet &patterns,
35-
gpu::amd::Runtime runtime);
39+
gpu::amd::Runtime runtime,
40+
amdgpu::Chipset chipset);
3641

3742
/// Configure target to convert from the GPU dialect to ROCDL.
3843
void configureGpuToROCDLConversionLegality(ConversionTarget &target);

mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -134,14 +134,21 @@ struct GPULaneIdOpToROCDL : ConvertOpToLLVMPattern<gpu::LaneIdOp> {
134134

135135
struct GPUSubgroupSizeOpToROCDL : ConvertOpToLLVMPattern<gpu::SubgroupSizeOp> {
136136
using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern;
137+
138+
GPUSubgroupSizeOpToROCDL(const LLVMTypeConverter &converter,
139+
amdgpu::Chipset chipset)
140+
: ConvertOpToLLVMPattern<gpu::SubgroupSizeOp>(converter),
141+
chipset(chipset) {}
142+
137143
LogicalResult
138144
matchAndRewrite(gpu::SubgroupSizeOp op, gpu::SubgroupSizeOp::Adaptor adaptor,
139145
ConversionPatternRewriter &rewriter) const override {
140146
LLVM::ConstantRangeAttr bounds = nullptr;
147+
bool isBeforeGfx10 = chipset.majorVersion < 10;
141148
if (auto upperBoundAttr = op.getUpperBoundAttr()) {
142149
bounds = rewriter.getAttr<LLVM::ConstantRangeAttr>(
143-
/*bitWidth=*/32, /*lower=*/32,
144-
/*upper=*/op.getUpperBoundAttr().getInt());
150+
/*bitWidth=*/32, /*lower=*/isBeforeGfx10 ? 64 : 32,
151+
/*upper=*/op.getUpperBoundAttr().getInt() + 1);
145152
}
146153
Value wavefrontOp = rewriter.create<ROCDL::WavefrontSizeOp>(
147154
op.getLoc(), rewriter.getI32Type(), bounds);
@@ -150,6 +157,8 @@ struct GPUSubgroupSizeOpToROCDL : ConvertOpToLLVMPattern<gpu::SubgroupSizeOp> {
150157
rewriter.replaceOp(op, {wavefrontOp});
151158
return success();
152159
}
160+
161+
const amdgpu::Chipset chipset;
153162
};
154163

155164
struct GPUShuffleOpLowering : public ConvertOpToLLVMPattern<gpu::ShuffleOp> {
@@ -358,7 +367,8 @@ struct LowerGpuOpsToROCDLOpsPass final
358367

359368
populateAMDGPUToROCDLConversionPatterns(converter, llvmPatterns,
360369
*maybeChipset);
361-
populateGpuToROCDLConversionPatterns(converter, llvmPatterns, runtime);
370+
populateGpuToROCDLConversionPatterns(converter, llvmPatterns, runtime,
371+
*maybeChipset);
362372
configureGpuToROCDLConversionLegality(target);
363373
if (failed(applyPartialConversion(m, target, std::move(llvmPatterns))))
364374
signalPassFailure();
@@ -406,7 +416,7 @@ void mlir::configureGpuToROCDLConversionLegality(ConversionTarget &target) {
406416

407417
void mlir::populateGpuToROCDLConversionPatterns(
408418
const LLVMTypeConverter &converter, RewritePatternSet &patterns,
409-
mlir::gpu::amd::Runtime runtime) {
419+
mlir::gpu::amd::Runtime runtime, amdgpu::Chipset chipset) {
410420
using gpu::index_lowering::IndexKind;
411421
using gpu::index_lowering::IntrType;
412422
using mlir::gpu::amd::Runtime;
@@ -447,6 +457,7 @@ void mlir::populateGpuToROCDLConversionPatterns(
447457
patterns
448458
.add<GPUShuffleOpLowering, GPULaneIdOpToROCDL, GPUSubgroupSizeOpToROCDL>(
449459
converter);
460+
patterns.add<GPUSubgroupSizeOpToROCDL>(converter, chipset);
450461

451462
populateMathToROCDLConversionPatterns(converter, patterns);
452463
}

mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ gpu.module @test_module {
6363
// CHECK: = llvm.sext %{{.*}} : i32 to i64
6464
%subgroupSize = gpu.subgroup_size : index
6565

66-
// CHECK: = rocdl.wavefrontsize range <i32, 32, 64> : i32
66+
// CHECK: = rocdl.wavefrontsize range <i32, 64, 65> : i32
6767
// CHECK: = llvm.sext %{{.*}} : i32 to i64
6868
%subgroupSize2 = gpu.subgroup_size upper_bound 64 : index
6969

mlir/test/Target/LLVMIR/rocdl.mlir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,8 @@ llvm.func @rocdl_special_regs() -> i32 {
3636
// CHECK: call i32 @llvm.amdgcn.wavefrontsize()
3737
%15 = rocdl.wavefrontsize : i32
3838

39-
// CHECK: call range(i32 32, 64) i32 @llvm.amdgcn.wavefrontsize()
40-
%16 = rocdl.wavefrontsize range <i32, 32, 64> : i32
39+
// CHECK: call range(i32 32, 65) i32 @llvm.amdgcn.wavefrontsize()
40+
%16 = rocdl.wavefrontsize range <i32, 32, 65> : i32
4141

4242
llvm.return %1 : i32
4343
}

0 commit comments

Comments
 (0)