Skip to content

Commit 811633e

Browse files
committed
switch to new api
1 parent a856915 commit 811633e

File tree

5 files changed

+102
-62
lines changed

5 files changed

+102
-62
lines changed

mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -719,23 +719,26 @@ def AMDGPU_SchedBarrierOp :
719719
}];
720720
}
721721

722-
def AMDGPU_WaitcntOp :
723-
AMDGPU_Op<"waitcnt">,
722+
def AMDGPU_MemoryCounterWaitOp :
723+
AMDGPU_Op<"memory_counter_wait">,
724724
Arguments<(ins
725-
OptionalAttr<I32Attr>:$vmcnt,
726-
OptionalAttr<I32Attr>:$expcnt,
727-
OptionalAttr<I32Attr>:$lgkmcnt
725+
OptionalAttr<I32Attr>:$load,
726+
OptionalAttr<I32Attr>:$store,
727+
OptionalAttr<I32Attr>:$ds,
728+
OptionalAttr<I32Attr>:$exp
728729
)>
729730
{
730-
let summary = "Wrapper on ROCDL SWaitcntOp";
731+
let summary = "Wait for specified hardware counters";
731732
let description = [{
732-
Covenience wrapper on `rocdl.s.waitcnt`. Hides the architecture specific
733-
bitpacking from user. Missing values will be assumed maximum values supported
734-
by the architecture. Large values will also be clamped to the maximum
735-
supported values.
733+
Wait for the specified counters to be less-than or equal-to the provided
734+
values before continuing.
735+
736+
Counters can lower to different instructions on different architectures,
737+
including clamping to some HW-supported max value or combining multiple
738+
counters into one.
736739
}];
737740
let assemblyFormat = [{
738-
oilist( `vmcnt` `(` $vmcnt `)` | `expcnt` `(` $expcnt `)` | `lgkmcnt` `(` $lgkmcnt `)` ) attr-dict
741+
oilist( `load` `(` $load `)` | `store` `(` $store `)` | `ds` `(` $ds `)` | `exp` `(` $exp `)` ) attr-dict
739742
}];
740743
}
741744

mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp

Lines changed: 32 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -465,28 +465,50 @@ static FailureOr<unsigned> encodeWaitcnt(Chipset chipset, unsigned vmcnt,
465465
return failure();
466466
}
467467

468-
struct WaitcntOpLowering : public ConvertOpToLLVMPattern<WaitcntOp> {
469-
WaitcntOpLowering(const LLVMTypeConverter &converter, Chipset chipset)
470-
: ConvertOpToLLVMPattern<WaitcntOp>(converter), chipset(chipset) {}
468+
struct MemoryCounterWaitOpLowering
469+
: public ConvertOpToLLVMPattern<MemoryCounterWaitOp> {
470+
MemoryCounterWaitOpLowering(const LLVMTypeConverter &converter,
471+
Chipset chipset)
472+
: ConvertOpToLLVMPattern<MemoryCounterWaitOp>(converter),
473+
chipset(chipset) {}
471474

472475
Chipset chipset;
473476

474477
LogicalResult
475-
matchAndRewrite(WaitcntOp op, OpAdaptor adaptor,
478+
matchAndRewrite(MemoryCounterWaitOp op, OpAdaptor adaptor,
476479
ConversionPatternRewriter &rewriter) const override {
480+
if (chipset.majorVersion >= 12) {
481+
Location loc = op.getLoc();
482+
if (auto ds = adaptor.getDs())
483+
rewriter.create<ROCDL::WaitDscntOp>(loc, *ds);
484+
485+
if (auto load = adaptor.getLoad())
486+
rewriter.create<ROCDL::WaitLoadcntOp>(loc, *load);
487+
488+
if (auto store = adaptor.getStore())
489+
rewriter.create<ROCDL::WaitStorecntOp>(loc, *store);
490+
491+
if (auto exp = adaptor.getExp())
492+
rewriter.create<ROCDL::WaitExpcntOp>(loc, *exp);
493+
494+
return success();
495+
}
496+
477497
auto getVal = [](Attribute attr) -> unsigned {
478498
if (attr)
479499
return cast<IntegerAttr>(attr).getInt();
480500

481501
// This value will be clamped to the maximum value for the chipset.
482502
return 1024 * 1024;
483503
};
484-
unsigned vmcnt = getVal(adaptor.getVmcntAttr());
485-
unsigned expcnt = getVal(adaptor.getExpcntAttr());
486-
unsigned lgkmcnt = getVal(adaptor.getLgkmcntAttr());
504+
unsigned ds = getVal(adaptor.getDsAttr());
505+
unsigned load = getVal(adaptor.getLoadAttr());
506+
unsigned store = getVal(adaptor.getStoreAttr());
507+
unsigned exp = getVal(adaptor.getExpAttr());
508+
509+
unsigned vmcnt = std::min(load, store);
487510

488-
FailureOr<unsigned> waitcnt =
489-
encodeWaitcnt(chipset, vmcnt, expcnt, lgkmcnt);
511+
FailureOr<unsigned> waitcnt = encodeWaitcnt(chipset, vmcnt, exp, ds);
490512
if (failed(waitcnt))
491513
return op.emitOpError("unsupported chipset");
492514

@@ -1901,7 +1923,7 @@ void mlir::populateAMDGPUToROCDLConversionPatterns(LLVMTypeConverter &converter,
19011923
ROCDL::RawPtrBufferAtomicUminOp>,
19021924
RawBufferOpLowering<RawBufferAtomicCmpswapOp,
19031925
ROCDL::RawPtrBufferAtomicCmpSwap>,
1904-
AMDGPUDPPLowering, WaitcntOpLowering, LDSBarrierOpLowering,
1926+
AMDGPUDPPLowering, MemoryCounterWaitOpLowering, LDSBarrierOpLowering,
19051927
SchedBarrierOpLowering, MFMAOpLowering, ScaledMFMAOpLowering,
19061928
WMMAOpLowering, ExtPackedFp8OpLowering, ScaledExtPackedOpLowering,
19071929
PackedScaledTruncOpLowering, PackedTrunc2xFp8OpLowering,
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
// RUN: mlir-opt %s -convert-amdgpu-to-rocdl=chipset=gfx942 | FileCheck %s --check-prefixes=CHECK,GFX9
2+
// RUN: mlir-opt %s -convert-amdgpu-to-rocdl=chipset=gfx1030 | FileCheck %s --check-prefixes=CHECK,GFX10
3+
// RUN: mlir-opt %s -convert-amdgpu-to-rocdl=chipset=gfx1100 | FileCheck %s --check-prefixes=CHECK,GFX11
4+
// RUN: mlir-opt %s -convert-amdgpu-to-rocdl=chipset=gfx1201 | FileCheck %s --check-prefixes=CHECK,GFX12
5+
6+
// CHECK-LABEL: func @memory_counter_wait
7+
func.func @memory_counter_wait() {
8+
// GFX9: rocdl.s.waitcnt 53119
9+
// GFX10: rocdl.s.waitcnt 65407
10+
// GFX11: rocdl.s.waitcnt 65527
11+
// GFX12-NOT: rocdl.s.wait.loadcnt
12+
// GFX12-NOT: rocdl.s.wait.storecnt
13+
// GFX12-NOT: rocdl.s.wait.expcnt
14+
// GFX12-NOT: rocdl.s.wait.dscnt
15+
amdgpu.memory_counter_wait
16+
17+
// GFX9: rocdl.s.waitcnt 3952
18+
// GFX10: rocdl.s.waitcnt 16240
19+
// GFX11: rocdl.s.waitcnt 1015
20+
// GFX12: rocdl.s.wait.loadcnt 0
21+
amdgpu.memory_counter_wait load(0)
22+
23+
// GFX9: rocdl.s.waitcnt 3952
24+
// GFX10: rocdl.s.waitcnt 16240
25+
// GFX11: rocdl.s.waitcnt 1015
26+
// GFX12: rocdl.s.wait.storecnt 0
27+
amdgpu.memory_counter_wait store(0)
28+
29+
// GFX9: rocdl.s.waitcnt 53007
30+
// GFX10: rocdl.s.waitcnt 65295
31+
// GFX11: rocdl.s.waitcnt 65520
32+
// GFX12: rocdl.s.wait.expcnt 0
33+
amdgpu.memory_counter_wait exp(0)
34+
35+
// GFX9: rocdl.s.waitcnt 49279
36+
// GFX10: rocdl.s.waitcnt 49279
37+
// GFX11: rocdl.s.waitcnt 64519
38+
// GFX12: rocdl.s.wait.dscnt 0
39+
amdgpu.memory_counter_wait ds(0)
40+
41+
return
42+
}

mlir/test/Conversion/AMDGPUToROCDL/waitcnt.mlir

Lines changed: 0 additions & 29 deletions
This file was deleted.

mlir/test/Dialect/AMDGPU/ops.mlir

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -549,17 +549,19 @@ func.func @gather_to_lds(%idx1 : index, %idx2 : index, %mem1 : memref<32xf16>, %
549549
func.return
550550
}
551551

552-
// CHECK-LABEL: func @waitcnt
553-
func.func @waitcnt() {
554-
// CHECK: amdgpu.waitcnt vmcnt(1) expcnt(2) lgkmcnt(3)
555-
// CHECK: amdgpu.waitcnt vmcnt(3) expcnt(2) lgkmcnt(1)
556-
// CHECK: amdgpu.waitcnt vmcnt(1)
557-
// CHECK: amdgpu.waitcnt expcnt(2)
558-
// CHECK: amdgpu.waitcnt lgkmcnt(3)
559-
amdgpu.waitcnt vmcnt(1) expcnt(2) lgkmcnt(3)
560-
amdgpu.waitcnt lgkmcnt(1) expcnt(2) vmcnt(3)
561-
amdgpu.waitcnt vmcnt(1)
562-
amdgpu.waitcnt expcnt(2)
563-
amdgpu.waitcnt lgkmcnt(3)
552+
// CHECK-LABEL: func @memory_counter_wait
553+
func.func @memory_counter_wait() {
554+
// CHECK: amdgpu.memory_counter_wait load(1) store(2) ds(3) exp(4)
555+
// CHECK: amdgpu.memory_counter_wait load(4) store(2) ds(3) exp(1)
556+
// CHECK: amdgpu.memory_counter_wait load(1)
557+
// CHECK: amdgpu.memory_counter_wait store(2)
558+
// CHECK: amdgpu.memory_counter_wait ds(3)
559+
// CHECK: amdgpu.memory_counter_wait exp(4)
560+
amdgpu.memory_counter_wait load(1) store(2) ds(3) exp(4)
561+
amdgpu.memory_counter_wait exp(1) store(2) ds(3) load(4)
562+
amdgpu.memory_counter_wait load(1)
563+
amdgpu.memory_counter_wait store(2)
564+
amdgpu.memory_counter_wait ds(3)
565+
amdgpu.memory_counter_wait exp(4)
564566
func.return
565567
}

0 commit comments

Comments
 (0)