Skip to content

Commit 5320853

Browse files
committed
more chipsets
Signed-off-by: Ivan Butygin <[email protected]>
1 parent f3bc55c commit 5320853

File tree

2 files changed

+40
-1
lines changed

2 files changed

+40
-1
lines changed

mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -421,8 +421,23 @@ struct RawBufferOpLowering : public ConvertOpToLLVMPattern<GpuOp> {
421421

422422
// TODO: AMDGPU backend already has all this bitpacking logic, we should move
423423
// it to some common place.
424+
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded into \p Waitcnt as follows:
425+
/// \p Vmcnt = \p Waitcnt[3:0] (pre-gfx9)
426+
/// \p Vmcnt = \p Waitcnt[15:14,3:0] (gfx9,10)
427+
/// \p Vmcnt = \p Waitcnt[15:10] (gfx11)
428+
/// \p Expcnt = \p Waitcnt[6:4] (pre-gfx11)
429+
/// \p Expcnt = \p Waitcnt[2:0] (gfx11)
430+
/// \p Lgkmcnt = \p Waitcnt[11:8] (pre-gfx10)
431+
/// \p Lgkmcnt = \p Waitcnt[13:8] (gfx10)
432+
/// \p Lgkmcnt = \p Waitcnt[9:4] (gfx11)
424433
static FailureOr<unsigned> encodeWaitcnt(Chipset chipset, unsigned vmcnt,
425434
unsigned expcnt, unsigned lgkmcnt) {
435+
if (chipset.majorVersion < 9) {
436+
vmcnt = std::min(15u, vmcnt);
437+
expcnt = std::min(7u, expcnt);
438+
lgkmcnt = std::min(15u, lgkmcnt);
439+
return vmcnt | (expcnt << 4) | (lgkmcnt << 8);
440+
}
426441
if (chipset.majorVersion == 9) {
427442
vmcnt = std::min(63u, vmcnt);
428443
expcnt = std::min(7u, expcnt);
@@ -432,6 +447,21 @@ static FailureOr<unsigned> encodeWaitcnt(Chipset chipset, unsigned vmcnt,
432447
unsigned otherCnts = (expcnt << 4) | (lgkmcnt << 8);
433448
return lowBits | highBits | otherCnts;
434449
}
450+
if (chipset.majorVersion == 10) {
451+
vmcnt = std::min(63u, vmcnt);
452+
expcnt = std::min(7u, expcnt);
453+
lgkmcnt = std::min(63u, lgkmcnt);
454+
unsigned lowBits = vmcnt & 0xF;
455+
unsigned highBits = (vmcnt >> 4) << 14;
456+
unsigned otherCnts = (expcnt << 4) | (lgkmcnt << 8);
457+
return lowBits | highBits | otherCnts;
458+
}
459+
if (chipset.majorVersion == 11) {
460+
vmcnt = std::min(63u, vmcnt);
461+
expcnt = std::min(7u, expcnt);
462+
lgkmcnt = std::min(63u, lgkmcnt);
463+
return (vmcnt << 10) | expcnt | (lgkmcnt << 4);
464+
}
435465
return failure();
436466
}
437467

mlir/test/Conversion/AMDGPUToROCDL/waitcnt.mlir

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,28 @@
11
// RUN: mlir-opt %s -convert-amdgpu-to-rocdl=chipset=gfx942 | FileCheck %s --check-prefixes=CHECK,GFX9
2-
// TODO: Add more chipsets support
2+
// RUN: mlir-opt %s -convert-amdgpu-to-rocdl=chipset=gfx1030 | FileCheck %s --check-prefixes=CHECK,GFX10
3+
// RUN: mlir-opt %s -convert-amdgpu-to-rocdl=chipset=gfx1100 | FileCheck %s --check-prefixes=CHECK,GFX11
34

45

56
// CHECK-LABEL: func @waitcnt
67
func.func @waitcnt() {
78
// GFX9: rocdl.s.waitcnt 53119
9+
// GFX10: rocdl.s.waitcnt 65407
10+
// GFX11: rocdl.s.waitcnt 65527
811
amdgpu.waitcnt
912

1013
// GFX9: rocdl.s.waitcnt 3952
14+
// GFX10: rocdl.s.waitcnt 16240
15+
// GFX11: rocdl.s.waitcnt 1015
1116
amdgpu.waitcnt vmcnt(0)
1217

1318
// GFX9: rocdl.s.waitcnt 53007
19+
// GFX10: rocdl.s.waitcnt 65295
20+
// GFX11: rocdl.s.waitcnt 65520
1421
amdgpu.waitcnt expcnt(0)
1522

1623
// GFX9: rocdl.s.waitcnt 49279
24+
// GFX10: rocdl.s.waitcnt 49279
25+
// GFX11: rocdl.s.waitcnt 64519
1726
amdgpu.waitcnt lgkmcnt(0)
1827

1928
return

0 commit comments

Comments
 (0)