more chipsets

Hardcode84 · Hardcode84 · commit 5320853675ed · 2025-07-22T17:54:33.000+02:00
Signed-off-by: Ivan Butygin <ivan.butygin@gmail.com>
diff --git a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
@@ -421,8 +421,23 @@ struct RawBufferOpLowering : public ConvertOpToLLVMPattern<GpuOp> {
 
 // TODO: AMDGPU backend already have all this bitpacking logic, we should move
 // it to some common place.
+/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are packed into \p Waitcnt as follows:
+///     \p Vmcnt = \p Waitcnt[3:0]        (pre-gfx9)
+///     \p Vmcnt = \p Waitcnt[15:14,3:0]  (gfx9,10)
+///     \p Vmcnt = \p Waitcnt[15:10]      (gfx11)
+///     \p Expcnt = \p Waitcnt[6:4]       (pre-gfx11)
+///     \p Expcnt = \p Waitcnt[2:0]       (gfx11)
+///     \p Lgkmcnt = \p Waitcnt[11:8]     (pre-gfx10)
+///     \p Lgkmcnt = \p Waitcnt[13:8]     (gfx10)
+///     \p Lgkmcnt = \p Waitcnt[9:4]      (gfx11)
 static FailureOr<unsigned> encodeWaitcnt(Chipset chipset, unsigned vmcnt,
                                          unsigned expcnt, unsigned lgkmcnt) {
+  if (chipset.majorVersion < 9) {
+    vmcnt = std::min(15u, vmcnt);
+    expcnt = std::min(7u, expcnt);
+    lgkmcnt = std::min(15u, lgkmcnt);
+    return vmcnt | (expcnt << 4) | (lgkmcnt << 8);
+  }
   if (chipset.majorVersion == 9) {
     vmcnt = std::min(63u, vmcnt);
     expcnt = std::min(7u, expcnt);
@@ -432,6 +447,21 @@ static FailureOr<unsigned> encodeWaitcnt(Chipset chipset, unsigned vmcnt,
     unsigned otherCnts = (expcnt << 4) | (lgkmcnt << 8);
     return lowBits | highBits | otherCnts;
   }
+  if (chipset.majorVersion == 10) {
+    vmcnt = std::min(63u, vmcnt);
+    expcnt = std::min(7u, expcnt);
+    lgkmcnt = std::min(63u, lgkmcnt);
+    unsigned lowBits = vmcnt & 0xF;
+    unsigned highBits = (vmcnt >> 4) << 14;
+    unsigned otherCnts = (expcnt << 4) | (lgkmcnt << 8);
+    return lowBits | highBits | otherCnts;
+  }
+  if (chipset.majorVersion == 11) {
+    vmcnt = std::min(63u, vmcnt);
+    expcnt = std::min(7u, expcnt);
+    lgkmcnt = std::min(63u, lgkmcnt);
+    return (vmcnt << 10) | expcnt | (lgkmcnt << 4);
+  }
   return failure();
 }
 
diff --git a/mlir/test/Conversion/AMDGPUToROCDL/waitcnt.mlir b/mlir/test/Conversion/AMDGPUToROCDL/waitcnt.mlir
@@ -1,19 +1,28 @@
 // RUN: mlir-opt %s -convert-amdgpu-to-rocdl=chipset=gfx942 | FileCheck %s --check-prefixes=CHECK,GFX9
-// TODO: Add more chipsets support
+// RUN: mlir-opt %s -convert-amdgpu-to-rocdl=chipset=gfx1030 | FileCheck %s --check-prefixes=CHECK,GFX10
+// RUN: mlir-opt %s -convert-amdgpu-to-rocdl=chipset=gfx1100 | FileCheck %s --check-prefixes=CHECK,GFX11
 
 
 // CHECK-LABEL: func @waitcnt
 func.func @waitcnt() {
   // GFX9: rocdl.s.waitcnt 53119
+  // GFX10: rocdl.s.waitcnt 65407
+  // GFX11: rocdl.s.waitcnt 65527
   amdgpu.waitcnt
 
   // GFX9: rocdl.s.waitcnt 3952
+  // GFX10: rocdl.s.waitcnt 16240
+  // GFX11: rocdl.s.waitcnt 1015
   amdgpu.waitcnt vmcnt(0)
 
   // GFX9: rocdl.s.waitcnt 53007
+  // GFX10: rocdl.s.waitcnt 65295
+  // GFX11: rocdl.s.waitcnt 65520
   amdgpu.waitcnt expcnt(0)
 
   // GFX9: rocdl.s.waitcnt 49279
+  // GFX10: rocdl.s.waitcnt 49279
+  // GFX11: rocdl.s.waitcnt 64519
   amdgpu.waitcnt lgkmcnt(0)
 
   return