Skip to content

Commit b014265

Browse files
authored
[mlir][AMDGPU] New gfx12 barrier instructions and update lowering LDSBarrierOp (#109273)
Add new gfx12 barrier instructions (s.barrier.signal, s.barrier.wait, and s.wait.dscnt) and update the lowering of LDSBarrierOp accordingly. CC: @krzysz00 @manupak @giuseros
1 parent c24418a commit b014265

File tree

5 files changed

+98
-22
lines changed

5 files changed

+98
-22
lines changed

mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,11 +88,12 @@ class ROCDL_IntrPure1Op<string mnemonic> :
8888

8989
class ROCDL_IntrOp<string mnemonic, list<int> overloadedResults,
9090
list<int> overloadedOperands, list<Trait> traits, int numResults,
91-
int requiresAccessGroup = 0, int requiresAliasAnalysis = 0> :
91+
int requiresAccessGroup = 0, int requiresAliasAnalysis = 0, list<int> immArgPositions = [],
92+
list<string> immArgAttrNames = []> :
9293
LLVM_IntrOpBase<ROCDL_Dialect, mnemonic,
9394
"amdgcn_" # !subst(".", "_", mnemonic), overloadedResults,
9495
overloadedOperands, traits, numResults, requiresAccessGroup,
95-
requiresAliasAnalysis>;
96+
requiresAliasAnalysis, 0, immArgPositions, immArgAttrNames>;
9697

9798
//===----------------------------------------------------------------------===//
9899
// ROCDL special register op definitions
@@ -255,6 +256,26 @@ def ROCDL_BarrierOp : ROCDL_Op<"barrier"> {
255256
let assemblyFormat = "attr-dict";
256257
}
257258

259+
def ROCDL_BarrierSignalOp : ROCDL_IntrOp<"s.barrier.signal", [], [], [], 0, 0, 0, [0], ["id"]>,
260+
Arguments<(ins I32Attr:$id)> {
261+
let results = (outs);
262+
let assemblyFormat = "$id attr-dict";
263+
}
264+
265+
def ROCDL_BarrierWaitOp : ROCDL_IntrOp<"s.barrier.wait", [], [], [], 0, 0, 0, [0], ["id"]>,
266+
Arguments<(ins I16Attr:$id)> {
267+
let results = (outs);
268+
let assemblyFormat = "$id attr-dict";
269+
string llvmBuilder =
270+
"createIntrinsicCall(builder, llvm::Intrinsic::amdgcn_s_barrier_wait,builder.getInt16(op.getId()));";
271+
}
272+
273+
def ROCDL_WaitDscntOp: ROCDL_IntrOp<"s.wait.dscnt", [], [], [], 0, 0, 0, [0], ["id"]>,
274+
Arguments<(ins I16Attr:$id)> {
275+
let results = (outs);
276+
let assemblyFormat = "$id attr-dict";
277+
}
278+
258279
def ROCDL_SetPrioOp : ROCDL_IntrOp<"s.setprio", [], [], [], 0>,
259280
Arguments<(ins I16Attr:$priority)> {
260281
let results = (outs);

mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp

Lines changed: 28 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -301,27 +301,35 @@ struct LDSBarrierOpLowering : public ConvertOpToLLVMPattern<LDSBarrierOp> {
301301
/*operand_attrs=*/ArrayAttr());
302302
return success();
303303
}
304-
constexpr int32_t ldsOnlyBitsGfx6789 = ~(0x1f << 8);
305-
constexpr int32_t ldsOnlyBitsGfx10 = ~(0x3f << 8);
306-
// Left in place in case someone disables the inline ASM path or future
307-
// chipsets use the same bit pattern.
308-
constexpr int32_t ldsOnlyBitsGfx11 = ~(0x3f << 4);
309-
310-
int32_t ldsOnlyBits;
311-
if (chipset.majorVersion == 11)
312-
ldsOnlyBits = ldsOnlyBitsGfx11;
313-
else if (chipset.majorVersion == 10)
314-
ldsOnlyBits = ldsOnlyBitsGfx10;
315-
else if (chipset.majorVersion <= 9)
316-
ldsOnlyBits = ldsOnlyBitsGfx6789;
317-
else
318-
return op.emitOpError(
319-
"don't know how to lower this for chipset major version")
320-
<< chipset.majorVersion;
304+
if (chipset.majorVersion < 12) {
305+
constexpr int32_t ldsOnlyBitsGfx6789 = ~(0x1f << 8);
306+
constexpr int32_t ldsOnlyBitsGfx10 = ~(0x3f << 8);
307+
// Left in place in case someone disables the inline ASM path or future
308+
// chipsets use the same bit pattern.
309+
constexpr int32_t ldsOnlyBitsGfx11 = ~(0x3f << 4);
310+
311+
int32_t ldsOnlyBits;
312+
if (chipset.majorVersion == 11)
313+
ldsOnlyBits = ldsOnlyBitsGfx11;
314+
else if (chipset.majorVersion == 10)
315+
ldsOnlyBits = ldsOnlyBitsGfx10;
316+
else if (chipset.majorVersion <= 9)
317+
ldsOnlyBits = ldsOnlyBitsGfx6789;
318+
else
319+
return op.emitOpError(
320+
"don't know how to lower this for chipset major version")
321+
<< chipset.majorVersion;
322+
323+
Location loc = op->getLoc();
324+
rewriter.create<ROCDL::WaitcntOp>(loc, ldsOnlyBits);
325+
rewriter.replaceOpWithNewOp<ROCDL::SBarrierOp>(op);
326+
} else {
327+
Location loc = op->getLoc();
328+
rewriter.create<ROCDL::WaitDscntOp>(loc, 0);
329+
rewriter.create<ROCDL::BarrierSignalOp>(loc, -1);
330+
rewriter.replaceOpWithNewOp<ROCDL::BarrierWaitOp>(op, -1);
331+
}
321332

322-
Location loc = op->getLoc();
323-
rewriter.create<ROCDL::WaitcntOp>(loc, ldsOnlyBits);
324-
rewriter.replaceOpWithNewOp<ROCDL::SBarrierOp>(op);
325333
return success();
326334
}
327335
};

mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// RUN: mlir-opt %s -convert-amdgpu-to-rocdl=chipset=gfx90a | FileCheck %s --check-prefixes=CHECK,GFX9,GFX90A
33
// RUN: mlir-opt %s -convert-amdgpu-to-rocdl=chipset=gfx1030 | FileCheck %s --check-prefixes=CHECK,GFX10,RDNA
44
// RUN: mlir-opt %s -convert-amdgpu-to-rocdl=chipset=gfx1100 | FileCheck %s --check-prefixes=CHECK,GFX11,RDNA
5+
// RUN: mlir-opt %s -convert-amdgpu-to-rocdl=chipset=gfx1201 | FileCheck %s --check-prefixes=CHECK,GFX12,RDNA
56

67
// CHECK-LABEL: func @gpu_gcn_raw_buffer_load_scalar_i32
78
func.func @gpu_gcn_raw_buffer_load_scalar_i32(%buf: memref<i32>) -> i32 {
@@ -246,6 +247,9 @@ func.func @lds_barrier() {
246247
// GFX10-NEXT: rocdl.s.barrier
247248
// GFX11: llvm.inline_asm has_side_effects asm_dialect = att
248249
// GFX11-SAME: ";;;WARNING: BREAKS DEBUG WATCHES\0As_waitcnt lgkmcnt(0)\0As_barrier"
250+
// GFX12: rocdl.s.wait.dscnt 0
251+
// GFX12-NEXT: rocdl.s.barrier.signal -1
252+
// GFX12-NEXT: rocdl.s.barrier.wait -1
249253
amdgpu.lds_barrier
250254
func.return
251255
}

mlir/test/Dialect/LLVMIR/rocdl.mlir

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -352,6 +352,28 @@ llvm.func @rocdl.s.barrier() {
352352
rocdl.s.barrier
353353
llvm.return
354354
}
355+
356+
llvm.func @rocdl.s.barrier.signal() {
357+
// CHECK-LABEL: rocdl.s.barrier.signal
358+
// CHECK: rocdl.s.barrier.signal -1
359+
rocdl.s.barrier.signal -1
360+
llvm.return
361+
}
362+
363+
llvm.func @rocdl.s.barrier.wait() {
364+
// CHECK-LABEL: rocdl.s.barrier.wait
365+
// CHECK: rocdl.s.barrier.wait -1
366+
rocdl.s.barrier.wait -1
367+
llvm.return
368+
}
369+
370+
llvm.func @rocdl.s.wait.dscnt() {
371+
// CHECK-LABEL: rocdl.s.wait.dscnt
372+
// CHECK: rocdl.s.wait.dscnt 0
373+
rocdl.s.wait.dscnt 0
374+
llvm.return
375+
}
376+
355377
// -----
356378

357379
// expected-error@below {{attribute attached to unexpected op}}

mlir/test/Target/LLVMIR/rocdl.mlir

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,27 @@ llvm.func @rocdl.barrier() {
142142
llvm.return
143143
}
144144

145+
llvm.func @rocdl.s.barrier.signal() {
146+
// CHECK-LABEL: rocdl.s.barrier.signal
147+
// CHECK-NEXT: call void @llvm.amdgcn.s.barrier.signal(i32 -1)
148+
rocdl.s.barrier.signal -1
149+
llvm.return
150+
}
151+
152+
llvm.func @rocdl.s.barrier.wait() {
153+
// CHECK-LABEL: rocdl.s.barrier.wait
154+
// CHECK-NEXT: call void @llvm.amdgcn.s.barrier.wait(i16 -1)
155+
rocdl.s.barrier.wait -1
156+
llvm.return
157+
}
158+
159+
llvm.func @rocdl.s.wait.dscnt() {
160+
// CHECK-LABEL: rocdl.s.wait.dscnt
161+
// CHECK-NEXT: call void @llvm.amdgcn.s.wait.dscnt(i16 0)
162+
rocdl.s.wait.dscnt 0
163+
llvm.return
164+
}
165+
145166
llvm.func @rocdl.setprio() {
146167
// CHECK: call void @llvm.amdgcn.s.setprio(i16 0)
147168
rocdl.s.setprio 0

0 commit comments

Comments
 (0)