Skip to content

Commit bc7e391

Browse files
authored
[MLIR][NVGPU] Add mbarrier.get Op (llvm#133221)
The `mbarrier.create` op can create multiple mbarrier objects, and other mbarrier-related ops can access an mbarrier using a dynamic SSA value. This is especially useful when using mbarriers in dynamic loops.

This PR adds the `mbarrier.get` op, which returns a pointer to a specific mbarrier object from a group of barriers created by the `nvgpu.mbarrier.create` operation. It is useful when composing the NVGPU and NVVM dialects.

Example:
```
%mbars = nvgpu.mbarrier.create -> !nvgpu.mbarrier.group<memorySpace = #gpu.address_space<workgroup>, num_barriers = 10>
%mbar_pointer = nvgpu.mbarrier.get %mbars[%c2] : !nvgpu.mbarrier.group<memorySpace = #gpu.address_space<workgroup>> -> i32
```
1 parent 427ce92 commit bc7e391

File tree

3 files changed

+60
-0
lines changed

3 files changed

+60
-0
lines changed

mlir/include/mlir/Dialect/NVGPU/IR/NVGPUOps.td

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -322,6 +322,25 @@ def NVGPU_MBarrierCreateOp : NVGPU_Op<"mbarrier.create", []> {
322322
}];
323323
}
324324

325+
def NVGPU_MBarrierGetOp : NVGPU_Op<"mbarrier.get", []> {
326+
let summary = "Return a pointer to an `nvgpu.mbarrier`.";
327+
let description = [{
328+
The `nvgpu.mbarrier.get` operation retrieves a pointer to a specific
329+
`mbarrier` object from a group of barriers created by the `nvgpu.mbarrier.create` operation. The barrier is selected by the `index` operand given in square brackets, and the pointer is returned as an integer whose width (`i32` or `i64`) is chosen by the result type.
330+
331+
Example:
332+
```mlir
333+
%mbars = nvgpu.mbarrier.create -> !nvgpu.mbarrier.group<memorySpace = #gpu.address_space<workgroup>, num_barriers = 10>
334+
%mbar_pointer = nvgpu.mbarrier.get %mbars[%c2] : !nvgpu.mbarrier.group<memorySpace = #gpu.address_space<workgroup>, num_barriers = 10> -> i32
335+
```
336+
}];
337+
// `$barriers` is the barrier group to index into; `$mbarId` picks one barrier from it.
let arguments = (ins NVGPU_MBarrierGroup:$barriers, Index:$mbarId);
338+
// The pointer is exposed as a plain integer; both 32-bit and 64-bit results are accepted.
let results = (outs AnyTypeOf<[I32, I64]>:$mbarrierPointer);
339+
let assemblyFormat = [{
340+
$barriers `[` $mbarId `]` attr-dict `:` type($barriers) `->` type($mbarrierPointer)
341+
}];
342+
}
343+
325344
def NVGPU_MBarrierInitOp : NVGPU_Op<"mbarrier.init", []> {
326345
let summary = "Initialize the `nvgpu.mbarrier`.";
327346
let description = [{

mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -819,6 +819,24 @@ struct MBarrierBasePattern : public ConvertOpToLLVMPattern<SourceOp> {
819819
}
820820
};
821821

822+
/// Lowers `nvgpu.mbarrier.get` to an `llvm.ptrtoint` applied to the pointer
/// that `getMbarrierPtr` returns for the given barrier group and `mbarId`.
struct NVGPUMBarrierGetLowering
823+
: public MBarrierBasePattern<nvgpu::MBarrierGetOp> {
824+
using MBarrierBasePattern<nvgpu::MBarrierGetOp>::MBarrierBasePattern;
825+
826+
LogicalResult
827+
matchAndRewrite(nvgpu::MBarrierGetOp op, OpAdaptor adaptor,
828+
ConversionPatternRewriter &rewriter) const override {
829+
ImplicitLocOpBuilder b(op->getLoc(), rewriter);
830+
// The original (unconverted) group type is taken from `op`, not `adaptor`.
nvgpu::MBarrierGroupType mbarrierType = op.getBarriers().getType();
831+
// Place the rewriter's insertion point immediately before the op being replaced.
rewriter.setInsertionPoint(op);
832+
// `getMbarrierPtr` is defined outside this block (presumably in
// `MBarrierBasePattern` -- confirm); it receives the converted operands.
Value barrier = getMbarrierPtr(b, mbarrierType, adaptor.getBarriers(),
833+
adaptor.getMbarId(), rewriter);
834+
// The integer type of the ptrtoint result is whatever result type the op declares.
Type resType = op.getMbarrierPointer().getType();
835+
rewriter.replaceOpWithNewOp<LLVM::PtrToIntOp>(op, resType, barrier);
836+
return success();
837+
}
838+
};
839+
822840
/// Lowers `nvgpu.mbarrier.init` to `nvvm.mbarrier.init`
823841
struct NVGPUMBarrierInitLowering
824842
: public MBarrierBasePattern<nvgpu::MBarrierInitOp> {
@@ -1706,6 +1724,7 @@ void mlir::populateNVGPUToNVVMConversionPatterns(
17061724
patterns.add<
17071725
NVGPUMBarrierCreateLowering, // nvgpu.mbarrier.create
17081726
NVGPUMBarrierInitLowering, // nvgpu.mbarrier.init
1727+
NVGPUMBarrierGetLowering, // nvgpu.mbarrier.get
17091728
NVGPUMBarrierArriveLowering, // nvgpu.mbarrier.arrive
17101729
NVGPUMBarrierArriveNoCompleteLowering, // nvgpu.mbarrier.arrive.no_complete
17111730
NVGPUMBarrierTestWaitLowering, // nvgpu.mbarrier.test_wait_parity

mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -532,6 +532,28 @@ func.func @mbarrier_nocomplete() {
532532
func.return
533533
}
534534

535+
// Verifies the lowering of `nvgpu.mbarrier.get`: each use is expected to become
// an extractvalue / getelementptr / ptrtoint sequence, once with an i32 result
// and once with an i64 result.
// CHECK-LABEL: func @mbarrier_get
536+
// CHECK-SAME: %[[ARG0:.*]]: !nvgpu.mbarrier.group{{.*}}
537+
func.func @mbarrier_get(%barriers : !nvgpu.mbarrier.group<memorySpace = #gpu.address_space<workgroup>, num_barriers = 5>) {
538+
// CHECK: %[[S0:.+]] = builtin.unrealized_conversion_cast %[[ARG0]] : !nvgpu.mbarrier.group<memorySpace = #gpu.address_space<workgroup>, num_barriers = 5> to !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)>
539+
// First case, barrier index 2 with an i32 result.
// CHECK: %[[c2:.+]] = arith.constant 2 : index
540+
// CHECK: %[[S1:.+]] = builtin.unrealized_conversion_cast %[[c2]] : index to i64
541+
// CHECK: %[[S2:.+]] = llvm.extractvalue %[[S0]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)>
542+
// CHECK: %[[S3:.+]] = llvm.getelementptr %[[S2]][%[[S1]]] : (!llvm.ptr<3>, i64) -> !llvm.ptr<3>, i64
543+
// CHECK: %[[S4:.+]] = llvm.ptrtoint %[[S3]] : !llvm.ptr<3> to i32
544+
%c2 = arith.constant 2 : index
545+
nvgpu.mbarrier.get %barriers[%c2] : !nvgpu.mbarrier.group<memorySpace = #gpu.address_space<workgroup>, num_barriers = 5> -> i32
546+
547+
// Second case, barrier index 4 with an i64 result; reuses the same converted group value.
// CHECK: %[[c4:.+]] = arith.constant 4 : index
548+
// CHECK: %[[S5:.+]] = builtin.unrealized_conversion_cast %[[c4]] : index to i64
549+
// CHECK: %[[S6:.+]] = llvm.extractvalue %[[S0]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)>
550+
// CHECK: %[[S7:.+]] = llvm.getelementptr %[[S6]][%[[S5]]] : (!llvm.ptr<3>, i64) -> !llvm.ptr<3>, i64
551+
// CHECK: %[[S8:.+]] = llvm.ptrtoint %[[S7]] : !llvm.ptr<3> to i64
552+
%c4 = arith.constant 4 : index
553+
nvgpu.mbarrier.get %barriers[%c4] : !nvgpu.mbarrier.group<memorySpace = #gpu.address_space<workgroup>, num_barriers = 5> -> i64
554+
func.return
555+
}
556+
535557
// CHECK-LABEL: func @mbarrier_wait(
536558
// CHECK-SAME: %[[ARG0:.*]]: !nvgpu.mbarrier.group{{.*}}, %[[ARG1:.*]]: !nvgpu.mbarrier.token)
537559
func.func @mbarrier_wait(%barriers : !nvgpu.mbarrier.group<memorySpace = #gpu.address_space<workgroup>, num_barriers = 5>, %token : !tokenType) {

0 commit comments

Comments
 (0)