Skip to content

Commit e0dab82

Browse files
authored
[flang][cuda] Add instructions for tma_bulk_s2g (llvm#165480)
1 parent 6d51c31 commit e0dab82

File tree

2 files changed

+7
-0
lines changed

2 files changed

+7
-0
lines changed

flang/lib/Optimizer/Builder/IntrinsicCall.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9287,6 +9287,11 @@ void IntrinsicLibrary::genTMABulkS2G(llvm::ArrayRef<fir::ExtendedValue> args) {
92879287
mlir::NVVM::NVVMMemorySpace::Global);
92889288
mlir::NVVM::CpAsyncBulkSharedCTAToGlobalOp::create(
92899289
builder, loc, dst, src, fir::getBase(args[2]), {}, {});
9290+
9291+
mlir::NVVM::InlinePtxOp::create(builder, loc, mlir::TypeRange{}, {}, {},
9292+
"cp.async.bulk.commit_group", {});
9293+
mlir::NVVM::CpAsyncBulkWaitGroupOp::create(builder, loc,
9294+
builder.getI32IntegerAttr(0), {});
92909295
}
92919296

92929297
// TMA_BULK_WAIT_GROUP (CUDA)

flang/test/Lower/CUDA/cuda-device-proc.cuf

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -479,6 +479,8 @@ end subroutine
479479

480480
! CHECK-LABEL: func.func @_QPtest_bulk_s2g
481481
! CHECK: nvvm.cp.async.bulk.global.shared.cta %{{.*}}, %{{.*}}, %{{.*}} : <1>, <3>
482+
! CHECK: nvvm.inline_ptx "cp.async.bulk.commit_group"
483+
! CHECK: nvvm.cp.async.bulk.wait_group 0
482484

483485
attributes(device) subroutine testAtomicCasLoop(aa, n)
484486
integer :: a

0 commit comments

Comments
 (0)