Skip to content

Commit c5c7c99

Browse files
committed
[flang][cuda] Add fence after barrier_init
1 parent ca55c07 commit c5c7c99

File tree

2 files changed

+6
-0
lines changed

2 files changed

+6
-0
lines changed

flang/lib/Optimizer/Builder/IntrinsicCall.cpp

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3236,6 +3236,11 @@ void IntrinsicLibrary::genBarrierInit(llvm::ArrayRef<fir::ExtendedValue> args) {
32363236
convertBarrierToLLVM(builder, loc, fir::getBase(args[0]));
32373237
mlir::NVVM::MBarrierInitSharedOp::create(builder, loc, barrier,
32383238
fir::getBase(args[1]), {});
3239+
auto kind = mlir::NVVM::ProxyKindAttr::get(
3240+
builder.getContext(), mlir::NVVM::ProxyKind::async_shared);
3241+
auto space = mlir::NVVM::SharedSpaceAttr::get(
3242+
builder.getContext(), mlir::NVVM::SharedSpace::shared_cta);
3243+
mlir::NVVM::FenceProxyOp::create(builder, loc, kind, space);
32393244
}
32403245

32413246
// BESSEL_JN

flang/test/Lower/CUDA/cuda-device-proc.cuf

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -411,6 +411,7 @@ end subroutine
411411
! CHECK: %[[LLVM_PTR:.*]] = fir.convert %[[DECL_SHARED]]#0 : (!fir.ref<i64>) -> !llvm.ptr
412412
! CHECK: %[[SHARED_PTR:.*]] = llvm.addrspacecast %[[LLVM_PTR]] : !llvm.ptr to !llvm.ptr<3>
413413
! CHECK: nvvm.mbarrier.init.shared %[[SHARED_PTR]], %[[COUNT]] : !llvm.ptr<3>, i32
414+
! CHECK: nvvm.fence.proxy {kind = #nvvm.proxy_kind<async.shared>, space = #nvvm.shared_space<cta>}
414415

415416
! CHECK: %[[LLVM_PTR:.*]] = fir.convert %[[DECL_SHARED]]#0 : (!fir.ref<i64>) -> !llvm.ptr
416417
! CHECK: %[[SHARED_PTR:.*]] = llvm.addrspacecast %[[LLVM_PTR]] : !llvm.ptr to !llvm.ptr<3>

0 commit comments

Comments
 (0)