Skip to content

Commit 548e013

Browse files
authored
[flang][cuda] Add fence after barrier_init (#163016)
Add a fence after the barrier init instruction, as is done in the reference compiler.
1 parent 04dbb44 commit 548e013

File tree

2 files changed

+6
-0
lines changed

2 files changed

+6
-0
lines changed

flang/lib/Optimizer/Builder/IntrinsicCall.cpp

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3244,6 +3244,11 @@ void IntrinsicLibrary::genBarrierInit(llvm::ArrayRef<fir::ExtendedValue> args) {
32443244
convertBarrierToLLVM(builder, loc, fir::getBase(args[0]));
32453245
mlir::NVVM::MBarrierInitSharedOp::create(builder, loc, barrier,
32463246
fir::getBase(args[1]), {});
3247+
auto kind = mlir::NVVM::ProxyKindAttr::get(
3248+
builder.getContext(), mlir::NVVM::ProxyKind::async_shared);
3249+
auto space = mlir::NVVM::SharedSpaceAttr::get(
3250+
builder.getContext(), mlir::NVVM::SharedSpace::shared_cta);
3251+
mlir::NVVM::FenceProxyOp::create(builder, loc, kind, space);
32473252
}
32483253

32493254
// BESSEL_JN

flang/test/Lower/CUDA/cuda-device-proc.cuf

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -411,6 +411,7 @@ end subroutine
411411
! CHECK: %[[LLVM_PTR:.*]] = fir.convert %[[DECL_SHARED]]#0 : (!fir.ref<i64>) -> !llvm.ptr
412412
! CHECK: %[[SHARED_PTR:.*]] = llvm.addrspacecast %[[LLVM_PTR]] : !llvm.ptr to !llvm.ptr<3>
413413
! CHECK: nvvm.mbarrier.init.shared %[[SHARED_PTR]], %[[COUNT]] : !llvm.ptr<3>, i32
414+
! CHECK: nvvm.fence.proxy {kind = #nvvm.proxy_kind<async.shared>, space = #nvvm.shared_space<cta>}
414415

415416
! CHECK: %[[LLVM_PTR:.*]] = fir.convert %[[DECL_SHARED]]#0 : (!fir.ref<i64>) -> !llvm.ptr
416417
! CHECK: %[[SHARED_PTR:.*]] = llvm.addrspacecast %[[LLVM_PTR]] : !llvm.ptr to !llvm.ptr<3>

0 commit comments

Comments
 (0)