# Patch summary (commentary only; everything from the first "diff --git" header onward is unchanged).
#
# Purpose: add a CUDA Fortran device procedure `barrier_init(barrier, count)` and lower it in flang.
#
# Files touched by this patch:
#   * flang/include/flang/Optimizer/Builder/IntrinsicCall.h
#       - declares `void genBarrierInit(...)` in `struct IntrinsicLibrary`.
#   * flang/lib/Optimizer/Builder/IntrinsicCall.cpp
#       - adds a "barrier_init" entry to `handlers[]`: `barrier` is passed asAddr, `count` asValue,
#         and the handler is marked non-elemental.
#       - defines `IntrinsicLibrary::genBarrierInit`, which:
#           1. asserts that exactly two arguments were supplied;
#           2. converts the base of args[0] to an LLVM pointer with fir::ConvertOp;
#           3. casts that pointer with LLVM::AddrSpaceCastOp to a pointer type built from
#              NVVM::NVVMMemorySpace::Shared;
#           4. creates NVVM::MBarrierInitSharedOp from the cast pointer and the base of args[1].
#   * flang/module/cudadevice.f90
#       - adds an interface for `attributes(device) subroutine barrier_init(barrier, count)` with
#         `integer(8) :: barrier` and `integer(4) :: count`.
#   * flang/test/Lower/CUDA/cuda-device-proc.cuf
#       - adds the `test_barrier` global subroutine plus CHECK lines for the fir.convert,
#         the llvm.addrspacecast to `!llvm.ptr<3>`, and `nvvm.mbarrier.init.shared`.
#       - removes 25 `CHECK-DAG` lines that matched `func.func private @__ld{ca,cg,cs,lu,cv}_*` declarations.
#
# NOTE(review): the removal of the `__ld*` CHECK-DAG lines is not explained by anything in this
#   patch text -- confirm it is intentional and not an accidental deletion.
# NOTE(review): this copy of the patch looks extraction-damaged: line breaks are collapsed and
#   angle-bracket contents appear to be missing (e.g. `llvm::ArrayRef);`, `static_cast(`,
#   `!fir.ref>`). It presumably will not apply as-is; regenerate it from the original commit
#   rather than hand-repairing it -- TODO confirm against the upstream change.
diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h index 320f913858956..695221cbcb42c 100644 --- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h +++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h @@ -208,6 +208,7 @@ struct IntrinsicLibrary { fir::ExtendedValue genAssociated(mlir::Type, llvm::ArrayRef); mlir::Value genAtand(mlir::Type, llvm::ArrayRef); + void genBarrierInit(llvm::ArrayRef); fir::ExtendedValue genBesselJn(mlir::Type, llvm::ArrayRef); fir::ExtendedValue genBesselYn(mlir::Type, diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp index de7694ffd468c..2c21868295528 100644 --- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp @@ -346,6 +346,10 @@ static constexpr IntrinsicHandler handlers[]{ &I::genVoteSync, {{{"mask", asValue}, {"pred", asValue}}}, /*isElemental=*/false}, + {"barrier_init", + &I::genBarrierInit, + {{{"barrier", asAddr}, {"count", asValue}}}, + /*isElemental=*/false}, {"bessel_jn", &I::genBesselJn, {{{"n1", asValue}, {"n2", asValue}, {"x", asValue}}}, @@ -3176,6 +3180,22 @@ IntrinsicLibrary::genAssociated(mlir::Type resultType, return fir::runtime::genAssociated(builder, loc, pointerBox, targetBox); } +// BARRIER_INIT (CUDA) +void IntrinsicLibrary::genBarrierInit(llvm::ArrayRef args) { + assert(args.size() == 2); + auto llvmPtr = fir::ConvertOp::create( + builder, loc, mlir::LLVM::LLVMPointerType::get(builder.getContext()), + fir::getBase(args[0])); + auto addrCast = mlir::LLVM::AddrSpaceCastOp::create( + builder, loc, + mlir::LLVM::LLVMPointerType::get( + builder.getContext(), + static_cast(mlir::NVVM::NVVMMemorySpace::Shared)), + llvmPtr); + mlir::NVVM::MBarrierInitSharedOp::create(builder, loc, addrCast, + fir::getBase(args[1]), {}); +} + // BESSEL_JN fir::ExtendedValue IntrinsicLibrary::genBesselJn(mlir::Type resultType, diff --git 
a/flang/module/cudadevice.f90 b/flang/module/cudadevice.f90 index 1598c64db2cb5..4f552dcf08372 100644 --- a/flang/module/cudadevice.f90 +++ b/flang/module/cudadevice.f90 @@ -1987,6 +1987,13 @@ attributes(device,host) logical function on_device() bind(c) end function end interface + interface + attributes(device) subroutine barrier_init(barrier, count) + integer(8) :: barrier + integer(4) :: count + end subroutine + end interface + contains attributes(device) subroutine syncthreads() diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf index 5e1f6b66d1d53..cdb337b115e47 100644 --- a/flang/test/Lower/CUDA/cuda-device-proc.cuf +++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf @@ -392,28 +392,17 @@ end subroutine ! CHECK: %{{.*}} = nvvm.vote.sync any %{{.*}}, %{{.*}} -> i1 ! CHECK: %{{.*}} = nvvm.vote.sync ballot %{{.*}}, %{{.*}} -> i32 -! CHECK-DAG: func.func private @__ldca_i4x4_(!fir.ref>, !fir.ref>) -! CHECK-DAG: func.func private @__ldcg_i4x4_(!fir.ref>, !fir.ref>) -! CHECK-DAG: func.func private @__ldcs_i4x4_(!fir.ref>, !fir.ref>) -! CHECK-DAG: func.func private @__ldlu_i4x4_(!fir.ref>, !fir.ref>) -! CHECK-DAG: func.func private @__ldcv_i4x4_(!fir.ref>, !fir.ref>) -! CHECK-DAG: func.func private @__ldca_i8x2_(!fir.ref>, !fir.ref>) -! CHECK-DAG: func.func private @__ldcg_i8x2_(!fir.ref>, !fir.ref>) -! CHECK-DAG: func.func private @__ldcs_i8x2_(!fir.ref>, !fir.ref>) -! CHECK-DAG: func.func private @__ldlu_i8x2_(!fir.ref>, !fir.ref>) -! CHECK-DAG: func.func private @__ldcv_i8x2_(!fir.ref>, !fir.ref>) -! CHECK-DAG: func.func private @__ldca_r4x4_(!fir.ref>, !fir.ref>) -! CHECK-DAG: func.func private @__ldcg_r4x4_(!fir.ref>, !fir.ref>) -! CHECK-DAG: func.func private @__ldcs_r4x4_(!fir.ref>, !fir.ref>) -! CHECK-DAG: func.func private @__ldlu_r4x4_(!fir.ref>, !fir.ref>) -! CHECK-DAG: func.func private @__ldcv_r4x4_(!fir.ref>, !fir.ref>) -! CHECK-DAG: func.func private @__ldca_r2x2_(!fir.ref>, !fir.ref>) -! 
CHECK-DAG: func.func private @__ldcg_r2x2_(!fir.ref>, !fir.ref>) -! CHECK-DAG: func.func private @__ldcs_r2x2_(!fir.ref>, !fir.ref>) -! CHECK-DAG: func.func private @__ldlu_r2x2_(!fir.ref>, !fir.ref>) -! CHECK-DAG: func.func private @__ldcv_r2x2_(!fir.ref>, !fir.ref>) -! CHECK-DAG: func.func private @__ldca_r8x2_(!fir.ref>, !fir.ref>) -! CHECK-DAG: func.func private @__ldcg_r8x2_(!fir.ref>, !fir.ref>) -! CHECK-DAG: func.func private @__ldcs_r8x2_(!fir.ref>, !fir.ref>) -! CHECK-DAG: func.func private @__ldlu_r8x2_(!fir.ref>, !fir.ref>) -! CHECK-DAG: func.func private @__ldcv_r8x2_(!fir.ref>, !fir.ref>) +attributes(global) subroutine test_barrier() + integer(8), shared :: barrier + call barrier_init(barrier, 256) +end subroutine + + +! CHECK-LABEL: func.func @_QPtest_barrier() + +! CHECK: %[[SHARED:.*]] = cuf.shared_memory i64 {bindc_name = "barrier", uniq_name = "_QFtest_barrierEbarrier"} -> !fir.ref +! CHECK: %[[DECL_SHARED:.*]]:2 = hlfir.declare %[[SHARED]] {data_attr = #cuf.cuda, uniq_name = "_QFtest_barrierEbarrier"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[COUNT:.*]] = arith.constant 256 : i32 +! CHECK: %[[LLVM_PTR:.*]] = fir.convert %[[DECL_SHARED]]#0 : (!fir.ref) -> !llvm.ptr +! CHECK: %[[SHARED_PTR:.*]] = llvm.addrspacecast %[[LLVM_PTR]] : !llvm.ptr to !llvm.ptr<3> +! CHECK: nvvm.mbarrier.init.shared %[[SHARED_PTR]], %[[COUNT]] : !llvm.ptr<3>, i32