@@ -346,6 +346,14 @@ static constexpr IntrinsicHandler handlers[]{
346346 &I::genVoteSync<mlir::NVVM::VoteSyncKind::ballot>,
347347 {{{" mask" , asValue}, {" pred" , asValue}}},
348348 /* isElemental=*/ false },
349+ {" barrier_arrive" ,
350+ &I::genBarrierArrive,
351+ {{{" barrier" , asAddr}}},
352+ /* isElemental=*/ false },
353+ {" barrier_arrive_cnt" ,
354+ &I::genBarrierArriveCnt,
355+ {{{" barrier" , asAddr}, {" count" , asValue}}},
356+ /* isElemental=*/ false },
349357 {" barrier_init" ,
350358 &I::genBarrierInit,
351359 {{{" barrier" , asAddr}, {" count" , asValue}}},
@@ -3180,19 +3188,53 @@ IntrinsicLibrary::genAssociated(mlir::Type resultType,
31803188 return fir::runtime::genAssociated (builder, loc, pointerBox, targetBox);
31813189}
31823190
3183- // BARRIER_INIT (CUDA)
3184- void IntrinsicLibrary::genBarrierInit (llvm::ArrayRef<fir::ExtendedValue> args) {
3185- assert (args. size () == 2 );
3186- auto llvmPtr = fir::ConvertOp::create (
3191+ static mlir::Value convertBarrierToLLVM (fir::FirOpBuilder &builder,
3192+ mlir::Location loc,
3193+ mlir::Value barrier) {
3194+ mlir::Value llvmPtr = fir::ConvertOp::create (
31873195 builder, loc, mlir::LLVM::LLVMPointerType::get (builder.getContext ()),
3188- fir::getBase (args[ 0 ]) );
3189- auto addrCast = mlir::LLVM::AddrSpaceCastOp::create (
3196+ barrier );
3197+ mlir::Value addrCast = mlir::LLVM::AddrSpaceCastOp::create (
31903198 builder, loc,
31913199 mlir::LLVM::LLVMPointerType::get (
31923200 builder.getContext (),
31933201 static_cast <unsigned >(mlir::NVVM::NVVMMemorySpace::Shared)),
31943202 llvmPtr);
3195- mlir::NVVM::MBarrierInitSharedOp::create (builder, loc, addrCast,
3203+ return addrCast;
3204+ }
3205+
3206+ // BARRIER_ARRIVE (CUDA)
3207+ mlir::Value
3208+ IntrinsicLibrary::genBarrierArrive (mlir::Type resultType,
3209+ llvm::ArrayRef<mlir::Value> args) {
3210+ assert (args.size () == 1 );
3211+ mlir::Value barrier = convertBarrierToLLVM (builder, loc, args[0 ]);
3212+ return mlir::NVVM::MBarrierArriveSharedOp::create (builder, loc, resultType,
3213+ barrier)
3214+ .getResult ();
3215+ }
3216+
3217+ // BARRIER_ARRIBVE_CNT (CUDA)
3218+ mlir::Value
3219+ IntrinsicLibrary::genBarrierArriveCnt (mlir::Type resultType,
3220+ llvm::ArrayRef<mlir::Value> args) {
3221+ assert (args.size () == 2 );
3222+ mlir::Value barrier = convertBarrierToLLVM (builder, loc, args[0 ]);
3223+ mlir::Value token = fir::AllocaOp::create (builder, loc, resultType);
3224+ // TODO: the MBarrierArriveExpectTxOp is not taking the state argument and
3225+ // currently just the sink symbol `_`.
3226+ // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-mbarrier-arrive
3227+ mlir::NVVM::MBarrierArriveExpectTxOp::create (builder, loc, barrier, args[1 ],
3228+ {});
3229+ return fir::LoadOp::create (builder, loc, token);
3230+ }
3231+
3232+ // BARRIER_INIT (CUDA)
3233+ void IntrinsicLibrary::genBarrierInit (llvm::ArrayRef<fir::ExtendedValue> args) {
3234+ assert (args.size () == 2 );
3235+ mlir::Value barrier =
3236+ convertBarrierToLLVM (builder, loc, fir::getBase (args[0 ]));
3237+ mlir::NVVM::MBarrierInitSharedOp::create (builder, loc, barrier,
31963238 fir::getBase (args[1 ]), {});
31973239}
31983240
0 commit comments