@@ -1020,6 +1020,13 @@ static constexpr IntrinsicHandler handlers[]{
10201020 &I::genTMABulkCommitGroup,
10211021 {{}},
10221022 /* isElemental=*/ false },
1023+ {" tma_bulk_g2s" ,
1024+ &I::genTMABulkG2S,
1025+ {{{" barrier" , asAddr},
1026+ {" src" , asAddr},
1027+ {" dst" , asAddr},
1028+ {" nbytes" , asValue}}},
1029+ /* isElemental=*/ false },
10231030 {" tma_bulk_wait_group" ,
10241031 &I::genTMABulkWaitGroup,
10251032 {{}},
@@ -3200,17 +3207,17 @@ IntrinsicLibrary::genAssociated(mlir::Type resultType,
32003207 return fir::runtime::genAssociated (builder, loc, pointerBox, targetBox);
32013208}
32023209
3203- static mlir::Value convertBarrierToLLVM (fir::FirOpBuilder &builder,
3204- mlir::Location loc,
3205- mlir::Value barrier) {
3210+ static mlir::Value convertPtrToNVVMSpace (fir::FirOpBuilder &builder,
3211+ mlir::Location loc,
3212+ mlir::Value barrier,
3213+ mlir::NVVM::NVVMMemorySpace space) {
32063214 mlir::Value llvmPtr = fir::ConvertOp::create (
32073215 builder, loc, mlir::LLVM::LLVMPointerType::get (builder.getContext ()),
32083216 barrier);
32093217 mlir::Value addrCast = mlir::LLVM::AddrSpaceCastOp::create (
32103218 builder, loc,
3211- mlir::LLVM::LLVMPointerType::get (
3212- builder.getContext (),
3213- static_cast <unsigned >(mlir::NVVM::NVVMMemorySpace::Shared)),
3219+ mlir::LLVM::LLVMPointerType::get (builder.getContext (),
3220+ static_cast <unsigned >(space)),
32143221 llvmPtr);
32153222 return addrCast;
32163223}
@@ -3220,7 +3227,8 @@ mlir::Value
32203227IntrinsicLibrary::genBarrierArrive (mlir::Type resultType,
32213228 llvm::ArrayRef<mlir::Value> args) {
32223229 assert (args.size () == 1 );
3223- mlir::Value barrier = convertBarrierToLLVM (builder, loc, args[0 ]);
3230+ mlir::Value barrier = convertPtrToNVVMSpace (
3231+ builder, loc, args[0 ], mlir::NVVM::NVVMMemorySpace::Shared);
32243232 return mlir::NVVM::MBarrierArriveSharedOp::create (builder, loc, resultType,
32253233 barrier)
32263234 .getResult ();
@@ -3231,7 +3239,8 @@ mlir::Value
32313239IntrinsicLibrary::genBarrierArriveCnt (mlir::Type resultType,
32323240 llvm::ArrayRef<mlir::Value> args) {
32333241 assert (args.size () == 2 );
3234- mlir::Value barrier = convertBarrierToLLVM (builder, loc, args[0 ]);
3242+ mlir::Value barrier = convertPtrToNVVMSpace (
3243+ builder, loc, args[0 ], mlir::NVVM::NVVMMemorySpace::Shared);
32353244 mlir::Value token = fir::AllocaOp::create (builder, loc, resultType);
32363245 // TODO: the MBarrierArriveExpectTxOp is not taking the state argument and
32373246 // currently just the sink symbol `_`.
@@ -3244,8 +3253,8 @@ IntrinsicLibrary::genBarrierArriveCnt(mlir::Type resultType,
32443253// BARRIER_INIT (CUDA)
32453254void IntrinsicLibrary::genBarrierInit (llvm::ArrayRef<fir::ExtendedValue> args) {
32463255 assert (args.size () == 2 );
3247- mlir::Value barrier =
3248- convertBarrierToLLVM ( builder, loc, fir::getBase (args[0 ]));
3256+ mlir::Value barrier = convertPtrToNVVMSpace (
3257+ builder, loc, fir::getBase (args[0 ]), mlir::NVVM::NVVMMemorySpace::Shared );
32493258 mlir::NVVM::MBarrierInitSharedOp::create (builder, loc, barrier,
32503259 fir::getBase (args[1 ]), {});
32513260 auto kind = mlir::NVVM::ProxyKindAttr::get (
@@ -9204,6 +9213,20 @@ void IntrinsicLibrary::genTMABulkCommitGroup(
92049213 mlir::NVVM::CpAsyncBulkCommitGroupOp::create (builder, loc);
92059214}
92069215
9216+ // TMA_BULK_G2S (CUDA)
9217+ void IntrinsicLibrary::genTMABulkG2S (llvm::ArrayRef<fir::ExtendedValue> args) {
9218+ assert (args.size () == 4 );
9219+ mlir::Value barrier = convertPtrToNVVMSpace (
9220+ builder, loc, fir::getBase (args[0 ]), mlir::NVVM::NVVMMemorySpace::Shared);
9221+ mlir::Value dst =
9222+ convertPtrToNVVMSpace (builder, loc, fir::getBase (args[2 ]),
9223+ mlir::NVVM::NVVMMemorySpace::SharedCluster);
9224+ mlir::Value src = convertPtrToNVVMSpace (builder, loc, fir::getBase (args[1 ]),
9225+ mlir::NVVM::NVVMMemorySpace::Global);
9226+ mlir::NVVM::CpAsyncBulkGlobalToSharedClusterOp::create (
9227+ builder, loc, dst, src, barrier, fir::getBase (args[3 ]), {}, {});
9228+ }
9229+
92079230// TMA_BULK_WAIT_GROUP (CUDA)
92089231void IntrinsicLibrary::genTMABulkWaitGroup (
92099232 llvm::ArrayRef<fir::ExtendedValue> args) {
0 commit comments