@@ -1309,34 +1309,22 @@ CUDAIntrinsicLibrary::genThisWarp(mlir::Type resultType,
13091309// THREADFENCE
// Emits the lowering for the THREADFENCE intrinsic.
// This span is a diff rendering: lines marked "-" are the removed
// implementation and lines marked "+" are its replacement.
//   args: intrinsic arguments; the replacement asserts that none are passed.
// Produces no result value (the function returns void).
13101310void CUDAIntrinsicLibrary::genThreadFence (
13111311 llvm::ArrayRef<fir::ExtendedValue> args) {
// Removed: built a function type with no inputs and no results, obtained a
// function op for funcName through builder.createFunction, and emitted a
// fir::CallOp to it with an empty operand list.
1312- constexpr llvm::StringLiteral funcName = " llvm.nvvm.membar.gl" ;
1313- mlir::FunctionType funcType =
1314- mlir::FunctionType::get (builder.getContext (), {}, {});
1315- auto funcOp = builder.createFunction (loc, funcName, funcType);
1316- llvm::SmallVector<mlir::Value> noArgs;
1317- fir::CallOp::create (builder, loc, funcOp, noArgs);
// Added: checks that no arguments were supplied, then creates an
// mlir::NVVM::MembarOp directly with the GPU memory scope kind.
1312+ assert (args.size () == 0 );
1313+ mlir::NVVM::MembarOp::create (builder, loc, mlir::NVVM::MemScopeKind::GPU);
13181314}
13191315
13201316// THREADFENCE_BLOCK
// Emits the lowering for the THREADFENCE_BLOCK intrinsic.
// This span is a diff rendering: lines marked "-" are the removed
// implementation and lines marked "+" are its replacement.
//   args: intrinsic arguments; the replacement asserts that none are passed.
// Produces no result value (the function returns void).
13211317void CUDAIntrinsicLibrary::genThreadFenceBlock (
13221318 llvm::ArrayRef<fir::ExtendedValue> args) {
// Removed: built a function type with no inputs and no results, obtained a
// function op for funcName through builder.createFunction, and emitted a
// fir::CallOp to it with an empty operand list.
1323- constexpr llvm::StringLiteral funcName = " llvm.nvvm.membar.cta" ;
1324- mlir::FunctionType funcType =
1325- mlir::FunctionType::get (builder.getContext (), {}, {});
1326- auto funcOp = builder.createFunction (loc, funcName, funcType);
1327- llvm::SmallVector<mlir::Value> noArgs;
1328- fir::CallOp::create (builder, loc, funcOp, noArgs);
// Added: checks that no arguments were supplied, then creates an
// mlir::NVVM::MembarOp directly with the CTA memory scope kind.
1319+ assert (args.size () == 0 );
1320+ mlir::NVVM::MembarOp::create (builder, loc, mlir::NVVM::MemScopeKind::CTA);
13291321}
13301322
13311323// THREADFENCE_SYSTEM
// Emits the lowering for the THREADFENCE_SYSTEM intrinsic.
// This span is a diff rendering: lines marked "-" are the removed
// implementation and lines marked "+" are its replacement.
//   args: intrinsic arguments; the replacement asserts that none are passed.
// Produces no result value (the function returns void).
13321324void CUDAIntrinsicLibrary::genThreadFenceSystem (
13331325 llvm::ArrayRef<fir::ExtendedValue> args) {
// Removed: built a function type with no inputs and no results, obtained a
// function op for funcName through builder.createFunction, and emitted a
// fir::CallOp to it with an empty operand list.
1334- constexpr llvm::StringLiteral funcName = " llvm.nvvm.membar.sys" ;
1335- mlir::FunctionType funcType =
1336- mlir::FunctionType::get (builder.getContext (), {}, {});
1337- auto funcOp = builder.createFunction (loc, funcName, funcType);
1338- llvm::SmallVector<mlir::Value> noArgs;
1339- fir::CallOp::create (builder, loc, funcOp, noArgs);
// Added: checks that no arguments were supplied, then creates an
// mlir::NVVM::MembarOp directly with the SYS memory scope kind.
1326+ assert (args.size () == 0 );
1327+ mlir::NVVM::MembarOp::create (builder, loc, mlir::NVVM::MemScopeKind::SYS);
13401328}
13411329
13421330// TMA_BULK_COMMIT_GROUP
0 commit comments