@@ -472,17 +472,17 @@ static constexpr IntrinsicHandler cudaHandlers[]{
472472 /* isElemental=*/ false },
473473 {" threadfence" ,
474474 static_cast <CUDAIntrinsicLibrary::SubroutineGenerator>(
475- &CI::genThreadFence),
475+ &CI::genThreadFence<mlir::NVVM::MemScopeKind::GPU> ),
476476 {},
477477 /* isElemental=*/ false },
478478 {" threadfence_block" ,
479479 static_cast <CUDAIntrinsicLibrary::SubroutineGenerator>(
480- &CI::genThreadFenceBlock ),
480+ &CI::genThreadFence<mlir::NVVM::MemScopeKind::CTA> ),
481481 {},
482482 /* isElemental=*/ false },
483483 {" threadfence_system" ,
484484 static_cast <CUDAIntrinsicLibrary::SubroutineGenerator>(
485- &CI::genThreadFenceSystem ),
485+ &CI::genThreadFence<mlir::NVVM::MemScopeKind::SYS> ),
486486 {},
487487 /* isElemental=*/ false },
488488 {" tma_bulk_commit_group" ,
@@ -1306,25 +1306,12 @@ CUDAIntrinsicLibrary::genThisWarp(mlir::Type resultType,
13061306 return res;
13071307}
13081308
1309- // THREADFENCE
1309+ // THREADFENCE, THREADFENCE_BLOCK, THREADFENCE_SYSTEM
1310+ template <mlir::NVVM::MemScopeKind scope>
13101311void CUDAIntrinsicLibrary::genThreadFence (
13111312 llvm::ArrayRef<fir::ExtendedValue> args) {
13121313 assert (args.size () == 0 );
1313- mlir::NVVM::MembarOp::create (builder, loc, mlir::NVVM::MemScopeKind::GPU);
1314- }
1315-
1316- // THREADFENCE_BLOCK
1317- void CUDAIntrinsicLibrary::genThreadFenceBlock (
1318- llvm::ArrayRef<fir::ExtendedValue> args) {
1319- assert (args.size () == 0 );
1320- mlir::NVVM::MembarOp::create (builder, loc, mlir::NVVM::MemScopeKind::CTA);
1321- }
1322-
1323- // THREADFENCE_SYSTEM
1324- void CUDAIntrinsicLibrary::genThreadFenceSystem (
1325- llvm::ArrayRef<fir::ExtendedValue> args) {
1326- assert (args.size () == 0 );
1327- mlir::NVVM::MembarOp::create (builder, loc, mlir::NVVM::MemScopeKind::SYS);
1314+ mlir::NVVM::MembarOp::create (builder, loc, scope);
13281315}
13291316
13301317// TMA_BULK_COMMIT_GROUP
0 commit comments