@@ -1080,42 +1080,39 @@ void CUDAIntrinsicLibrary::genSyncThreads(
// genSyncThreadsAnd, shown as a diff hunk: lines marked `-` are the removed
// implementation, lines marked `+` are its replacement. Both versions convert
// args[0] to i32 and return result 0 of the operation they create.
10801080mlir::Value
10811081CUDAIntrinsicLibrary::genSyncThreadsAnd (mlir::Type resultType,
10821082 llvm::ArrayRef<mlir::Value> args) {
// Removed version: obtained a function for funcName through
// builder.createFunction and invoked it with a fir::CallOp on the converted
// argument.
1083- constexpr llvm::StringLiteral funcName = " llvm.nvvm.barrier0.and" ;
1084- mlir::MLIRContext *context = builder.getContext ();
1085- mlir::Type i32 = builder.getI32Type ();
1086- mlir::FunctionType ftype =
1087- mlir::FunctionType::get (context, {resultType}, {i32 });
1088- auto funcOp = builder.createFunction (loc, funcName, ftype);
1089- mlir::Value arg = builder.createConvert (loc, i32 , args[0 ]);
1090- return fir::CallOp::create (builder, loc, funcOp, {arg}).getResult (0 );
// Added version: builds an mlir::NVVM::BarrierOp directly, tagged with the
// BarrierReduction::AND attribute and fed the i32-converted argument.
1083+ mlir::Value arg = builder.createConvert (loc, builder.getI32Type (), args[0 ]);
1084+ return mlir::NVVM::BarrierOp::create (
// NOTE(review): the two `{}` arguments are presumably optional barrier
// operands left unset -- confirm against the BarrierOp builder signature.
1085+ builder, loc, resultType, {}, {},
1086+ mlir::NVVM::BarrierReductionAttr::get (
1087+ builder.getContext (), mlir::NVVM::BarrierReduction::AND),
1088+ arg)
1089+ .getResult (0 );
10911090}
10921091
10931092// SYNCTHREADS_COUNT
// genSyncThreadsCount, shown as a diff hunk: lines marked `-` are the removed
// implementation, lines marked `+` are its replacement. Both versions convert
// args[0] to i32 and return result 0 of the operation they create.
10941093mlir::Value
10951094CUDAIntrinsicLibrary::genSyncThreadsCount (mlir::Type resultType,
10961095 llvm::ArrayRef<mlir::Value> args) {
// Removed version: obtained a function for funcName through
// builder.createFunction and invoked it with a fir::CallOp on the converted
// argument.
1097- constexpr llvm::StringLiteral funcName = " llvm.nvvm.barrier0.popc" ;
1098- mlir::MLIRContext *context = builder.getContext ();
1099- mlir::Type i32 = builder.getI32Type ();
1100- mlir::FunctionType ftype =
1101- mlir::FunctionType::get (context, {resultType}, {i32 });
1102- auto funcOp = builder.createFunction (loc, funcName, ftype);
1103- mlir::Value arg = builder.createConvert (loc, i32 , args[0 ]);
1104- return fir::CallOp::create (builder, loc, funcOp, {arg}).getResult (0 );
// Added version: builds an mlir::NVVM::BarrierOp directly, tagged with the
// BarrierReduction::POPC attribute and fed the i32-converted argument.
1096+ mlir::Value arg = builder.createConvert (loc, builder.getI32Type (), args[0 ]);
1097+ return mlir::NVVM::BarrierOp::create (
// NOTE(review): the two `{}` arguments are presumably optional barrier
// operands left unset -- confirm against the BarrierOp builder signature.
1098+ builder, loc, resultType, {}, {},
1099+ mlir::NVVM::BarrierReductionAttr::get (
1100+ builder.getContext (), mlir::NVVM::BarrierReduction::POPC),
1101+ arg)
1102+ .getResult (0 );
11051103}
11061104
11071105// SYNCTHREADS_OR
// genSyncThreadsOr, shown as a diff hunk: lines marked `-` are the removed
// implementation, lines marked `+` are its replacement. Both versions convert
// args[0] to i32 and return result 0 of the operation they create.
11081106mlir::Value
11091107CUDAIntrinsicLibrary::genSyncThreadsOr (mlir::Type resultType,
11101108 llvm::ArrayRef<mlir::Value> args) {
// Removed version: obtained a function for funcName through
// builder.createFunction and invoked it with a fir::CallOp on the converted
// argument.
1111- constexpr llvm::StringLiteral funcName = " llvm.nvvm.barrier0.or" ;
1112- mlir::MLIRContext *context = builder.getContext ();
1113- mlir::Type i32 = builder.getI32Type ();
1114- mlir::FunctionType ftype =
1115- mlir::FunctionType::get (context, {resultType}, {i32 });
1116- auto funcOp = builder.createFunction (loc, funcName, ftype);
1117- mlir::Value arg = builder.createConvert (loc, i32 , args[0 ]);
1118- return fir::CallOp::create (builder, loc, funcOp, {arg}).getResult (0 );
// Added version: builds an mlir::NVVM::BarrierOp directly, tagged with the
// BarrierReduction::OR attribute and fed the i32-converted argument.
1109+ mlir::Value arg = builder.createConvert (loc, builder.getI32Type (), args[0 ]);
1110+ return mlir::NVVM::BarrierOp::create (
// NOTE(review): the two `{}` arguments are presumably optional barrier
// operands left unset -- confirm against the BarrierOp builder signature.
1111+ builder, loc, resultType, {}, {},
1112+ mlir::NVVM::BarrierReductionAttr::get (
1113+ builder.getContext (), mlir::NVVM::BarrierReduction::OR),
1114+ arg)
1115+ .getResult (0 );
11191116}
11201117
11211118// SYNCWARP
0 commit comments