Skip to content

Commit 34951f7

Browse files
authored
[flang][cuda] Use NVVM op for clock64 (#149223)
1 parent b9adc4a commit 34951f7

File tree

3 files changed

+6
-15
lines changed

3 files changed

+6
-15
lines changed

flang/include/flang/Optimizer/Builder/IntrinsicCall.h

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -241,7 +241,6 @@ struct IntrinsicLibrary {
241241
void genCFProcPointer(llvm::ArrayRef<fir::ExtendedValue>);
242242
fir::ExtendedValue genCFunLoc(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
243243
fir::ExtendedValue genCLoc(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
244-
mlir::Value genClock64(mlir::Type, llvm::ArrayRef<mlir::Value>);
245244
template <mlir::arith::CmpIPredicate pred>
246245
fir::ExtendedValue genCPtrCompare(mlir::Type,
247246
llvm::ArrayRef<fir::ExtendedValue>);

flang/lib/Optimizer/Builder/IntrinsicCall.cpp

Lines changed: 5 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -386,7 +386,10 @@ static constexpr IntrinsicHandler handlers[]{
386386
{{{"name", asAddr}, {"status", asAddr, handleDynamicOptional}}},
387387
/*isElemental=*/false},
388388
{"clock", &I::genNVVMTime<mlir::NVVM::ClockOp>, {}, /*isElemental=*/false},
389-
{"clock64", &I::genClock64, {}, /*isElemental=*/false},
389+
{"clock64",
390+
&I::genNVVMTime<mlir::NVVM::Clock64Op>,
391+
{},
392+
/*isElemental=*/false},
390393
{"cmplx",
391394
&I::genCmplx,
392395
{{{"x", asValue}, {"y", asValue, handleDynamicOptional}}}},
@@ -3565,16 +3568,6 @@ IntrinsicLibrary::genChdir(std::optional<mlir::Type> resultType,
35653568
return {};
35663569
}
35673570

3568-
// CLOCK64
3569-
mlir::Value IntrinsicLibrary::genClock64(mlir::Type resultType,
3570-
llvm::ArrayRef<mlir::Value> args) {
3571-
constexpr llvm::StringLiteral funcName = "llvm.nvvm.read.ptx.sreg.clock64";
3572-
mlir::MLIRContext *context = builder.getContext();
3573-
mlir::FunctionType ftype = mlir::FunctionType::get(context, {}, {resultType});
3574-
auto funcOp = builder.createFunction(loc, funcName, ftype);
3575-
return builder.create<fir::CallOp>(loc, funcOp, args).getResult(0);
3576-
}
3577-
35783571
// CMPLX
35793572
mlir::Value IntrinsicLibrary::genCmplx(mlir::Type resultType,
35803573
llvm::ArrayRef<mlir::Value> args) {
@@ -7204,7 +7197,7 @@ IntrinsicLibrary::genNull(mlir::Type, llvm::ArrayRef<fir::ExtendedValue> args) {
72047197
return fir::MutableBoxValue(boxStorage, mold->nonDeferredLenParams(), {});
72057198
}
72067199

7207-
// CLOCK, GLOBALTIMER
7200+
// CLOCK, CLOCK64, GLOBALTIMER
72087201
template <typename OpTy>
72097202
mlir::Value IntrinsicLibrary::genNVVMTime(mlir::Type resultType,
72107203
llvm::ArrayRef<mlir::Value> args) {

flang/test/Lower/CUDA/cuda-device-proc.cuf

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -48,7 +48,6 @@ attributes(global) subroutine devsub()
4848

4949
smalltime = clock()
5050
time = clock64()
51-
5251
time = globalTimer()
5352
end
5453

@@ -87,7 +86,7 @@ end
8786
! CHECK: %{{.*}} = llvm.atomicrmw udec_wrap %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i32
8887

8988
! CHECK: %{{.*}} = nvvm.read.ptx.sreg.clock : i32
90-
! CHECK: fir.call @llvm.nvvm.read.ptx.sreg.clock64()
89+
! CHECK: %{{.*}} = nvvm.read.ptx.sreg.clock64 : i64
9190
! CHECK: %{{.*}} = nvvm.read.ptx.sreg.globaltimer : i64
9291

9392
subroutine host1()

0 commit comments

Comments
 (0)