Skip to content

Commit 4cf7670

Browse files
authored
[flang][cuda] Lower clock() to NVVM op (#149228)
Also use the same gen function for all NVVM time ops.
1 parent b52cf75 commit 4cf7670

File tree

4 files changed

+28
-14
lines changed

4 files changed

+28
-14
lines changed

flang/include/flang/Optimizer/Builder/IntrinsicCall.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -282,7 +282,6 @@ struct IntrinsicLibrary {
282282
llvm::ArrayRef<mlir::Value> args);
283283
mlir::Value genGetUID(mlir::Type resultType,
284284
llvm::ArrayRef<mlir::Value> args);
285-
mlir::Value genGlobalTimer(mlir::Type, llvm::ArrayRef<mlir::Value>);
286285
fir::ExtendedValue genHostnm(std::optional<mlir::Type> resultType,
287286
llvm::ArrayRef<fir::ExtendedValue> args);
288287
fir::ExtendedValue genIall(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
@@ -377,6 +376,8 @@ struct IntrinsicLibrary {
377376
fir::ExtendedValue genNorm2(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
378377
mlir::Value genNot(mlir::Type, llvm::ArrayRef<mlir::Value>);
379378
fir::ExtendedValue genNull(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
379+
template <typename OpTy>
380+
mlir::Value genNVVMTime(mlir::Type, llvm::ArrayRef<mlir::Value>);
380381
fir::ExtendedValue genPack(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
381382
fir::ExtendedValue genParity(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
382383
void genPerror(llvm::ArrayRef<fir::ExtendedValue>);

flang/lib/Optimizer/Builder/IntrinsicCall.cpp

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -385,6 +385,7 @@ static constexpr IntrinsicHandler handlers[]{
385385
&I::genChdir,
386386
{{{"name", asAddr}, {"status", asAddr, handleDynamicOptional}}},
387387
/*isElemental=*/false},
388+
{"clock", &I::genNVVMTime<mlir::NVVM::ClockOp>, {}, /*isElemental=*/false},
388389
{"clock64", &I::genClock64, {}, /*isElemental=*/false},
389390
{"cmplx",
390391
&I::genCmplx,
@@ -503,7 +504,10 @@ static constexpr IntrinsicHandler handlers[]{
503504
{"getgid", &I::genGetGID},
504505
{"getpid", &I::genGetPID},
505506
{"getuid", &I::genGetUID},
506-
{"globaltimer", &I::genGlobalTimer, {}, /*isElemental=*/false},
507+
{"globaltimer",
508+
&I::genNVVMTime<mlir::NVVM::GlobalTimerOp>,
509+
{},
510+
/*isElemental=*/false},
507511
{"hostnm",
508512
&I::genHostnm,
509513
{{{"c", asBox}, {"status", asAddr, handleDynamicOptional}}},
@@ -4320,13 +4324,6 @@ mlir::Value IntrinsicLibrary::genGetUID(mlir::Type resultType,
43204324
fir::runtime::genGetUID(builder, loc));
43214325
}
43224326

4323-
// GLOBALTIMER
4324-
mlir::Value IntrinsicLibrary::genGlobalTimer(mlir::Type resultType,
4325-
llvm::ArrayRef<mlir::Value> args) {
4326-
assert(args.size() == 0 && "globalTimer takes no args");
4327-
return builder.create<mlir::NVVM::GlobalTimerOp>(loc, resultType).getResult();
4328-
}
4329-
43304327
// GET_COMMAND_ARGUMENT
43314328
void IntrinsicLibrary::genGetCommandArgument(
43324329
llvm::ArrayRef<fir::ExtendedValue> args) {
@@ -7207,6 +7204,14 @@ IntrinsicLibrary::genNull(mlir::Type, llvm::ArrayRef<fir::ExtendedValue> args) {
72077204
return fir::MutableBoxValue(boxStorage, mold->nonDeferredLenParams(), {});
72087205
}
72097206

7207+
// CLOCK, GLOBALTIMER
7208+
template <typename OpTy>
7209+
mlir::Value IntrinsicLibrary::genNVVMTime(mlir::Type resultType,
7210+
llvm::ArrayRef<mlir::Value> args) {
7211+
assert(args.size() == 0 && "expect no arguments");
7212+
return builder.create<OpTy>(loc, resultType).getResult();
7213+
}
7214+
72107215
// PACK
72117216
fir::ExtendedValue
72127217
IntrinsicLibrary::genPack(mlir::Type resultType,

flang/module/cudadevice.f90

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -957,11 +957,21 @@ attributes(device) pure integer function atomicxori(address, val)
957957

958958
! Time function
959959

960+
interface
961+
attributes(device) integer function clock()
962+
end function
963+
end interface
964+
960965
interface
961966
attributes(device) integer(8) function clock64()
962967
end function
963968
end interface
964969

970+
interface
971+
attributes(device) integer(8) function globalTimer()
972+
end function
973+
end interface
974+
965975
! Warp Match Functions
966976

967977
interface match_all_sync
@@ -1613,11 +1623,6 @@ attributes(device,host) logical function on_device() bind(c)
16131623
end function
16141624
end interface
16151625

1616-
interface
1617-
attributes(device) integer(8) function globalTimer()
1618-
end function
1619-
end interface
1620-
16211626
contains
16221627

16231628
attributes(device) subroutine syncthreads()

flang/test/Lower/CUDA/cuda-device-proc.cuf

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ attributes(global) subroutine devsub()
1010
integer(4) :: ai
1111
integer(8) :: al
1212
integer(8) :: time
13+
integer :: smalltime
1314

1415
call syncthreads()
1516
call syncwarp(1)
@@ -45,6 +46,7 @@ attributes(global) subroutine devsub()
4546
ai = atomicinc(ai, 1_4)
4647
ai = atomicdec(ai, 1_4)
4748

49+
smalltime = clock()
4850
time = clock64()
4951

5052
time = globalTimer()
@@ -84,6 +86,7 @@ end
8486
! CHECK: %{{.*}} = llvm.atomicrmw uinc_wrap %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i32
8587
! CHECK: %{{.*}} = llvm.atomicrmw udec_wrap %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i32
8688

89+
! CHECK: %{{.*}} = nvvm.read.ptx.sreg.clock : i32
8790
! CHECK: fir.call @llvm.nvvm.read.ptx.sreg.clock64()
8891
! CHECK: %{{.*}} = nvvm.read.ptx.sreg.globaltimer : i64
8992

0 commit comments

Comments
 (0)