// Patch summary: adds a `globalTimer` CUDA Fortran device function and its lowering.
//
// Files touched by the hunks below:
//   * flang/include/flang/Optimizer/Builder/IntrinsicCall.h
//       declares `IntrinsicLibrary::genGlobalTimer`, placed between `genGetUID` and `genHostnm`.
//   * flang/lib/Optimizer/Builder/IntrinsicCall.cpp
//       - adds a "globaltimer" entry to the `handlers` table, bound to `genGlobalTimer`, with an
//         empty argument-rule list and `isElemental` set to false;
//       - defines `genGlobalTimer`: it asserts that `args` is empty and returns `getResult()` of
//         the operation built by `builder.create(loc, resultType)`.
//   * flang/module/cudadevice.f90
//       adds an interface declaring `attributes(device) integer(8) function globalTimer()`.
//   * flang/test/Lower/CUDA/cuda-device-proc.cuf
//       calls `time = globalTimer()` inside `devsub` and adds a CHECK line expecting
//       `nvvm.read.ptx.sreg.globaltimer : i64`.
//
// NOTE(review): the argument check in `genGlobalTimer` is an `assert`; whether it is active
// depends on how the project is built - confirm if a hard diagnostic is wanted instead.
// NOTE(review): in this copy of the patch the line breaks are collapsed, and template arguments
// look stripped (`llvm::ArrayRef args`, `std::optional resultType`, `builder.create(loc, ...)`
// carry no `<...>`). The created op is presumably the NVVM global-timer op, judging from the
// CHECK line - verify against the upstream commit before applying.
diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h index d38c5b6d09a82..363b1d5844d1b 100644 --- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h +++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h @@ -282,6 +282,7 @@ struct IntrinsicLibrary { llvm::ArrayRef args); mlir::Value genGetUID(mlir::Type resultType, llvm::ArrayRef args); + mlir::Value genGlobalTimer(mlir::Type, llvm::ArrayRef); fir::ExtendedValue genHostnm(std::optional resultType, llvm::ArrayRef args); fir::ExtendedValue genIall(mlir::Type, llvm::ArrayRef); diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp index 8d0a511744e25..ddfa27475fa7a 100644 --- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp @@ -503,6 +503,7 @@ static constexpr IntrinsicHandler handlers[]{ {"getgid", &I::genGetGID}, {"getpid", &I::genGetPID}, {"getuid", &I::genGetUID}, + {"globaltimer", &I::genGlobalTimer, {}, /*isElemental=*/false}, {"hostnm", &I::genHostnm, {{{"c", asBox}, {"status", asAddr, handleDynamicOptional}}}, @@ -4319,6 +4320,13 @@ mlir::Value IntrinsicLibrary::genGetUID(mlir::Type resultType, fir::runtime::genGetUID(builder, loc)); } +// GLOBALTIMER +mlir::Value IntrinsicLibrary::genGlobalTimer(mlir::Type resultType, + llvm::ArrayRef args) { + assert(args.size() == 0 && "globalTimer takes no args"); + return builder.create(loc, resultType).getResult(); +} + // GET_COMMAND_ARGUMENT void IntrinsicLibrary::genGetCommandArgument( llvm::ArrayRef args) { diff --git a/flang/module/cudadevice.f90 b/flang/module/cudadevice.f90 index f8a30da8b9615..52a619e07165c 100644 --- a/flang/module/cudadevice.f90 +++ b/flang/module/cudadevice.f90 @@ -1613,6 +1613,11 @@ attributes(device,host) logical function on_device() bind(c) end function end interface + interface + attributes(device) integer(8) function globalTimer() + end function + end 
interface + contains attributes(device) subroutine syncthreads() diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf index 42ee7657966e2..888c7961ee2b4 100644 --- a/flang/test/Lower/CUDA/cuda-device-proc.cuf +++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf @@ -46,6 +46,8 @@ attributes(global) subroutine devsub() ai = atomicdec(ai, 1_4) time = clock64() + + time = globalTimer() end ! CHECK-LABEL: func.func @_QPdevsub() attributes {cuf.proc_attr = #cuf.cuda_proc} @@ -83,6 +85,7 @@ end ! CHECK: %{{.*}} = llvm.atomicrmw udec_wrap %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i32 ! CHECK: fir.call @llvm.nvvm.read.ptx.sreg.clock64() +! CHECK: %{{.*}} = nvvm.read.ptx.sreg.globaltimer : i64 subroutine host1() integer, device :: a(32)