Give the CUDA Fortran device generic __popc explicit C binding names.

Both specifics of the generic were declared with a bare bind(c). This patch
binds the default-integer specific to '__nv_popc' and the integer(8) specific
to '__nv_popcll', and extends the lowering test so that two calls through the
generic are checked to lower to fir.call @__nv_popc and fir.call @__nv_popcll.

NOTE(review): this patch was recovered from a copy whose line breaks had been
collapsed. Line structure follows the hunk header counts; leading indentation
of hunk lines and the <...> attribute arguments in the CHECK lines were
reconstructed -- confirm against the tree before applying.

diff --git a/flang/module/cudadevice.f90 b/flang/module/cudadevice.f90
index d0c312c09353f..dc72fc52c7345 100644
--- a/flang/module/cudadevice.f90
+++ b/flang/module/cudadevice.f90
@@ -754,11 +754,11 @@ attributes(device) real(8) function sinpi(x) bind(c,name='__nv_sinpi')
   end interface
 
   interface __popc
-    attributes(device) integer function __popc(i) bind(c)
+    attributes(device) integer function __popc(i) bind(c, name='__nv_popc')
     !dir$ ignore_tkr (d) i
       integer, value :: i
     end function
-    attributes(device) integer function __popcll(i) bind(c)
+    attributes(device) integer function __popcll(i) bind(c, name='__nv_popcll')
     !dir$ ignore_tkr (d) i
       integer(8), value :: i
     end function
diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf
index d5e614a83b354..a8cc5034a9348 100644
--- a/flang/test/Lower/CUDA/cuda-device-proc.cuf
+++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf
@@ -11,6 +11,7 @@ attributes(global) subroutine devsub()
   integer(8) :: al
   integer(8) :: time
   integer :: smalltime
+  integer(4) :: res
 
   call syncthreads()
   call syncwarp(1)
@@ -49,6 +50,9 @@ attributes(global) subroutine devsub()
   smalltime = clock()
   time = clock64()
   time = globalTimer()
+
+  res = __popc(ai)
+  res = __popc(al)
 end
 
 ! CHECK-LABEL: func.func @_QPdevsub() attributes {cuf.proc_attr = #cuf.cuda_proc<global>}
@@ -89,6 +93,9 @@ end
 ! CHECK: %{{.*}} = nvvm.read.ptx.sreg.clock64 : i64
 ! CHECK: %{{.*}} = nvvm.read.ptx.sreg.globaltimer : i64
 
+! CHECK: %{{.*}} = fir.call @__nv_popc(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i32) -> i32
+! CHECK: %{{.*}} = fir.call @__nv_popcll(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i64) -> i32
+
 subroutine host1()
   integer, device :: a(32)
   integer, device :: ret