@@ -301,7 +301,23 @@ __global__ void IsfiniteCUDAKernel(
301301 const T* in_data,
302302 IndexType num,
303303 bool * out_data,
304- typename std::enable_if<std::is_floating_point<T>::value>::type* = 0 ) {
304+ typename std::enable_if<std::is_floating_point<T>::value &&
305+ !std::is_same<T, phi::bfloat16>::value &&
306+ !std::is_same<T, phi::float16>::value>::type* = 0 ) {
307+ IndexType idx = threadIdx.x + blockIdx.x * blockDim.x ;
308+ for (IndexType i = idx; i < num; i += blockDim.x * gridDim.x ) {
309+ const T& a = in_data[i];
310+ out_data[i] = isfinite (a);
311+ }
312+ }
313+
314+ template <typename T, typename IndexType>
315+ __global__ void IsfiniteCUDAKernel (
316+ const T* in_data,
317+ IndexType num,
318+ bool * out_data,
319+ typename std::enable_if<std::is_same<T, phi::bfloat16>::value ||
320+ std::is_same<T, phi::float16>::value>::type* = 0 ) {
305321 IndexType idx = threadIdx.x + blockIdx.x * blockDim.x ;
306322 for (IndexType i = idx; i < num; i += blockDim.x * gridDim.x ) {
307323 const T& a = in_data[i];
@@ -340,7 +356,23 @@ __global__ void IsnanCUDAKernel(
340356 const T* in_data,
341357 IndexType num,
342358 bool * out_data,
343- typename std::enable_if<std::is_floating_point<T>::value>::type* = 0 ) {
359+ typename std::enable_if<std::is_floating_point<T>::value &&
360+ !std::is_same<T, phi::bfloat16>::value &&
361+ !std::is_same<T, phi::float16>::value>::type* = 0 ) {
362+ IndexType idx = threadIdx.x + blockIdx.x * blockDim.x ;
363+ for (IndexType i = idx; i < num; i += blockDim.x * gridDim.x ) {
364+ const T& a = in_data[i];
365+ out_data[i] = isnan (a);
366+ }
367+ }
368+
369+ template <typename T, typename IndexType>
370+ __global__ void IsnanCUDAKernel (
371+ const T* in_data,
372+ IndexType num,
373+ bool * out_data,
374+ typename std::enable_if<std::is_same<T, phi::bfloat16>::value ||
375+ std::is_same<T, phi::float16>::value>::type* = 0 ) {
344376 IndexType idx = threadIdx.x + blockIdx.x * blockDim.x ;
345377 for (IndexType i = idx; i < num; i += blockDim.x * gridDim.x ) {
346378 const T& a = in_data[i];
@@ -379,7 +411,23 @@ __global__ void IsinfCUDAKernel(
379411 const T* in_data,
380412 IndexType num,
381413 bool * out_data,
382- typename std::enable_if<std::is_floating_point<T>::value>::type* = 0 ) {
414+ typename std::enable_if<std::is_floating_point<T>::value &&
415+ !std::is_same<T, phi::bfloat16>::value &&
416+ !std::is_same<T, phi::float16>::value>::type* = 0 ) {
417+ IndexType idx = threadIdx.x + blockIdx.x * blockDim.x ;
418+ for (IndexType i = idx; i < num; i += blockDim.x * gridDim.x ) {
419+ const T& a = in_data[i];
420+ out_data[i] = isinf (a);
421+ }
422+ }
423+
424+ template <typename T, typename IndexType>
425+ __global__ void IsinfCUDAKernel (
426+ const T* in_data,
427+ IndexType num,
428+ bool * out_data,
429+ typename std::enable_if<std::is_same<T, phi::bfloat16>::value ||
430+ std::is_same<T, phi::float16>::value>::type* = 0 ) {
383431 IndexType idx = threadIdx.x + blockIdx.x * blockDim.x ;
384432 for (IndexType i = idx; i < num; i += blockDim.x * gridDim.x ) {
385433 const T& a = in_data[i];
@@ -477,9 +525,9 @@ struct IsinfFunctor<phi::GPUContext, T> {
477525#endif
478526
479527template <typename T, typename Context>
480- PADDLE_API void IsfiniteKernel (const Context& dev_ctx,
481- const DenseTensor& x,
482- DenseTensor* out) {
528+ void IsfiniteKernel (const Context& dev_ctx,
529+ const DenseTensor& x,
530+ DenseTensor* out) {
483531 if (out && out->numel () == 0 ) {
484532 dev_ctx.template Alloc <bool >(out);
485533 return ;
0 commit comments