[libclc] Optimize generic CLC fmin/fmax

frasercrmck · frasercrmck · commit 414c6cf56024 · 2025-07-28T17:39:42.000+01:00
The CLC fmin/fmax builtins now use clang's
__builtin_elementwise_(min|max)imumnum which helps us generate
llvm.(min|max)imumnum intrinsics directly. These intrinsics select the
non-NaN input over the NaN input, which adheres to the OpenCL
specification. Note that the OpenCL specification doesn't require
support for sNaN, so the intrinsics' handling of sNaN inputs is
acceptable either way. Note also that the OpenCL specification does not
appear to require any particular ordering of -0.0 and +0.0 - going by
the OpenCL CTS, at least - so the intrinsics' treatment of signed zeros
is likewise acceptable.

These intrinsics maintain the vector types, as opposed to scalarizing,
which was previously happening. This commit therefore helps to optimize
codegen for targets that use the generic implementation.
diff --git a/libclc/clc/lib/generic/math/clc_fmax.cl b/libclc/clc/lib/generic/math/clc_fmax.cl
@@ -6,53 +6,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include <clc/clcmacro.h>
 #include <clc/internal/clc.h>
-#include <clc/relational/clc_isnan.h>
 
-#define __FLOAT_ONLY
-#define __CLC_MIN_VECSIZE 1
 #define FUNCTION __clc_fmax
-#define __IMPL_FUNCTION __builtin_fmaxf
-#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
-#include <clc/math/gentype.inc>
-#undef __CLC_MIN_VECSIZE
-#undef FUNCTION
-#undef __IMPL_FUNCTION
-
-#ifdef cl_khr_fp64
-
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-#define __DOUBLE_ONLY
-#define __CLC_MIN_VECSIZE 1
-#define FUNCTION __clc_fmax
-#define __IMPL_FUNCTION __builtin_fmax
-#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
-#include <clc/math/gentype.inc>
-#undef __CLC_MIN_VECSIZE
-#undef FUNCTION
-#undef __IMPL_FUNCTION
+#define __IMPL_FUNCTION(x) __builtin_elementwise_maximumnum
+#define __CLC_BODY <clc/shared/binary_def.inc>
 
-#endif
-
-#ifdef cl_khr_fp16
-
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-
-_CLC_DEF _CLC_OVERLOAD half __clc_fmax(half x, half y) {
-  if (__clc_isnan(x))
-    return y;
-  if (__clc_isnan(y))
-    return x;
-  return (x < y) ? y : x;
-}
-
-#define __HALF_ONLY
-#define __CLC_SUPPORTED_VECSIZE_OR_1 2
-#define FUNCTION __clc_fmax
-#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
 #include <clc/math/gentype.inc>
-#undef FUNCTION
-
-#endif
diff --git a/libclc/clc/lib/generic/math/clc_fmin.cl b/libclc/clc/lib/generic/math/clc_fmin.cl
@@ -6,52 +6,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include <clc/clcmacro.h>
 #include <clc/internal/clc.h>
-#include <clc/relational/clc_isnan.h>
 
-#define __FLOAT_ONLY
-#define __CLC_MIN_VECSIZE 1
 #define FUNCTION __clc_fmin
-#define __IMPL_FUNCTION __builtin_fminf
-#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
-#include <clc/math/gentype.inc>
-#undef __CLC_MIN_VECSIZE
-#undef FUNCTION
-#undef __IMPL_FUNCTION
-
-#ifdef cl_khr_fp64
-
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-#define __DOUBLE_ONLY
-#define __CLC_MIN_VECSIZE 1
-#define FUNCTION __clc_fmin
-#define __IMPL_FUNCTION __builtin_fmin
-#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
-#include <clc/math/gentype.inc>
-#undef __CLC_MIN_VECSIZE
-#undef FUNCTION
-#undef __IMPL_FUNCTION
+#define __IMPL_FUNCTION(x) __builtin_elementwise_minimumnum
+#define __CLC_BODY <clc/shared/binary_def.inc>
 
-#endif
-
-#ifdef cl_khr_fp16
-
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-
-_CLC_DEF _CLC_OVERLOAD half __clc_fmin(half x, half y) {
-  if (__clc_isnan(x))
-    return y;
-  if (__clc_isnan(y))
-    return x;
-  return (y < x) ? y : x;
-}
-
-#define __HALF_ONLY
-#define __CLC_SUPPORTED_VECSIZE_OR_1 2
-#define FUNCTION __clc_fmin
-#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
 #include <clc/math/gentype.inc>
-
-#endif