Skip to content

Commit 414c6cf

Browse files
committed
[libclc] Optimize generic CLC fmin/fmax
The CLC fmin/fmax builtins now use clang's __builtin_elementwise_(min|max), which helps us generate llvm.(min|max)num intrinsics directly. These intrinsics select the non-NAN input over the NAN input, which adheres to the OpenCL specification. Note that the OpenCL specification doesn't require support for sNAN, so returning qNAN over sNAN is acceptable. Note also that the intrinsics don't differentiate between -0.0 and +0.0; differentiating between them does not appear to be required - going by the OpenCL CTS, at least. These intrinsics maintain the vector types, as opposed to scalarizing, which was previously happening. This commit therefore helps to optimize codegen for targets that can operate on vector types directly.
1 parent a22d010 commit 414c6cf

File tree

2 files changed

+4
-89
lines changed

2 files changed

+4
-89
lines changed

libclc/clc/lib/generic/math/clc_fmax.cl

Lines changed: 2 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -6,53 +6,10 @@
66
//
77
//===----------------------------------------------------------------------===//
88

9-
#include <clc/clcmacro.h>
109
#include <clc/internal/clc.h>
11-
#include <clc/relational/clc_isnan.h>
1210

13-
#define __FLOAT_ONLY
14-
#define __CLC_MIN_VECSIZE 1
1511
#define FUNCTION __clc_fmax
16-
#define __IMPL_FUNCTION __builtin_fmaxf
17-
#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
18-
#include <clc/math/gentype.inc>
19-
#undef __CLC_MIN_VECSIZE
20-
#undef FUNCTION
21-
#undef __IMPL_FUNCTION
22-
23-
#ifdef cl_khr_fp64
24-
25-
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
26-
27-
#define __DOUBLE_ONLY
28-
#define __CLC_MIN_VECSIZE 1
29-
#define FUNCTION __clc_fmax
30-
#define __IMPL_FUNCTION __builtin_fmax
31-
#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
32-
#include <clc/math/gentype.inc>
33-
#undef __CLC_MIN_VECSIZE
34-
#undef FUNCTION
35-
#undef __IMPL_FUNCTION
12+
// Builtin that implements __clc_fmax; the macro argument x is unused, so the
// same builtin is named regardless of what is passed. Per the commit message,
// the element-wise max builtin yields llvm.maxnum (the non-NaN operand wins,
// signed zeros are not ordered) and keeps vector types intact.
#define __IMPL_FUNCTION(x) __builtin_elementwise_max
13+
#define __CLC_BODY <clc/shared/binary_def.inc>
3614

37-
#endif
38-
39-
#ifdef cl_khr_fp16
40-
41-
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
42-
43-
_CLC_DEF _CLC_OVERLOAD half __clc_fmax(half x, half y) {
44-
if (__clc_isnan(x))
45-
return y;
46-
if (__clc_isnan(y))
47-
return x;
48-
return (x < y) ? y : x;
49-
}
50-
51-
#define __HALF_ONLY
52-
#define __CLC_SUPPORTED_VECSIZE_OR_1 2
53-
#define FUNCTION __clc_fmax
54-
#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
5515
#include <clc/math/gentype.inc>
56-
#undef FUNCTION
57-
58-
#endif

libclc/clc/lib/generic/math/clc_fmin.cl

Lines changed: 2 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -6,52 +6,10 @@
66
//
77
//===----------------------------------------------------------------------===//
88

9-
#include <clc/clcmacro.h>
109
#include <clc/internal/clc.h>
11-
#include <clc/relational/clc_isnan.h>
1210

13-
#define __FLOAT_ONLY
14-
#define __CLC_MIN_VECSIZE 1
1511
#define FUNCTION __clc_fmin
16-
#define __IMPL_FUNCTION __builtin_fminf
17-
#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
18-
#include <clc/math/gentype.inc>
19-
#undef __CLC_MIN_VECSIZE
20-
#undef FUNCTION
21-
#undef __IMPL_FUNCTION
22-
23-
#ifdef cl_khr_fp64
24-
25-
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
26-
27-
#define __DOUBLE_ONLY
28-
#define __CLC_MIN_VECSIZE 1
29-
#define FUNCTION __clc_fmin
30-
#define __IMPL_FUNCTION __builtin_fmin
31-
#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
32-
#include <clc/math/gentype.inc>
33-
#undef __CLC_MIN_VECSIZE
34-
#undef FUNCTION
35-
#undef __IMPL_FUNCTION
12+
// Builtin that implements __clc_fmin; the macro argument x is unused, so the
// same builtin is named regardless of what is passed. Per the commit message,
// the element-wise min builtin yields llvm.minnum (the non-NaN operand wins,
// signed zeros are not ordered) and keeps vector types intact.
#define __IMPL_FUNCTION(x) __builtin_elementwise_min
13+
#define __CLC_BODY <clc/shared/binary_def.inc>
3614

37-
#endif
38-
39-
#ifdef cl_khr_fp16
40-
41-
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
42-
43-
_CLC_DEF _CLC_OVERLOAD half __clc_fmin(half x, half y) {
44-
if (__clc_isnan(x))
45-
return y;
46-
if (__clc_isnan(y))
47-
return x;
48-
return (y < x) ? y : x;
49-
}
50-
51-
#define __HALF_ONLY
52-
#define __CLC_SUPPORTED_VECSIZE_OR_1 2
53-
#define FUNCTION __clc_fmin
54-
#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
5515
#include <clc/math/gentype.inc>
56-
57-
#endif

0 commit comments

Comments
 (0)