Skip to content

Commit 48a75f4

Browse files
authored
[SYCL][LIBCLC][NATIVECPU] Fix ldexp and popcount on Native CPU (#15687)
`ldexp` in `generic` (for `half`) and `popcount` in `native_cpu` are implemented using clang builtins. This PR makes sure we call the correct "overload" of the builtin in each case:

* `__builtin_ldexpf16` for `half`
* `__builtin_popcountg` for unsigned integer types
* for signed integer types narrower than `int`, the implicit conversion to `int` performed by calling `__builtin_popcount` gives the wrong count for negative values because of sign extension, so we explicitly convert to `unsigned char` or `unsigned short` and use `__builtin_popcountg`.
1 parent 9faf0e0 commit 48a75f4

File tree

3 files changed

+23
-9
lines changed

3 files changed

+23
-9
lines changed

libclc/generic/include/math/clc_ldexp.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,5 +7,5 @@ _CLC_DEF _CLC_OVERLOAD double __clc_ldexp(double, int);
77

88
#ifdef cl_khr_fp16
99
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
10-
_CLC_DEF _CLC_OVERLOAD float __clc_ldexp(half, int);
10+
_CLC_DEF _CLC_OVERLOAD half __clc_ldexp(half, int);
1111
#endif

libclc/generic/libspirv/math/clc_ldexp.cl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,6 @@ _CLC_DEF _CLC_OVERLOAD double __clc_ldexp(double x, int n) {
135135

136136
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
137137

138-
_CLC_DEFINE_BINARY_BUILTIN(half, __clc_ldexp, __builtin_ldexp, half, int)
138+
_CLC_DEFINE_BINARY_BUILTIN(half, __clc_ldexp, __builtin_ldexpf16, half, int)
139139

140140
#endif

libclc/native_cpu-unknown-linux/libspirv/integer/popcount.cl

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,26 @@
22
#include <clcmacro.h>
33
#include <spirv/spirv.h>
44

5+
// We can't call __builtin_popcountg directly on signed types because it
6+
// accepts only unsigned types, and we can't use __builtin_popcount because
7+
// the implicit conversion to int sign-extends negative values and inflates
8+
// the count, so we type-pun to the unsigned type to keep the bit pattern.
9+
10+
#define DEF_POPCOUNT_HELPER(TYPE, UTYPE) \
11+
_CLC_OVERLOAD TYPE __popcount_helper(TYPE c) { \
12+
return __builtin_popcountg(*(UTYPE*)&c); \
13+
}
14+
15+
DEF_POPCOUNT_HELPER(char, unsigned char)
16+
DEF_POPCOUNT_HELPER(schar, unsigned char)
17+
DEF_POPCOUNT_HELPER(short, unsigned short)
18+
519
_CLC_DEFINE_UNARY_BUILTIN(int, __spirv_ocl_popcount, __builtin_popcount, int)
620
_CLC_DEFINE_UNARY_BUILTIN(uint, __spirv_ocl_popcount, __builtin_popcount, uint)
7-
_CLC_DEFINE_UNARY_BUILTIN(short, __spirv_ocl_popcount, __builtin_popcount, short)
8-
_CLC_DEFINE_UNARY_BUILTIN(ushort, __spirv_ocl_popcount, __builtin_popcount, ushort)
9-
_CLC_DEFINE_UNARY_BUILTIN(long, __spirv_ocl_popcount, __builtin_popcount, long)
10-
_CLC_DEFINE_UNARY_BUILTIN(ulong, __spirv_ocl_popcount, __builtin_popcount, ulong)
11-
_CLC_DEFINE_UNARY_BUILTIN(char, __spirv_ocl_popcount, __builtin_popcount, char)
12-
_CLC_DEFINE_UNARY_BUILTIN(uchar, __spirv_ocl_popcount, __builtin_popcount, uchar)
13-
_CLC_DEFINE_UNARY_BUILTIN(schar, __spirv_ocl_popcount, __builtin_popcount, schar)
21+
_CLC_DEFINE_UNARY_BUILTIN(short, __spirv_ocl_popcount, __popcount_helper, short)
22+
_CLC_DEFINE_UNARY_BUILTIN(ushort, __spirv_ocl_popcount, __builtin_popcountg, ushort)
23+
_CLC_DEFINE_UNARY_BUILTIN(long, __spirv_ocl_popcount, __builtin_popcountl, long)
24+
_CLC_DEFINE_UNARY_BUILTIN(ulong, __spirv_ocl_popcount, __builtin_popcountl, ulong)
25+
_CLC_DEFINE_UNARY_BUILTIN(char, __spirv_ocl_popcount, __popcount_helper, char)
26+
_CLC_DEFINE_UNARY_BUILTIN(uchar, __spirv_ocl_popcount, __builtin_popcountg, uchar)
27+
_CLC_DEFINE_UNARY_BUILTIN(schar, __spirv_ocl_popcount, __popcount_helper, schar)

0 commit comments

Comments
 (0)