Skip to content

Commit 06789cc

Browse files
authored
[libclc] Optimize ceil/fabs/floor/rint/trunc (#119596)
These functions all map to the corresponding LLVM intrinsics, but the vector intrinsics weren't being generated. The intrinsic mapping from CLC vector function to vector intrinsic was working correctly, but the mapping from OpenCL builtin to CLC function was suboptimally recursively splitting vectors in halves. For example, with this change, `ceil(float16)` calls `llvm.ceil.v16f32` directly once optimizations are applied. In addition, instead of generating LLVM intrinsics through `__asm`, we now call clang elementwise builtins for each CLC builtin. This should be a more standard way of achieving the same result. The CLC versions of each of these builtins are also now built and enabled for SPIR-V targets. The LLVM -> SPIR-V translator maps the intrinsics to the appropriate OpExtInst, so there should be no difference in semantics, despite the newly introduced indirection from OpenCL builtin through the CLC builtin to the intrinsic. The AMDGPU targets make use of the same `_CLC_DEFINE_UNARY_BUILTIN` macro to override `sqrt`, so those functions also appear more optimal with this change, calling the vector `llvm.sqrt.vXf32` intrinsics directly.
1 parent 3d6b2d4 commit 06789cc

File tree

24 files changed

+92
-66
lines changed

24 files changed

+92
-66
lines changed

libclc/clc/include/clc/clcmacro.h

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,21 @@
191191

192192
#define _CLC_DEFINE_UNARY_BUILTIN(RET_TYPE, FUNCTION, BUILTIN, ARG1_TYPE) \
193193
_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x) { return BUILTIN(x); } \
194-
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, RET_TYPE, FUNCTION, ARG1_TYPE)
194+
_CLC_DEF _CLC_OVERLOAD RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x) { \
195+
return BUILTIN(x); \
196+
} \
197+
_CLC_DEF _CLC_OVERLOAD RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x) { \
198+
return BUILTIN(x); \
199+
} \
200+
_CLC_DEF _CLC_OVERLOAD RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x) { \
201+
return BUILTIN(x); \
202+
} \
203+
_CLC_DEF _CLC_OVERLOAD RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x) { \
204+
return BUILTIN(x); \
205+
} \
206+
_CLC_DEF _CLC_OVERLOAD RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x) { \
207+
return BUILTIN(x); \
208+
}
195209

196210
#ifdef cl_khr_fp16
197211

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,12 @@
11
#ifndef __CLC_MATH_CLC_CEIL_H__
22
#define __CLC_MATH_CLC_CEIL_H__
33

4-
#if defined(CLC_CLSPV) || defined(CLC_SPIRV)
5-
// clspv and spir-v targets provide their own OpenCL-compatible ceil
6-
#define __clc_ceil ceil
7-
#else
8-
9-
// Map the function to an LLVM intrinsic
4+
#define __CLC_BODY <clc/math/unary_decl.inc>
105
#define __CLC_FUNCTION __clc_ceil
11-
#define __CLC_INTRINSIC "llvm.ceil"
12-
#include <clc/math/unary_intrin.inc>
136

14-
#undef __CLC_INTRINSIC
15-
#undef __CLC_FUNCTION
7+
#include <clc/math/gentype.inc>
168

17-
#endif
9+
#undef __CLC_BODY
10+
#undef __CLC_FUNCTION
1811

1912
#endif // __CLC_MATH_CLC_CEIL_H__
Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,12 @@
11
#ifndef __CLC_MATH_CLC_FABS_H__
22
#define __CLC_MATH_CLC_FABS_H__
33

4-
#if defined(CLC_CLSPV) || defined(CLC_SPIRV)
5-
// clspv and spir-v targets provide their own OpenCL-compatible fabs
6-
#define __clc_fabs fabs
7-
#else
8-
9-
// Map the function to an LLVM intrinsic
4+
#define __CLC_BODY <clc/math/unary_decl.inc>
105
#define __CLC_FUNCTION __clc_fabs
11-
#define __CLC_INTRINSIC "llvm.fabs"
12-
#include <clc/math/unary_intrin.inc>
136

14-
#undef __CLC_INTRINSIC
15-
#undef __CLC_FUNCTION
7+
#include <clc/math/gentype.inc>
168

17-
#endif
9+
#undef __CLC_BODY
10+
#undef __CLC_FUNCTION
1811

1912
#endif // __CLC_MATH_CLC_FABS_H__
Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,12 @@
11
#ifndef __CLC_MATH_CLC_FLOOR_H__
22
#define __CLC_MATH_CLC_FLOOR_H__
33

4-
#if defined(CLC_CLSPV) || defined(CLC_SPIRV)
5-
// clspv and spir-v targets provide their own OpenCL-compatible floor
6-
#define __clc_floor floor
7-
#else
8-
9-
// Map the function to an LLVM intrinsic
4+
#define __CLC_BODY <clc/math/unary_decl.inc>
105
#define __CLC_FUNCTION __clc_floor
11-
#define __CLC_INTRINSIC "llvm.floor"
12-
#include <clc/math/unary_intrin.inc>
136

14-
#undef __CLC_INTRINSIC
15-
#undef __CLC_FUNCTION
7+
#include <clc/math/gentype.inc>
168

17-
#endif
9+
#undef __CLC_BODY
10+
#undef __CLC_FUNCTION
1811

1912
#endif // __CLC_MATH_CLC_FLOOR_H__
Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,12 @@
11
#ifndef __CLC_MATH_CLC_RINT_H__
22
#define __CLC_MATH_CLC_RINT_H__
33

4-
#if defined(CLC_CLSPV) || defined(CLC_SPIRV)
5-
// clspv and spir-v targets provide their own OpenCL-compatible rint
6-
#define __clc_rint rint
7-
#else
8-
9-
// Map the function to an LLVM intrinsic
4+
#define __CLC_BODY <clc/math/unary_decl.inc>
105
#define __CLC_FUNCTION __clc_rint
11-
#define __CLC_INTRINSIC "llvm.rint"
12-
#include <clc/math/unary_intrin.inc>
136

14-
#undef __CLC_INTRINSIC
15-
#undef __CLC_FUNCTION
7+
#include <clc/math/gentype.inc>
168

17-
#endif
9+
#undef __CLC_BODY
10+
#undef __CLC_FUNCTION
1811

1912
#endif // __CLC_MATH_CLC_RINT_H__
Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,12 @@
11
#ifndef __CLC_MATH_CLC_TRUNC_H__
22
#define __CLC_MATH_CLC_TRUNC_H__
33

4-
#if defined(CLC_CLSPV) || defined(CLC_SPIRV)
5-
// clspv and spir-v targets provide their own OpenCL-compatible trunc
6-
#define __clc_trunc trunc
7-
#else
8-
9-
// Map the function to an LLVM intrinsic
4+
#define __CLC_BODY <clc/math/unary_decl.inc>
105
#define __CLC_FUNCTION __clc_trunc
11-
#define __CLC_INTRINSIC "llvm.trunc"
12-
#include <clc/math/unary_intrin.inc>
136

14-
#undef __CLC_INTRINSIC
15-
#undef __CLC_FUNCTION
7+
#include <clc/math/gentype.inc>
168

17-
#endif
9+
#undef __CLC_BODY
10+
#undef __CLC_FUNCTION
1811

1912
#endif // __CLC_MATH_CLC_TRUNC_H__

libclc/clc/lib/clspv/SOURCES

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,5 @@
1-
dummy.cl
1+
../generic/math/clc_ceil.cl
2+
../generic/math/clc_fabs.cl
3+
../generic/math/clc_floor.cl
4+
../generic/math/clc_rint.cl
5+
../generic/math/clc_trunc.cl

libclc/clc/lib/clspv/dummy.cl

Lines changed: 0 additions & 1 deletion
This file was deleted.

libclc/clc/lib/generic/SOURCES

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
geometric/clc_dot.cl
22
integer/clc_abs.cl
33
integer/clc_abs_diff.cl
4+
math/clc_ceil.cl
5+
math/clc_fabs.cl
6+
math/clc_floor.cl
7+
math/clc_rint.cl
8+
math/clc_trunc.cl
49
relational/clc_all.cl
510
relational/clc_any.cl
611
relational/clc_bitselect.cl

0 commit comments

Comments
 (0)