Skip to content

Commit 7f36611

Browse files
authored
[libclc] Remove __attribute__((always_inline)) (#158791)
always_inline doesn't guarantee a performance improvement. Target-specific optimizations decide whether inlining is profitable.

Changes to amdgcn--amdhsa.bc:
* _Z9__clc_logDv16_f and _Z15__clc_remainderDv16_fS_ are no longer inlined.
* sincos vector function code size has doubled due to apparent duplication.

Also replace the mistaken _CLC_DECL with _CLC_DEF on function definitions.
1 parent 5cc4193 commit 7f36611

File tree

6 files changed

+19
-23
lines changed

6 files changed

+19
-23
lines changed

libclc/clc/include/clc/clcfunc.h

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,17 +11,13 @@
1111

1212
#define _CLC_OVERLOAD __attribute__((overloadable))
1313
#define _CLC_DECL
14-
#define _CLC_INLINE __attribute__((always_inline)) inline
14+
#define _CLC_INLINE inline
1515
#define _CLC_CONST __attribute__((const))
1616

17-
// avoid inlines for SPIR-V related targets since we'll optimise later in the
18-
// chain
19-
#if defined(CLC_SPIRV)
20-
#define _CLC_DEF
21-
#elif defined(CLC_CLSPV)
17+
#if defined(CLC_CLSPV)
2218
#define _CLC_DEF __attribute__((noinline)) __attribute__((clspv_libclc_builtin))
2319
#else
24-
#define _CLC_DEF __attribute__((always_inline))
20+
#define _CLC_DEF
2521
#endif
2622

2723
#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \

libclc/clc/include/clc/misc/shuffle2_def.inc

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,22 +18,22 @@
1818
// The return type is same base type as the input type, with the same vector
1919
// size as the mask. Elements in the mask must be the same size (number of bits)
2020
// as the input value, e.g. char8 ret = shuffle(char2 x, uchar8 mask);
21-
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE
21+
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE
2222
__CLC_FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 2) x,
2323
__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 2) y, __CLC_U_GENTYPE mask) {
2424
return __CLC_IMPL_FUNCTION(__CLC_FUNCTION)(x, y, mask);
2525
}
26-
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE
26+
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE
2727
__CLC_FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 4) x,
2828
__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 4) y, __CLC_U_GENTYPE mask) {
2929
return __CLC_IMPL_FUNCTION(__CLC_FUNCTION)(x, y, mask);
3030
}
31-
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE
31+
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE
3232
__CLC_FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 8) x,
3333
__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 8) y, __CLC_U_GENTYPE mask) {
3434
return __CLC_IMPL_FUNCTION(__CLC_FUNCTION)(x, y, mask);
3535
}
36-
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(
36+
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNCTION(
3737
__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 16) x,
3838
__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 16) y, __CLC_U_GENTYPE mask) {
3939
return __CLC_IMPL_FUNCTION(__CLC_FUNCTION)(x, y, mask);

libclc/clc/include/clc/misc/shuffle_def.inc

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,19 +18,19 @@
1818
// The return type is same base type as the input type, with the same vector
1919
// size as the mask. Elements in the mask must be the same size (number of bits)
2020
// as the input value, e.g. char8 ret = shuffle(char2 x, uchar8 mask);
21-
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE
21+
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE
2222
__CLC_FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 2) x, __CLC_U_GENTYPE mask) {
2323
return __CLC_IMPL_FUNCTION(__CLC_FUNCTION)(x, mask);
2424
}
25-
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE
25+
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE
2626
__CLC_FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 4) x, __CLC_U_GENTYPE mask) {
2727
return __CLC_IMPL_FUNCTION(__CLC_FUNCTION)(x, mask);
2828
}
29-
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE
29+
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE
3030
__CLC_FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 8) x, __CLC_U_GENTYPE mask) {
3131
return __CLC_IMPL_FUNCTION(__CLC_FUNCTION)(x, mask);
3232
}
33-
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(
33+
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNCTION(
3434
__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 16) x, __CLC_U_GENTYPE mask) {
3535
return __CLC_IMPL_FUNCTION(__CLC_FUNCTION)(x, mask);
3636
}

libclc/clc/lib/generic/atomic/clc_atomic_compare_exchange.inc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
#ifdef __CLC_FPSIZE
2525

2626
#define __CLC_DEFINE_ATOMIC(ADDRSPACE) \
27-
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __clc_atomic_compare_exchange( \
27+
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_atomic_compare_exchange( \
2828
volatile ADDRSPACE __CLC_GENTYPE *Ptr, __CLC_GENTYPE Comparator, \
2929
__CLC_GENTYPE Value, int MemoryOrderEqual, int MemoryOrderUnequal, \
3030
int MemoryScope) { \
@@ -38,7 +38,7 @@
3838
#else
3939

4040
#define __CLC_DEFINE_ATOMIC(ADDRSPACE) \
41-
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __clc_atomic_compare_exchange( \
41+
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_atomic_compare_exchange( \
4242
volatile ADDRSPACE __CLC_GENTYPE *Ptr, __CLC_GENTYPE Comparator, \
4343
__CLC_GENTYPE Value, int MemoryOrderEqual, int MemoryOrderUnequal, \
4444
int MemoryScope) { \

libclc/clc/lib/generic/atomic/clc_atomic_def.inc

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,15 +31,15 @@
3131

3232
#ifdef __CLC_NO_VALUE_ARG
3333
#define __CLC_DEFINE_ATOMIC(ADDRSPACE) \
34-
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION( \
34+
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNCTION( \
3535
volatile ADDRSPACE __CLC_GENTYPE *Ptr, int MemoryOrder, \
3636
int MemoryScope) { \
3737
return __CLC_AS_RETTYPE(__CLC_IMPL_FUNCTION( \
3838
(ADDRSPACE __CLC_PTR_CASTTYPE *)Ptr, MemoryOrder, MemoryScope)); \
3939
}
4040
#elif defined(__CLC_INC_DEC)
4141
#define __CLC_DEFINE_ATOMIC(ADDRSPACE) \
42-
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION( \
42+
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNCTION( \
4343
volatile ADDRSPACE __CLC_GENTYPE *Ptr, int MemoryOrder, \
4444
int MemoryScope) { \
4545
return __CLC_AS_RETTYPE( \
@@ -48,15 +48,15 @@
4848
}
4949
#elif defined(__CLC_RETURN_VOID)
5050
#define __CLC_DEFINE_ATOMIC(ADDRSPACE) \
51-
_CLC_OVERLOAD _CLC_DECL void __CLC_FUNCTION( \
51+
_CLC_OVERLOAD _CLC_DEF void __CLC_FUNCTION( \
5252
volatile ADDRSPACE __CLC_GENTYPE *Ptr, __CLC_GENTYPE Value, \
5353
int MemoryOrder, int MemoryScope) { \
5454
__CLC_IMPL_FUNCTION((ADDRSPACE __CLC_PTR_CASTTYPE *)Ptr, Value, \
5555
MemoryOrder, MemoryScope); \
5656
}
5757
#else
5858
#define __CLC_DEFINE_ATOMIC(ADDRSPACE) \
59-
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION( \
59+
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNCTION( \
6060
volatile ADDRSPACE __CLC_GENTYPE *Ptr, __CLC_GENTYPE Value, \
6161
int MemoryOrder, int MemoryScope) { \
6262
return __CLC_AS_RETTYPE( \

libclc/clc/lib/generic/math/clc_sincos_helpers.inc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,8 +74,8 @@ _CLC_DEF _CLC_OVERLOAD __CLC_FLOATN __clc_cosf_piby4(__CLC_FLOATN x,
7474
return ret;
7575
}
7676

77-
_CLC_DECL _CLC_OVERLOAD __CLC_FLOATN __clc_tanf_piby4(__CLC_FLOATN x,
78-
__CLC_INTN regn) {
77+
_CLC_DEF _CLC_OVERLOAD __CLC_FLOATN __clc_tanf_piby4(__CLC_FLOATN x,
78+
__CLC_INTN regn) {
7979
// Core Remez [1,2] approximation to tan(x) on the interval [0,pi/4].
8080
__CLC_FLOATN r = x * x;
8181

0 commit comments

Comments
 (0)