Skip to content

Commit c3a0fcc

Browse files
authored
[libclc] Optimize CLC vector any/all builtins (llvm#124568)
By using the vector reduction builtins we can avoid scalarization. Targets that don't support vector reductions will scalarize later on anyway. The vector reduction builtins should be well-enough supported by the middle-end to be a generic solution. This produces conceptually equivalent code: all vector elements are OR'd/AND'd together and the final scalar is bit-shifted and masked to produce the final result. The 'normalize' builtin uses 'all' so its code has similarly improved in places.
1 parent 38b3f45 commit c3a0fcc

File tree

2 files changed

+30
-44
lines changed

2 files changed

+30
-44
lines changed
Lines changed: 15 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,21 @@
11
#include <clc/internal/clc.h>
22

33
#define _CLC_ALL(v) (((v) >> ((sizeof(v) * 8) - 1)) & 0x1)
4-
#define _CLC_ALL2(v) (_CLC_ALL((v).s0) & _CLC_ALL((v).s1))
5-
#define _CLC_ALL3(v) (_CLC_ALL2((v)) & _CLC_ALL((v).s2))
6-
#define _CLC_ALL4(v) (_CLC_ALL3((v)) & _CLC_ALL((v).s3))
7-
#define _CLC_ALL8(v) \
8-
(_CLC_ALL4((v)) & _CLC_ALL((v).s4) & _CLC_ALL((v).s5) & _CLC_ALL((v).s6) & \
9-
_CLC_ALL((v).s7))
10-
#define _CLC_ALL16(v) \
11-
(_CLC_ALL8((v)) & _CLC_ALL((v).s8) & _CLC_ALL((v).s9) & _CLC_ALL((v).sA) & \
12-
_CLC_ALL((v).sB) & _CLC_ALL((v).sC) & _CLC_ALL((v).sD) & _CLC_ALL((v).sE) & \
13-
_CLC_ALL((v).sf))
144

15-
#define ALL_ID(TYPE) _CLC_OVERLOAD _CLC_DEF int __clc_all(TYPE v)
5+
#define _CLC_ALL_VEC(TYPE) \
6+
_CLC_OVERLOAD _CLC_DEF int __clc_all(TYPE v) { \
7+
return _CLC_ALL(__builtin_reduce_and(v)); \
8+
}
169

17-
#define ALL_VECTORIZE(TYPE) \
18-
ALL_ID(TYPE) { return _CLC_ALL(v); } \
19-
ALL_ID(TYPE##2) { return _CLC_ALL2(v); } \
20-
ALL_ID(TYPE##3) { return _CLC_ALL3(v); } \
21-
ALL_ID(TYPE##4) { return _CLC_ALL4(v); } \
22-
ALL_ID(TYPE##8) { return _CLC_ALL8(v); } \
23-
ALL_ID(TYPE##16) { return _CLC_ALL16(v); }
10+
#define _CLC_DEFINE_ALL(TYPE) \
11+
_CLC_OVERLOAD _CLC_DEF int __clc_all(TYPE v) { return _CLC_ALL(v); } \
12+
_CLC_ALL_VEC(TYPE##2) \
13+
_CLC_ALL_VEC(TYPE##3) \
14+
_CLC_ALL_VEC(TYPE##4) \
15+
_CLC_ALL_VEC(TYPE##8) \
16+
_CLC_ALL_VEC(TYPE##16)
2417

25-
ALL_VECTORIZE(char)
26-
ALL_VECTORIZE(short)
27-
ALL_VECTORIZE(int)
28-
ALL_VECTORIZE(long)
18+
_CLC_DEFINE_ALL(char)
19+
_CLC_DEFINE_ALL(short)
20+
_CLC_DEFINE_ALL(int)
21+
_CLC_DEFINE_ALL(long)
Lines changed: 15 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,21 @@
11
#include <clc/internal/clc.h>
22

33
#define _CLC_ANY(v) (((v) >> ((sizeof(v) * 8) - 1)) & 0x1)
4-
#define _CLC_ANY2(v) (_CLC_ANY((v).s0) | _CLC_ANY((v).s1))
5-
#define _CLC_ANY3(v) (_CLC_ANY2((v)) | _CLC_ANY((v).s2))
6-
#define _CLC_ANY4(v) (_CLC_ANY3((v)) | _CLC_ANY((v).s3))
7-
#define _CLC_ANY8(v) \
8-
(_CLC_ANY4((v)) | _CLC_ANY((v).s4) | _CLC_ANY((v).s5) | _CLC_ANY((v).s6) | \
9-
_CLC_ANY((v).s7))
10-
#define _CLC_ANY16(v) \
11-
(_CLC_ANY8((v)) | _CLC_ANY((v).s8) | _CLC_ANY((v).s9) | _CLC_ANY((v).sA) | \
12-
_CLC_ANY((v).sB) | _CLC_ANY((v).sC) | _CLC_ANY((v).sD) | _CLC_ANY((v).sE) | \
13-
_CLC_ANY((v).sf))
144

15-
#define ANY_ID(TYPE) _CLC_OVERLOAD _CLC_DEF int __clc_any(TYPE v)
5+
#define _CLC_ANY_VEC(TYPE) \
6+
_CLC_OVERLOAD _CLC_DEF int __clc_any(TYPE v) { \
7+
return _CLC_ANY(__builtin_reduce_or(v)); \
8+
}
169

17-
#define ANY_VECTORIZE(TYPE) \
18-
ANY_ID(TYPE) { return _CLC_ANY(v); } \
19-
ANY_ID(TYPE##2) { return _CLC_ANY2(v); } \
20-
ANY_ID(TYPE##3) { return _CLC_ANY3(v); } \
21-
ANY_ID(TYPE##4) { return _CLC_ANY4(v); } \
22-
ANY_ID(TYPE##8) { return _CLC_ANY8(v); } \
23-
ANY_ID(TYPE##16) { return _CLC_ANY16(v); }
10+
#define _CLC_DEFINE_ANY(TYPE) \
11+
_CLC_OVERLOAD _CLC_DEF int __clc_any(TYPE v) { return _CLC_ANY(v); } \
12+
_CLC_ANY_VEC(TYPE##2) \
13+
_CLC_ANY_VEC(TYPE##3) \
14+
_CLC_ANY_VEC(TYPE##4) \
15+
_CLC_ANY_VEC(TYPE##8) \
16+
_CLC_ANY_VEC(TYPE##16)
2417

25-
ANY_VECTORIZE(char)
26-
ANY_VECTORIZE(short)
27-
ANY_VECTORIZE(int)
28-
ANY_VECTORIZE(long)
18+
_CLC_DEFINE_ANY(char)
19+
_CLC_DEFINE_ANY(short)
20+
_CLC_DEFINE_ANY(int)
21+
_CLC_DEFINE_ANY(long)

0 commit comments

Comments
 (0)