Skip to content

Commit 5c411b3

Browse files
authored
[libclc] Use elementwise ctlz/cttz builtins for CLC clz/ctz (#154535)
Using the elementwise builtins optimizes the vector case: instead of scalarizing, we can compile directly to the vector intrinsics.
1 parent f2aedc2 commit 5c411b3

File tree

4 files changed

+24 -64 lines changed

libclc/clc/lib/generic/integer/clc_clz.cl

Lines changed: 1 addition & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -10,38 +10,5 @@
10 10
#include <clc/integer/clc_clz.h>
11 11
#include <clc/internal/clc.h>
12 12

13-
_CLC_OVERLOAD _CLC_DEF char __clc_clz(char x) {
14-
return __clc_clz((ushort)(uchar)x) - 8;
15-
}
16-
17-
_CLC_OVERLOAD _CLC_DEF uchar __clc_clz(uchar x) {
18-
return __clc_clz((ushort)x) - 8;
19-
}
20-
21-
_CLC_OVERLOAD _CLC_DEF short __clc_clz(short x) {
22-
return x ? __builtin_clzs(x) : 16;
23-
}
24-
25-
_CLC_OVERLOAD _CLC_DEF ushort __clc_clz(ushort x) {
26-
return x ? __builtin_clzs(x) : 16;
27-
}
28-
29-
_CLC_OVERLOAD _CLC_DEF int __clc_clz(int x) {
30-
return x ? __builtin_clz(x) : 32;
31-
}
32-
33-
_CLC_OVERLOAD _CLC_DEF uint __clc_clz(uint x) {
34-
return x ? __builtin_clz(x) : 32;
35-
}
36-
37-
_CLC_OVERLOAD _CLC_DEF long __clc_clz(long x) {
38-
return x ? __builtin_clzl(x) : 64;
39-
}
40-
41-
_CLC_OVERLOAD _CLC_DEF ulong __clc_clz(ulong x) {
42-
return x ? __builtin_clzl(x) : 64;
43-
}
44-
45-
#define __CLC_FUNCTION __clc_clz
46-
#define __CLC_BODY <clc/shared/unary_def_scalarize.inc>
13+
#define __CLC_BODY <clc_clz.inc>
47 14
#include <clc/integer/gentype.inc>
libclc/clc/lib/generic/integer/clc_clz.inc

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_clz(__CLC_GENTYPE x) {
10+
return __builtin_elementwise_ctlz(x, (__CLC_GENTYPE)__CLC_GENSIZE);
11+
}

libclc/clc/lib/generic/integer/clc_ctz.cl

Lines changed: 1 addition & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -10,34 +10,5 @@
10 10
#include <clc/integer/clc_ctz.h>
11 11
#include <clc/internal/clc.h>
12 12

13-
_CLC_OVERLOAD _CLC_DEF char __clc_ctz(char x) {
14-
return __clc_ctz(__clc_as_uchar(x));
15-
}
16-
17-
_CLC_OVERLOAD _CLC_DEF uchar __clc_ctz(uchar x) { return __builtin_ctzg(x, 8); }
18-
19-
_CLC_OVERLOAD _CLC_DEF short __clc_ctz(short x) {
20-
return __clc_ctz(__clc_as_ushort(x));
21-
}
22-
23-
_CLC_OVERLOAD _CLC_DEF ushort __clc_ctz(ushort x) {
24-
return __builtin_ctzg(x, 16);
25-
}
26-
27-
_CLC_OVERLOAD _CLC_DEF int __clc_ctz(int x) {
28-
return __clc_ctz(__clc_as_uint(x));
29-
}
30-
31-
_CLC_OVERLOAD _CLC_DEF uint __clc_ctz(uint x) { return __builtin_ctzg(x, 32); }
32-
33-
_CLC_OVERLOAD _CLC_DEF long __clc_ctz(long x) {
34-
return __clc_ctz(__clc_as_ulong(x));
35-
}
36-
37-
_CLC_OVERLOAD _CLC_DEF ulong __clc_ctz(ulong x) {
38-
return __builtin_ctzg(x, 64);
39-
}
40-
41-
#define __CLC_FUNCTION __clc_ctz
42-
#define __CLC_BODY <clc/shared/unary_def_scalarize.inc>
13+
#define __CLC_BODY <clc_ctz.inc>
43 14
#include <clc/integer/gentype.inc>
libclc/clc/lib/generic/integer/clc_ctz.inc

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_ctz(__CLC_GENTYPE x) {
10+
return __builtin_elementwise_cttz(x, (__CLC_GENTYPE)__CLC_GENSIZE);
11+
}

0 commit comments

Comments
 (0)