Skip to content

Commit 4525255

Browse files
authored
Optional SIMD strchrnul (#594)
Continuing #580. `strchrnul` is used by `strchr`, `strcspn` and many other places in libc. The implementation is a mashup of `strlen` and `memchr`.
1 parent 4ea6fdf commit 4525255

File tree

3 files changed

+108
-3
lines changed

3 files changed

+108
-3
lines changed

libc-top-half/musl/src/string/strchrnul.c

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,10 @@
22
#include <stdint.h>
33
#include <limits.h>
44

5+
#ifdef __wasm_simd128__
6+
#include <wasm_simd128.h>
7+
#endif
8+
59
#define ALIGN (sizeof(size_t))
610
#define ONES ((size_t)-1/UCHAR_MAX)
711
#define HIGHS (ONES * (UCHAR_MAX/2+1))
@@ -12,6 +16,49 @@ char *__strchrnul(const char *s, int c)
1216
c = (unsigned char)c;
1317
if (!c) return (char *)s + strlen(s);
1418

19+
#if defined(__wasm_simd128__) && defined(__wasilibc_simd_string)
20+
// Skip Clang 19 and Clang 20 which have a bug (llvm/llvm-project#146574)
21+
// which results in an ICE when inline assembly is used with a vector result.
22+
#if __clang_major__ != 19 && __clang_major__ != 20
23+
// Note that reading before/after the allocation of a pointer is UB in
24+
// C, so inline assembly is used to generate the exact machine
25+
// instruction we want with opaque semantics to the compiler to avoid
26+
// the UB.
27+
uintptr_t align = (uintptr_t)s % sizeof(v128_t);
28+
uintptr_t addr = (uintptr_t)s - align;
29+
v128_t vc = wasm_i8x16_splat(c);
30+
31+
for (;;) {
32+
v128_t v;
33+
__asm__ (
34+
"local.get %1\n"
35+
"v128.load 0\n"
36+
"local.set %0\n"
37+
: "=r"(v)
38+
: "r"(addr)
39+
: "memory");
40+
const v128_t cmp = wasm_i8x16_eq(v, (v128_t){}) | wasm_i8x16_eq(v, vc);
41+
// Bitmask is slow on AArch64, any_true is much faster.
42+
if (wasm_v128_any_true(cmp)) {
43+
// Clear the bits corresponding to align (little-endian)
44+
// so we can count trailing zeros.
45+
int mask = wasm_i8x16_bitmask(cmp) >> align << align;
46+
// At least one bit will be set, unless align cleared them.
47+
// Knowing this helps the compiler if it unrolls the loop.
48+
__builtin_assume(mask || align);
49+
// If the mask became zero because of align,
50+
// it's as if we didn't find anything.
51+
if (mask) {
52+
// Find the offset of the first one bit (little-endian).
53+
return (char *)s + (addr - (uintptr_t)s + __builtin_ctz(mask));
54+
}
55+
}
56+
align = 0;
57+
addr += sizeof(v128_t);
58+
}
59+
#endif
60+
#endif
61+
1562
#ifdef __GNUC__
1663
typedef size_t __attribute__((__may_alias__)) word;
1764
const word *w;

test/src/misc/memchr.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@ void test(char *ptr, size_t length, void *want) {
1515
int main(void) {
1616
char *const LIMIT = (char *)(__builtin_wasm_memory_size(0) * PAGESIZE);
1717

18-
for (size_t length = 0; length < 64; length++) {
19-
for (size_t alignment = 0; alignment < 24; alignment++) {
18+
for (ptrdiff_t length = 0; length < 64; length++) {
19+
for (ptrdiff_t alignment = 0; alignment < 24; alignment++) {
2020
for (ptrdiff_t pos = -2; pos < length + 2; pos++) {
2121
// Create a buffer with the given length, at a pointer with the given
2222
// alignment. Using the offset LIMIT - PAGESIZE - 8 means many buffers
@@ -26,10 +26,10 @@ int main(void) {
2626
char *ptr = LIMIT - PAGESIZE - 8 + alignment;
2727
memset(LIMIT - 2 * PAGESIZE, 0, 2 * PAGESIZE);
2828
memset(ptr, 5, length);
29-
ptr[pos] = 7;
3029

3130
// The first instance of the character is found.
3231
if (pos >= 0) ptr[pos + 2] = 7;
32+
ptr[pos] = 7;
3333

3434
// The character is found if it's within range.
3535
test(ptr, length, 0 <= pos && pos < length ? &ptr[pos] : NULL);

test/src/misc/strchrnul.c

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
//! add-flags.py(LDFLAGS): -Wl,--stack-first -Wl,--initial-memory=327680
2+
3+
#define _GNU_SOURCE
4+
5+
#include <__macro_PAGESIZE.h>
6+
#include <stddef.h>
7+
#include <stdio.h>
8+
#include <string.h>
9+
10+
void test(char *ptr, char *want) {
11+
char *got = strchrnul(ptr, 7);
12+
if (got != want) {
13+
printf("strchrnul(%p, 7) = %p, want %p\n", ptr, got, want);
14+
}
15+
}
16+
17+
int main(void) {
18+
char *const LIMIT = (char *)(__builtin_wasm_memory_size(0) * PAGESIZE);
19+
20+
for (ptrdiff_t length = 0; length < 64; length++) {
21+
for (ptrdiff_t alignment = 0; alignment < 24; alignment++) {
22+
for (ptrdiff_t pos = -2; pos < length + 2; pos++) {
23+
// Create a buffer with the given length, at a pointer with the given
24+
// alignment. Using the offset LIMIT - PAGESIZE - 8 means many buffers
25+
// will straddle a (Wasm, and likely OS) page boundary. Place the
26+
// character to find at every position in the buffer, including just
27+
// prior to it and after its end.
28+
char *ptr = LIMIT - PAGESIZE - 8 + alignment;
29+
memset(LIMIT - 2 * PAGESIZE, 0, 2 * PAGESIZE);
30+
memset(ptr, 5, length);
31+
32+
// The first instance of the character is found.
33+
if (pos >= 0) ptr[pos + 2] = 7;
34+
ptr[pos] = 7;
35+
ptr[length] = 0;
36+
37+
// The character is found if it's within range.
38+
test(ptr, 0 <= pos && pos < length ? &ptr[pos] : &ptr[length]);
39+
}
40+
}
41+
42+
// We need space for the terminator.
43+
if (length == 0) continue;
44+
45+
// Ensure we never read past the end of memory.
46+
char *ptr = LIMIT - length;
47+
memset(LIMIT - 2 * PAGESIZE, 0, 2 * PAGESIZE);
48+
memset(ptr, 5, length);
49+
50+
ptr[length - 1] = 7;
51+
test(ptr, &ptr[length - 1]);
52+
53+
ptr[length - 1] = 0;
54+
test(ptr, &ptr[length - 1]);
55+
}
56+
57+
return 0;
58+
}

0 commit comments

Comments
 (0)