Skip to content

Commit 553305f

Browse files
authored
Optional SIMD strlen (#586)
This is a potential first step toward upstreaming #580 piecemeal. `strlen` was chosen because it is already covered by tests and is one of the simplest implementations, which makes it easy to review.

Built and tested with:

```sh
CC=[PATH_TO]/wasi-sdk-25.0-x86_64-linux/bin/clang \
EXTRA_CFLAGS="-O2 -DNDEBUG -msimd128 -mbulk-memory -D__wasilibc_simd_string" make

(cd test ; CC=[PATH_TO]/wasi-sdk-25.0-x86_64-linux/bin/clang make )
```
1 parent 205dd23 commit 553305f

File tree

4 files changed

+87
-2
lines changed

4 files changed

+87
-2
lines changed

.github/workflows/main.yml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,15 @@ jobs:
110110
TARGET_TRIPLE: wasm32-wasip1-threads
111111
THREAD_MODEL: posix
112112

113+
- name: Test wasm32-wasi-simd
114+
os: ubuntu-24.04
115+
clang_version: 16
116+
test: true
117+
upload: wasm32-wasi-simd
118+
env:
119+
MAKE_TARGETS: "no-check-symbols"
120+
EXTRA_CFLAGS: "-O2 -DNDEBUG -msimd128 -mrelaxed-simd -mbulk-memory -D__wasilibc_simd_string"
121+
113122
steps:
114123
- uses: actions/checkout@v4.1.7
115124
with:

Makefile

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -808,11 +808,13 @@ $(DUMMY_LIBS):
808808
$(AR) crs "$$lib"; \
809809
done
810810

811-
finish: $(STARTUP_FILES) libc $(DUMMY_LIBS)
811+
no-check-symbols: $(STARTUP_FILES) libc $(DUMMY_LIBS)
812812
#
813813
# The build succeeded! The generated sysroot is in $(SYSROOT).
814814
#
815815

816+
finish: no-check-symbols
817+
816818
ifeq ($(LTO),no)
817819
# The check for defined and undefined symbols expects there to be a heap
818820
# allocator (providing malloc, calloc, free, etc). Skip this step if the build
@@ -1033,4 +1035,4 @@ clean:
10331035
$(RM) -r "$(OBJDIR)"
10341036
$(RM) -r "$(SYSROOT)"
10351037

1036-
.PHONY: default libc libc_so finish install clean check-symbols bindings
1038+
.PHONY: default libc libc_so finish install clean check-symbols no-check-symbols bindings

libc-top-half/musl/src/string/strlen.c

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,47 @@
22
#include <stdint.h>
33
#include <limits.h>
44

5+
#ifdef __wasm_simd128__
6+
#include <wasm_simd128.h>
7+
#endif
8+
59
#define ALIGN (sizeof(size_t))
610
#define ONES ((size_t)-1/UCHAR_MAX)
711
#define HIGHS (ONES * (UCHAR_MAX/2+1))
812
#define HASZERO(x) ((x)-ONES & ~(x) & HIGHS)
913

1014
size_t strlen(const char *s)
1115
{
16+
#if defined(__wasm_simd128__) && defined(__wasilibc_simd_string)
17+
// strlen must stop as soon as it finds the terminator.
18+
// Aligning ensures loads beyond the terminator are safe.
19+
// Casting through uintptr_t makes this implementation-defined,
20+
// rather than undefined behavior.
21+
uintptr_t align = (uintptr_t)s % sizeof(v128_t);
22+
const v128_t *v = (v128_t *)((uintptr_t)s - align);
23+
24+
for (;;) {
25+
// Bitmask is slow on AArch64, all_true is much faster.
26+
if (!wasm_i8x16_all_true(*v)) {
27+
const v128_t cmp = wasm_i8x16_eq(*v, (v128_t){});
28+
// Clear the bits corresponding to align (little-endian)
29+
// so we can count trailing zeros.
30+
int mask = wasm_i8x16_bitmask(cmp) >> align << align;
31+
// At least one bit will be set, unless align cleared them.
32+
// Knowing this helps the compiler if it unrolls the loop.
33+
__builtin_assume(mask || align);
34+
// If the mask became zero because of align,
35+
// it's as if we didn't find anything.
36+
if (mask) {
37+
// Find the offset of the first one bit (little-endian).
38+
return (char *)v - s + __builtin_ctz(mask);
39+
}
40+
}
41+
align = 0;
42+
v++;
43+
}
44+
#endif
45+
1246
const char *a = s;
1347
#ifdef __GNUC__
1448
typedef size_t __attribute__((__may_alias__)) word;

test/src/misc/strlen.c

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
//! add-flags.py(LDFLAGS): -Wl,--stack-first -Wl,--initial-memory=327680
2+
3+
#include <__macro_PAGESIZE.h>
4+
#include <stdio.h>
5+
#include <string.h>
6+
7+
// Checks that strlen(ptr) returns `want`. On a mismatch, prints the pointer,
// the length strlen reported, and the expected length to stdout.
//
// ptr:  NUL-terminated string to measure.
// want: expected result of strlen(ptr).
void test(char *ptr, size_t want) {
  size_t got = strlen(ptr);
  if (got != want) {
    // %zu is the conversion specifier for size_t, and %p requires a
    // void pointer argument, hence the explicit cast.
    printf("strlen(%p) = %zu, want %zu\n", (void *)ptr, got, want);
  }
}
13+
14+
int main(void) {
15+
char *const LIMIT = (char *)(__builtin_wasm_memory_size(0) * PAGESIZE);
16+
17+
for (size_t length = 0; length < 64; length++) {
18+
for (size_t alignment = 0; alignment < 24; alignment++) {
19+
// Create a string with the given length, at a pointer with the given
20+
// alignment. Using the offset LIMIT - PAGESIZE - 8 means many strings
21+
// will straddle a (Wasm, and likely OS) page boundary.
22+
char *ptr = LIMIT - PAGESIZE - 8 + alignment;
23+
memset(LIMIT - 2 * PAGESIZE, 0, 2 * PAGESIZE);
24+
memset(ptr, 5, length);
25+
test(ptr, length);
26+
27+
// Make sure we're not fooled by non-zero characters prior to the string.
28+
ptr[-1] = 5;
29+
test(ptr, length);
30+
}
31+
32+
// Ensure we never read past the end of memory.
33+
char *ptr = LIMIT - length - 1;
34+
memset(LIMIT - 2 * PAGESIZE, 0, 2 * PAGESIZE);
35+
memset(ptr, 5, length);
36+
test(ptr, length);
37+
}
38+
39+
return 0;
40+
}

0 commit comments

Comments
 (0)