Skip to content

Optional SIMD memcmp #603

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 15, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions libc-top-half/musl/src/string/memcmp.c
Original file line number Diff line number Diff line change
@@ -1,7 +1,42 @@
#include <string.h>

#ifdef __wasm_simd128__
#include <wasm_simd128.h>
#endif

// Compares the first n bytes of the objects at vl and vr as unsigned chars.
// Returns 0 when all n bytes are equal; otherwise returns the difference
// (left byte minus right byte) at the first position where they differ.
//
// When both __wasm_simd128__ and __wasilibc_simd_string are defined and
// n is at least sizeof(v128_t), the comparison is done one v128_t at a time.
// In every other case the bytewise loop at the end of the function is used.
int memcmp(const void *vl, const void *vr, size_t n)
{
#if defined(__wasm_simd128__) && defined(__wasilibc_simd_string)
if (n >= sizeof(v128_t)) {
// memcmp is allowed to read up to n bytes from each object.
// Find the first different character in the objects.
// Unaligned loads handle the case where the objects
// have mismatching alignments.
const v128_t *v1 = (v128_t *)vl;
const v128_t *v2 = (v128_t *)vr;
while (n) {
// Lane-wise byte equality of the current chunk of each object.
const v128_t cmp = wasm_i8x16_eq(wasm_v128_load(v1), wasm_v128_load(v2));
// Bitmask is slow on AArch64, all_true is much faster.
// The bitmask is therefore only computed once a mismatch is known to exist.
if (!wasm_i8x16_all_true(cmp)) {
// Find the offset of the first zero bit (little-endian).
// The mask is inverted so the first mismatching lane becomes the lowest
// set bit, which __builtin_ctz then locates.
size_t ctz = __builtin_ctz(~wasm_i8x16_bitmask(cmp));
const unsigned char *u1 = (unsigned char *)v1 + ctz;
const unsigned char *u2 = (unsigned char *)v2 + ctz;
// This may help the compiler if the function is inlined.
__builtin_assume(*u1 - *u2 != 0);
return *u1 - *u2;
}
// This makes n a multiple of sizeof(v128_t)
// for every iteration except the first.
// align is in the range 1..sizeof(v128_t): it equals n % sizeof(v128_t)
// when that is non-zero and sizeof(v128_t) otherwise. The first step may
// therefore advance by less than a full chunk, so the next load re-reads
// some bytes already found equal; in exchange the last load ends exactly
// at byte n and no load reads past the n bytes of either object.
size_t align = (n - 1) % sizeof(v128_t) + 1;
v1 = (v128_t *)((char *)v1 + align);
v2 = (v128_t *)((char *)v2 + align);
n -= align;
}
// Every chunk compared equal.
return 0;
}
#endif

// Bytewise comparison: stop at the first differing byte or after n bytes.
const unsigned char *l=vl, *r=vr;
for (; n && *l == *r; n--, l++, r++);
// n is non-zero only if the loop stopped on a differing pair.
return n ? *l-*r : 0;
Expand Down
50 changes: 50 additions & 0 deletions test/src/misc/memcmp.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
//! add-flags.py(LDFLAGS): -Wl,--stack-first -Wl,--initial-memory=327680

#include <__macro_PAGESIZE.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>

// Maps an integer to its sign: 1 for positive values, -1 for negative
// values, and 0 for zero.
int sign(int val) {
  if (val > 0) {
    return 1;
  }
  if (val < 0) {
    return -1;
  }
  return 0;
}
// Calls memcmp on the first `length` bytes of ptr1 and ptr2 and prints a
// diagnostic line to stdout when the sign of the result differs from the
// sign of `want`. Only signs are compared, so any positive (or negative)
// result matches any positive (or negative) expectation.
void test(char *ptr1, char *ptr2, size_t length, int want) {
  int got = memcmp(ptr1, ptr2, length);
  if (sign(got) != sign(want)) {
    // %p requires a void * argument and %zu is the conversion for size_t;
    // using them keeps the call well-defined regardless of how size_t is
    // defined on the target.
    printf("memcmp(%p, %p, %zu) = %d, want %d\n", (void *)ptr1, (void *)ptr2,
           length, got, want);
  }
}

// Exercises memcmp over buffer lengths 0..63, 24 different start offsets
// for the first buffer, and a single differing byte placed at every
// position from two bytes before the buffers to one byte past their end.
// Mismatches are reported by test(); the function itself always returns 0.
int main(void) {
  // One past the last byte of linear memory.
  char *const memory_end = (char *)(__builtin_wasm_memory_size(0) * PAGESIZE);
  // The last two pages are the scratch area that is cleared for every case.
  char *const scratch = memory_end - 2 * PAGESIZE;
  // The second buffer has a fixed address, so it does not always share
  // alignment with the first buffer.
  char *const fixed_buf = memory_end - PAGESIZE / 2;

  for (ptrdiff_t len = 0; len < 64; len++) {
    for (ptrdiff_t skew = 0; skew < 24; skew++) {
      // Starting 8 bytes before the final page (plus the skew) means many
      // buffers straddle a (Wasm, and likely OS) page boundary.
      char *const moving_buf = memory_end - PAGESIZE - 8 + skew;

      for (ptrdiff_t at = -2; at < len + 2; at++) {
        // Reset the scratch area, then fill both buffers with equal bytes.
        memset(scratch, 0, 2 * PAGESIZE);
        memset(moving_buf, 5, len);
        memset(fixed_buf, 5, len);

        // Plant the difference; it may land just outside the compared
        // range, in which case memcmp must not observe it.
        moving_buf[at] = 7;
        fixed_buf[at] = 3;

        int inside = at >= 0 && at < len;

        int want_forward = 0;
        if (inside) {
          want_forward = moving_buf[at] - fixed_buf[at];
        }
        test(moving_buf, fixed_buf, len, want_forward);

        int want_backward = 0;
        if (inside) {
          want_backward = fixed_buf[at] - moving_buf[at];
        }
        test(fixed_buf, moving_buf, len, want_backward);
      }
    }
  }

  return 0;
}