Skip to content

Commit bb87a92

Browse files
committed
Fix memchr.
1 parent 4837933 commit bb87a92

File tree

5 files changed

+130
-74
lines changed

5 files changed

+130
-74
lines changed

sqlite3/libc/build.sh

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,13 @@ SRCS="${1:-libc.c}"
1010
"../tools.sh"
1111

1212
trap 'rm -f libc.c libc.tmp' EXIT
13-
echo '#include <string.h>' > libc.c
14-
echo '#include <stdlib.h>' >> libc.c
13+
cat << EOF > libc.c
14+
#include <string.h>
15+
#include <stdlib.h>
16+
EOF
1517

1618
"$WASI_SDK/clang" --target=wasm32-wasi -std=c23 -g0 -O2 \
19+
-Wall -Wextra -Wno-unused-parameter -Wno-unused-function \
1720
-o libc.wasm -I. "$SRCS" \
1821
-mexec-model=reactor \
1922
-msimd128 -mmutable-globals -mmultivalue \

sqlite3/libc/libc.wasm

815 Bytes
Binary file not shown.

sqlite3/libc/libc.wat

Lines changed: 100 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -142,42 +142,53 @@
142142
(local $3 i32)
143143
(local $4 i32)
144144
(local $5 i32)
145-
(local $6 v128)
145+
(local $6 i32)
146146
(local $7 v128)
147-
(local.set $4
148-
(i32.and
149-
(local.get $0)
150-
(i32.const 15)
147+
(local $8 v128)
148+
(local $scratch i32)
149+
(block $block
150+
(br_if $block
151+
(i32.eqz
152+
(local.get $2)
153+
)
151154
)
152-
)
153-
(block $block1
154-
(block $block
155-
(if
156-
(v128.any_true
157-
(local.tee $6
158-
(i8x16.eq
159-
(v128.load
160-
(local.tee $3
161-
(i32.and
162-
(local.get $0)
163-
(i32.const -16)
164-
)
165-
)
166-
)
155+
(local.set $4
156+
(i32.and
157+
(local.get $0)
158+
(i32.const 15)
159+
)
160+
)
161+
(block $block2
162+
(block $block1
163+
(br_if $block1
164+
(i32.eqz
165+
(v128.any_true
167166
(local.tee $7
168-
(i8x16.splat
169-
(local.get $1)
167+
(i8x16.eq
168+
(v128.load
169+
(local.tee $3
170+
(i32.and
171+
(local.get $0)
172+
(i32.const -16)
173+
)
174+
)
175+
)
176+
(local.tee $8
177+
(i8x16.splat
178+
(local.get $1)
179+
)
180+
)
170181
)
171182
)
172183
)
173184
)
174185
)
175-
(then
176-
(br_if $block
177-
(local.tee $1
186+
(br_if $block1
187+
(i32.eqz
188+
(local.tee $5
178189
(i32.and
179190
(i8x16.bitmask
180-
(local.get $6)
191+
(local.get $7)
181192
)
182193
(i32.shl
183194
(i32.const -1)
@@ -187,42 +198,61 @@
187198
)
188199
)
189200
)
201+
(local.set $1
202+
(local.get $2)
203+
)
204+
(br $block2)
190205
)
191-
(br_if $block1
192-
(i32.gt_u
206+
(br_if $block
207+
(i32.lt_u
208+
(local.get $2)
193209
(local.tee $1
194210
(i32.sub
195-
(i32.add
196-
(local.get $2)
197-
(local.get $4)
211+
(local.get $2)
212+
(local.tee $3
213+
(i32.sub
214+
(i32.const 16)
215+
(local.get $4)
216+
)
198217
)
199-
(i32.const 16)
200218
)
201219
)
202-
(local.get $2)
220+
)
221+
)
222+
(br_if $block
223+
(i32.eqz
224+
(local.get $1)
203225
)
204226
)
205227
(local.set $3
206228
(i32.add
207-
(i32.sub
208-
(local.get $0)
209-
(local.get $4)
210-
)
211-
(i32.const 16)
229+
(local.get $0)
230+
(local.get $3)
212231
)
213232
)
214-
(block $block2
233+
(block $block3
215234
(loop $label
216-
(br_if $block2
235+
(br_if $block3
217236
(v128.any_true
218-
(local.tee $6
237+
(local.tee $7
219238
(i8x16.eq
220239
(v128.load
221240
(local.get $3)
222241
)
223-
(local.get $7)
242+
(local.get $8)
243+
)
244+
)
245+
)
246+
)
247+
(br_if $block
248+
(i32.gt_u
249+
(local.tee $0
250+
(i32.sub
251+
(local.get $1)
252+
(i32.const 16)
224253
)
225254
)
255+
(local.get $1)
226256
)
227257
)
228258
(local.set $3
@@ -232,35 +262,49 @@
232262
)
233263
)
234264
(br_if $label
235-
(i32.ge_u
236-
(local.get $1)
237-
(local.tee $1
238-
(i32.sub
239-
(local.get $1)
240-
(i32.const 16)
265+
(i32.eqz
266+
(block (result i32)
267+
(local.set $scratch
268+
(i32.eq
269+
(local.get $1)
270+
(i32.const 16)
271+
)
272+
)
273+
(local.set $1
274+
(local.get $0)
241275
)
276+
(local.get $scratch)
242277
)
243278
)
244279
)
245280
)
246-
(br $block1)
281+
(br $block)
247282
)
248-
(local.set $1
283+
(local.set $5
249284
(i8x16.bitmask
250-
(local.get $6)
285+
(local.get $7)
251286
)
252287
)
253288
)
254-
(local.set $5
255-
(i32.add
256-
(local.get $3)
257-
(i32.ctz
289+
(local.set $6
290+
(select
291+
(i32.add
292+
(local.get $3)
293+
(local.tee $0
294+
(i32.ctz
295+
(local.get $5)
296+
)
297+
)
298+
)
299+
(i32.const 0)
300+
(i32.lt_u
301+
(local.get $0)
258302
(local.get $1)
259303
)
260304
)
261305
)
262306
)
263-
(local.get $5)
307+
(local.get $6)
264308
)
265309
(func $strlen (param $0 i32) (result i32)
266310
(local $1 i32)

sqlite3/libc/libc_test.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,9 @@ func Benchmark_memchr(b *testing.B) {
116116
if got := call(memchr, ptr1, 5, size); got != ptr1+size/2 {
117117
b.Fatal(got)
118118
}
119+
if got := call(memchr, ptr1, 5, size/2); got != 0 {
120+
b.Fatal(got, ptr1+size/2)
121+
}
119122
}
120123

121124
func Benchmark_memcmp(b *testing.B) {

sqlite3/libc/string.h

Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ void *memmove(void *dest, const void *src, size_t n) {
4545
// aligned address less than memory size.
4646
//
4747
// These also assume unaligned access is not painfully slow,
48-
// but that bitmask extraction is slow on AArch64.
48+
// but that bitmask extraction is really slow on AArch64.
4949

5050
__attribute__((weak))
5151
int memcmp(const void *v1, const void *v2, size_t n) {
@@ -75,13 +75,14 @@ void *memchr(const void *v, int c, size_t n) {
7575
const v128_t *w = (void *)(v - align);
7676
const v128_t wc = wasm_i8x16_splat(c);
7777

78-
while (true) {
78+
while (n) {
7979
const v128_t cmp = wasm_i8x16_eq(*w, wc);
8080
if (wasm_v128_any_true(cmp)) {
8181
int mask = wasm_i8x16_bitmask(cmp) >> align << align;
8282
__builtin_assume(mask || align);
8383
if (mask) {
84-
return (void *)w + __builtin_ctz(mask);
84+
size_t ctz = __builtin_ctz(mask);
85+
return ctz < n ? (void *)w + ctz : NULL;
8586
}
8687
}
8788
if (__builtin_sub_overflow(n, sizeof(v128_t) - align, &n)) {
@@ -90,6 +91,7 @@ void *memchr(const void *v, int c, size_t n) {
9091
align = 0;
9192
w++;
9293
}
94+
return NULL;
9395
}
9496

9597
__attribute__((weak))
@@ -111,12 +113,7 @@ size_t strlen(const char *s) {
111113
}
112114
}
113115

114-
__attribute__((weak))
115-
int strcmp(const char *s1, const char *s2) {
116-
if (__builtin_constant_p(__builtin_strlen(s2))) {
117-
return strncmp(s1, s2, __builtin_strlen(s2));
118-
}
119-
116+
static int __strcmp(const char *s1, const char *s2) {
120117
const v128_t *const limit =
121118
(v128_t *)(__builtin_wasm_memory_size(0) * PAGESIZE) - 1;
122119

@@ -144,6 +141,14 @@ int strcmp(const char *s1, const char *s2) {
144141
return 0;
145142
}
146143

144+
// Compares the NUL-terminated strings s1 and s2.
//
// When the length of s2 is known at compile time (e.g. s2 is a string
// literal), the comparison is delegated to strncmp with an explicit bound;
// otherwise the general __strcmp routine is used.
__attribute__((weak, always_inline))
int strcmp(const char *s1, const char *s2) {
  if (__builtin_constant_p(strlen(s2))) {
    // The bound has to include s2's terminating NUL. Comparing only
    // strlen(s2) bytes would report equality for any s1 that merely
    // starts with s2 (e.g. "abcd" vs "abc").
    return strncmp(s1, s2, strlen(s2) + 1);
  }
  return __strcmp(s1, s2);
}
151+
147152
__attribute__((weak))
148153
int strncmp(const char *s1, const char *s2, size_t n) {
149154
const v128_t *const limit =
@@ -173,12 +178,7 @@ int strncmp(const char *s1, const char *s2, size_t n) {
173178
return 0;
174179
}
175180

176-
__attribute__((always_inline))
177181
static char *__strchrnul(const char *s, int c) {
178-
if (__builtin_constant_p(c) && (char)c == 0) {
179-
return (char *)s + strlen(s);
180-
}
181-
182182
uintptr_t align = (uintptr_t)s % sizeof(v128_t);
183183
const v128_t *w = (void *)(s - align);
184184
const v128_t wc = wasm_i8x16_splat(c);
@@ -197,13 +197,19 @@ static char *__strchrnul(const char *s, int c) {
197197
}
198198
}
199199

200-
__attribute__((weak))
200+
// Public strchrnul entry point.
//
// If c is a compile-time constant equal to NUL, the result is simply the
// end of the string (s + strlen(s)). Every other case is forwarded to
// __strchrnul.
__attribute__((weak, always_inline))
char *strchrnul(const char *s, int c) {
  if (!__builtin_constant_p(c) || (char)c != 0) {
    // General case: c is not known to be NUL at compile time.
    return __strchrnul(s, c);
  }
  return (char *)s + strlen(s);
}
204207

205-
__attribute__((weak))
208+
// Public strchr entry point.
//
// If c is a compile-time constant equal to NUL, returns s + strlen(s).
// Otherwise takes the position reported by __strchrnul and returns it only
// when the byte stored there equals (char)c; if it does not, returns NULL.
__attribute__((weak, always_inline))
char *strchr(const char *s, int c) {
  if (!__builtin_constant_p(c) || (char)c != 0) {
    char *hit = __strchrnul(s, c);
    if (*hit != (char)c) {
      return NULL;
    }
    return hit;
  }
  return (char *)s + strlen(s);
}

0 commit comments

Comments
 (0)