
Commit d7aef63

Naming, volatile.
1 parent 64e5046 commit d7aef63

File tree

1 file changed: +51 -45 lines changed

1 file changed

+51
-45
lines changed

sqlite3/libc/string.h

Lines changed: 51 additions & 45 deletions
@@ -39,11 +39,11 @@ void *memmove(void *dest, const void *src, size_t n) {
 #ifdef __wasm_simd128__
 
 __attribute__((weak))
-int memcmp(const void *v1, const void *v2, size_t n) {
+int memcmp(const void *vl, const void *vr, size_t n) {
   // Scalar algorithm.
   if (n < sizeof(v128_t)) {
-    const unsigned char *u1 = (unsigned char *)v1;
-    const unsigned char *u2 = (unsigned char *)v2;
+    const unsigned char *u1 = (unsigned char *)vl;
+    const unsigned char *u2 = (unsigned char *)vr;
     while (n--) {
       if (*u1 != *u2) return *u1 - *u2;
       u1++;
@@ -56,32 +56,32 @@ int memcmp(const void *v1, const void *v2, size_t n) {
   // Find the first different character in the objects.
   // Unaligned loads handle the case where the objects
   // have mismatching alignments.
-  const v128_t *w1 = (v128_t *)v1;
-  const v128_t *w2 = (v128_t *)v2;
+  const v128_t *v1 = (v128_t *)vl;
+  const v128_t *v2 = (v128_t *)vr;
   while (n) {
-    const v128_t cmp = wasm_i8x16_eq(wasm_v128_load(w1), wasm_v128_load(w2));
+    const v128_t cmp = wasm_i8x16_eq(wasm_v128_load(v1), wasm_v128_load(v2));
     // Bitmask is slow on AArch64, all_true is much faster.
     if (!wasm_i8x16_all_true(cmp)) {
       // Find the offset of the first zero bit (little-endian).
       size_t ctz = __builtin_ctz(~wasm_i8x16_bitmask(cmp));
-      const unsigned char *u1 = (unsigned char *)w1 + ctz;
-      const unsigned char *u2 = (unsigned char *)w2 + ctz;
+      const unsigned char *u1 = (unsigned char *)v1 + ctz;
+      const unsigned char *u2 = (unsigned char *)v2 + ctz;
       // This may help the compiler if the function is inlined.
       __builtin_assume(*u1 - *u2 != 0);
       return *u1 - *u2;
     }
     // This makes n a multiple of sizeof(v128_t)
     // for every iteration except the first.
     size_t align = (n - 1) % sizeof(v128_t) + 1;
-    w1 = (v128_t *)((char *)w1 + align);
-    w2 = (v128_t *)((char *)w2 + align);
+    v1 = (v128_t *)((char *)v1 + align);
+    v2 = (v128_t *)((char *)v2 + align);
     n -= align;
   }
   return 0;
 }
 
 __attribute__((weak))
-void *memchr(const void *v, int c, size_t n) {
+void *memchr(const void *s, int c, size_t n) {
   // When n is zero, a function that locates a character finds no occurrence.
   // Otherwise, decrement n to ensure sub_overflow overflows
   // when n would go equal-to-or-below zero.
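Note on the memcmp fast path above: wasm_i8x16_eq sets a lane to 0xFF where the bytes match, wasm_i8x16_bitmask packs the 16 lane sign bits little-endian, so the complement of the bitmask has its lowest set bit at the first mismatch. A minimal standalone sketch of just that step, assuming a wasm32 target built with -msimd128 (first_diff16 is an illustrative name, not part of the file):

    #include <stdint.h>
    #include <wasm_simd128.h>

    // Index of the first differing byte between two 16-byte blocks,
    // or 16 if they are equal; mirrors the SIMD step in memcmp above.
    static unsigned first_diff16(const void *a, const void *b) {
      const v128_t cmp = wasm_i8x16_eq(wasm_v128_load(a), wasm_v128_load(b));
      // Matching lanes contribute 1-bits; complement and keep the low 16
      // bits so the lowest set bit marks the first mismatch.
      uint32_t diff = ~wasm_i8x16_bitmask(cmp) & 0xFFFF;
      return diff ? (unsigned)__builtin_ctz(diff) : 16;
    }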
@@ -92,12 +92,13 @@ void *memchr(const void *v, int c, size_t n) {
   // memchr must behave as if it reads characters sequentially
   // and stops as soon as a match is found.
   // Aligning ensures loads beyond the first match are safe.
-  uintptr_t align = (uintptr_t)v % sizeof(v128_t);
-  const v128_t *w = (v128_t *)((char *)v - align);
-  const v128_t wc = wasm_i8x16_splat(c);
+  // Volatile avoids compiler tricks around out of bounds loads.
+  uintptr_t align = (uintptr_t)s % sizeof(v128_t);
+  const volatile v128_t *v = (v128_t *)((char *)s - align);
+  const v128_t vc = wasm_i8x16_splat(c);
 
   for (;;) {
-    const v128_t cmp = wasm_i8x16_eq(*w, wc);
+    const v128_t cmp = wasm_i8x16_eq(*v, vc);
     // Bitmask is slow on AArch64, any_true is much faster.
     if (wasm_v128_any_true(cmp)) {
       // Clear the bits corresponding to alignment (little-endian)
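The new comment and qualifier above are the point of this commit: the pointer is rounded down to a 16-byte boundary, so the vector load can touch bytes before s and, in later iterations, bytes past the end of the object. On wasm an aligned 16-byte load stays inside one 16-byte block and so cannot cross a page or trap, but the compiler is still free to treat the out-of-bounds access as undefined behavior and optimize around it; making the pointer volatile forces the load to be emitted as written and blocks that reasoning. A sketch of the align-down pattern, with a hypothetical helper name:

    #include <stdint.h>
    #include <wasm_simd128.h>

    // Load the aligned 16-byte block containing s (illustrative helper).
    // Rounding down keeps the load inside one 16-byte block, so it cannot
    // cross a page; volatile keeps the compiler from optimizing based on
    // the bytes that lie outside the object.
    static v128_t load_aligned_block(const char *s, uintptr_t *align) {
      *align = (uintptr_t)s % sizeof(v128_t);
      const volatile v128_t *v = (const v128_t *)(s - *align);
      return *v;
    }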
@@ -113,36 +114,36 @@ void *memchr(const void *v, int c, size_t n) {
         // That's a match, unless it is beyond the end of the object.
         // Recall that we decremented n, so less-than-or-equal-to is correct.
         size_t ctz = __builtin_ctz(mask);
-        return ctz - align <= n ? (char *)w + ctz : NULL;
+        return ctz - align <= n ? (char *)v + ctz : NULL;
       }
     }
     // Decrement n; if it overflows we're done.
     if (__builtin_sub_overflow(n, sizeof(v128_t) - align, &n)) {
       return NULL;
     }
     align = 0;
-    w++;
+    v++;
   }
 }
 
 __attribute__((weak))
-void *memrchr(const void *v, int c, size_t n) {
+void *memrchr(const void *s, int c, size_t n) {
   // memrchr is allowed to read up to n bytes from the object.
   // Search backward for the last matching character.
-  const v128_t *w = (v128_t *)((char *)v + n);
-  const v128_t wc = wasm_i8x16_splat(c);
+  const v128_t *v = (v128_t *)((char *)s + n);
+  const v128_t vc = wasm_i8x16_splat(c);
   for (; n >= sizeof(v128_t); n -= sizeof(v128_t)) {
-    const v128_t cmp = wasm_i8x16_eq(wasm_v128_load(--w), wc);
+    const v128_t cmp = wasm_i8x16_eq(wasm_v128_load(--v), vc);
     // Bitmask is slow on AArch64, any_true is much faster.
     if (wasm_v128_any_true(cmp)) {
       // Find the offset of the last one bit (little-endian).
       size_t clz = __builtin_clz(wasm_i8x16_bitmask(cmp)) - 15;
-      return (char *)(w + 1) - clz;
+      return (char *)(v + 1) - clz;
     }
   }
 
   // Scalar algorithm.
-  const char *a = (char *)w;
+  const char *a = (char *)v;
   while (n--) {
     if (*(--a) == (char)c) return (char *)a;
   }
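On the memrchr return above: the 16 lane bits sit in a 32-bit int, so __builtin_clz(mask) is 31 - k for the highest set bit k, clz - 15 is 16 - k, and (char *)(v + 1), one past the loaded block, minus that offset lands on byte k, the last match. A small worked check of just that arithmetic (plain C, no SIMD needed):

    #include <assert.h>

    // clz arithmetic from memrchr: highest set lane k maps to an offset
    // of 16 - k back from the end of the 16-byte block.
    int main(void) {
      const char block[16] = {0};                // stand-in for the loaded block
      unsigned mask = (1u << 3) | (1u << 7);     // lanes 3 and 7 matched; want 7
      unsigned back = __builtin_clz(mask) - 15;  // 31 - 7 - 15 == 9 == 16 - 7
      assert((block + 16) - back == block + 7);  // points at the last match
      return 0;
    }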
@@ -154,12 +155,13 @@ size_t strlen(const char *s) {
   // strlen must stop as soon as it finds the terminator.
   // Aligning ensures loads beyond the terminator are safe.
   uintptr_t align = (uintptr_t)s % sizeof(v128_t);
-  const v128_t *w = (v128_t *)(s - align);
+  const volatile v128_t *v = (v128_t *)(s - align);
 
   for (;;) {
+    const v128_t vv = *v;
     // Bitmask is slow on AArch64, all_true is much faster.
-    if (!wasm_i8x16_all_true(*w)) {
-      const v128_t cmp = wasm_i8x16_eq(*w, (v128_t){});
+    if (!wasm_i8x16_all_true(vv)) {
+      const v128_t cmp = wasm_i8x16_eq(vv, (v128_t){});
       // Clear the bits corresponding to alignment (little-endian)
       // so we can count trailing zeros.
       int mask = wasm_i8x16_bitmask(cmp) >> align << align;
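The hoisted load above is a consequence of volatile: every syntactic read of *v must now be emitted, and the old loop dereferenced the pointer twice per iteration (once for all_true, once for the eq). Reading the block into vv once keeps it to a single load. A sketch of the per-block step with that shape (zero_lane is an illustrative name), assuming -msimd128:

    #include <wasm_simd128.h>

    // Index of the first zero byte in the 16-byte block at v, or 16 if
    // there is none; note the single volatile load into vv, reused twice.
    static unsigned zero_lane(const volatile v128_t *v) {
      const v128_t vv = *v;                    // one volatile load
      if (wasm_i8x16_all_true(vv)) return 16;  // no zero byte in this block
      const v128_t cmp = wasm_i8x16_eq(vv, (v128_t){});
      return (unsigned)__builtin_ctz(wasm_i8x16_bitmask(cmp));
    }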
@@ -170,11 +172,11 @@ size_t strlen(const char *s) {
       // it's as if we didn't find anything.
       if (mask) {
         // Find the offset of the first one bit (little-endian).
-        return (char *)w - s + __builtin_ctz(mask);
+        return (char *)v - s + __builtin_ctz(mask);
       }
     }
     align = 0;
-    w++;
+    v++;
   }
 }
 
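The mask >> align << align idiom in the context above discards the lanes that precede s within the first aligned block, so a stray zero byte before the start of the string is never mistaken for its terminator; after the first iteration align is 0 and the shifts are no-ops. A worked check of the bit manipulation alone:

    #include <assert.h>

    // Shifting right then left by align clears exactly the low align bits.
    int main(void) {
      int mask = 0x0105;  // lanes 0, 2 and 8 compared equal
      int align = 3;      // s starts at byte 3 of the aligned block
      assert((mask >> align << align) == 0x0100);  // lanes 0 and 2 discarded
      return 0;
    }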
@@ -268,12 +270,14 @@ int strncmp(const char *s1, const char *s2, size_t n) {
 static char *__strchrnul(const char *s, int c) {
   // strchrnul must stop as soon as it finds the terminator.
   // Aligning ensures loads beyond the terminator are safe.
+  // Volatile avoids compiler tricks around out of bounds loads.
   uintptr_t align = (uintptr_t)s % sizeof(v128_t);
-  const v128_t *w = (v128_t *)(s - align);
-  const v128_t wc = wasm_i8x16_splat(c);
+  const volatile v128_t *v = (v128_t *)(s - align);
+  const v128_t vc = wasm_i8x16_splat(c);
 
   for (;;) {
-    const v128_t cmp = wasm_i8x16_eq(*w, (v128_t){}) | wasm_i8x16_eq(*w, wc);
+    const v128_t vv = *v;
+    const v128_t cmp = wasm_i8x16_eq(vv, (v128_t){}) | wasm_i8x16_eq(vv, vc);
     // Bitmask is slow on AArch64, any_true is much faster.
     if (wasm_v128_any_true(cmp)) {
       // Clear the bits corresponding to alignment (little-endian)
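__strchrnul above now also hoists the volatile load, and the interesting part is the combined comparison: the block is tested against both the terminator and c, the two equality masks are OR-ed, and a single any_true decides whether the block needs a closer look. A standalone sketch of that test, assuming -msimd128 (block_has_nul_or_c is an illustrative name):

    #include <wasm_simd128.h>

    // True if the 16-byte block at v contains '\0' or the byte c:
    // one load, two lane-wise comparisons, OR-ed, one any_true.
    static int block_has_nul_or_c(const volatile v128_t *v, int c) {
      const v128_t vv = *v;  // single volatile load
      const v128_t vc = wasm_i8x16_splat(c);
      const v128_t cmp = wasm_i8x16_eq(vv, (v128_t){}) | wasm_i8x16_eq(vv, vc);
      return wasm_v128_any_true(cmp);
    }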
@@ -286,11 +290,11 @@ static char *__strchrnul(const char *s, int c) {
       // it's as if we didn't find anything.
       if (mask) {
         // Find the offset of the first one bit (little-endian).
-        return (char *)w + __builtin_ctz(mask);
+        return (char *)v + __builtin_ctz(mask);
       }
     }
     align = 0;
-    w++;
+    v++;
   }
 }
 
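__strchrnul is a static helper, so the diff only renames inside it; the usual reason a libc keeps such a helper is that strchr and strchrnul differ only in what they return on a miss. A sketch of that conventional layering (the diff does not show how this file actually defines strchr, so treat the wiring as an assumption):

    #include <stddef.h>

    // Assumed to be provided by the helper above in the same file.
    char *__strchrnul(const char *s, int c);

    // strchrnul points either at c or at the terminator; strchr only has
    // to check which of the two it found.
    static char *strchr_sketch(const char *s, int c) {
      char *r = __strchrnul(s, c);
      return *(unsigned char *)r == (unsigned char)c ? r : NULL;
    }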
@@ -371,14 +375,15 @@ __attribute__((weak))
 size_t strspn(const char *s, const char *c) {
   // strspn must stop as soon as it finds the terminator.
   // Aligning ensures loads beyond the terminator are safe.
+  // Volatile avoids compiler tricks around out of bounds loads.
   uintptr_t align = (uintptr_t)s % sizeof(v128_t);
-  const v128_t *w = (v128_t *)(s - align);
+  const volatile v128_t *v = (v128_t *)(s - align);
 
   if (!c[0]) return 0;
   if (!c[1]) {
-    const v128_t wc = wasm_i8x16_splat(*c);
+    const v128_t vc = wasm_i8x16_splat(*c);
     for (;;) {
-      const v128_t cmp = wasm_i8x16_eq(*w, wc);
+      const v128_t cmp = wasm_i8x16_eq(*v, vc);
       // Bitmask is slow on AArch64, all_true is much faster.
       if (!wasm_i8x16_all_true(cmp)) {
         // Clear the bits corresponding to alignment (little-endian)
@@ -391,11 +396,11 @@ size_t strspn(const char *s, const char *c) {
         // it's as if we didn't find anything.
         if (mask) {
           // Find the offset of the first one bit (little-endian).
-          return (char *)w - s + __builtin_ctz(mask);
+          return (char *)v - s + __builtin_ctz(mask);
         }
       }
       align = 0;
-      w++;
+      v++;
     }
   }
 
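The branch closed above is strspn's single-character fast path: the accept set has exactly one byte, so the scan is just a splat-and-compare for the leading run of that byte. Its observable behavior, for reference:

    #include <assert.h>
    #include <string.h>

    int main(void) {
      assert(strspn("aaabc", "a") == 3);  // leading run of 'a'
      assert(strspn("bcd", "a") == 0);    // no leading 'a' at all
      return 0;
    }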
@@ -407,7 +412,7 @@ size_t strspn(const char *s, const char *c) {
   }
 
   for (;;) {
-    const v128_t cmp = __wasm_v128_chkbits(bitmap, *w);
+    const v128_t cmp = __wasm_v128_chkbits(bitmap, *v);
     // Bitmask is slow on AArch64, all_true is much faster.
     if (!wasm_i8x16_all_true(cmp)) {
       // Clear the bits corresponding to alignment (little-endian)
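For accept sets longer than one character, strspn switches to __wasm_v128_bitmap256_t and __wasm_v128_chkbits, this file's own helpers (their definitions are outside the hunk): conceptually a 256-bit membership set, one bit per possible byte value, checked for 16 bytes at a time. A scalar sketch of the same idea with hypothetical names, to make the bitmap's role concrete:

    #include <stddef.h>
    #include <stdint.h>

    // One bit per possible byte value; set a bit for each byte in the set.
    typedef struct { uint32_t bits[8]; } byteset256;

    static void byteset_add(byteset256 *set, unsigned char b) {
      set->bits[b >> 5] |= 1u << (b & 31);
    }
    static int byteset_has(const byteset256 *set, unsigned char b) {
      return (set->bits[b >> 5] >> (b & 31)) & 1;
    }

    // Scalar strspn over the bitmap: count leading bytes of s in the set.
    static size_t strspn_scalar(const char *s, const char *c) {
      byteset256 set = {0};
      for (; *c; c++) byteset_add(&set, (unsigned char)*c);
      size_t i = 0;
      while (byteset_has(&set, (unsigned char)s[i])) i++;  // '\0' never in set
      return i;
    }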
@@ -420,11 +425,11 @@ size_t strspn(const char *s, const char *c) {
       // it's as if we didn't find anything.
       if (mask) {
         // Find the offset of the first one bit (little-endian).
-        return (char *)w - s + __builtin_ctz(mask);
+        return (char *)v - s + __builtin_ctz(mask);
       }
     }
     align = 0;
-    w++;
+    v++;
   }
 }
 
@@ -434,8 +439,9 @@ size_t strcspn(const char *s, const char *c) {
 
   // strcspn must stop as soon as it finds the terminator.
   // Aligning ensures loads beyond the terminator are safe.
+  // Volatile avoids compiler tricks around out of bounds loads.
   uintptr_t align = (uintptr_t)s % sizeof(v128_t);
-  const v128_t *w = (v128_t *)(s - align);
+  const volatile v128_t *v = (v128_t *)(s - align);
 
   __wasm_v128_bitmap256_t bitmap = {};
 
@@ -445,7 +451,7 @@ size_t strcspn(const char *s, const char *c) {
   } while (*c++);
 
   for (;;) {
-    const v128_t cmp = __wasm_v128_chkbits(bitmap, *w);
+    const v128_t cmp = __wasm_v128_chkbits(bitmap, *v);
     // Bitmask is slow on AArch64, any_true is much faster.
     if (wasm_v128_any_true(cmp)) {
       // Clear the bits corresponding to alignment (little-endian)
@@ -458,11 +464,11 @@ size_t strcspn(const char *s, const char *c) {
       // it's as if we didn't find anything.
      if (mask) {
         // Find the offset of the first one bit (little-endian).
-        return (char *)w - s + __builtin_ctz(mask);
+        return (char *)v - s + __builtin_ctz(mask);
       }
     }
     align = 0;
-    w++;
+    v++;
   }
 }
 
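strcspn builds the same kind of bitmap from the reject set, and the do { ... } while (*c++) loop visible in the context above suggests the terminator's bit is set as well, so '\0' ends the scan without a separate test (strspn gets the same effect the opposite way: the terminator is never in the accept set). For reference, the semantics the SIMD versions must preserve:

    #include <assert.h>
    #include <string.h>

    int main(void) {
      assert(strspn("112358", "0123456789") == 6);  // all digits: whole string
      assert(strspn("3.14", "0123456789") == 1);    // stops at '.'
      assert(strcspn("key=value", "=") == 3);       // bytes before the first '='
      assert(strcspn("no match here", "=") == 13);  // no '=': string length
      return 0;
    }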