Skip to content

Commit b64b9b0

Browse files
committed
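Better strcasestr: compare raw haystack blocks against both the lower- and upper-case splats of the needle's first and last bytes, instead of lowercasing every loaded block.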
1 parent 9142e19 commit b64b9b0

File tree

3 files changed

+112
-79
lines changed

3 files changed

+112
-79
lines changed

sqlite3/libc/libc.wasm

-98 Bytes
Binary file not shown.

sqlite3/libc/libc.wat

Lines changed: 102 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -3700,10 +3700,13 @@
37003700
(local $6 i32)
37013701
(local $7 i32)
37023702
(local $8 i32)
3703-
(local $9 v128)
3703+
(local $9 i32)
37043704
(local $10 v128)
37053705
(local $11 v128)
37063706
(local $12 v128)
3707+
(local $13 v128)
3708+
(local $14 v128)
3709+
(local $15 v128)
37073710
(if
37083711
(i32.eqz
37093712
(local.tee $2
@@ -3718,7 +3721,7 @@
37183721
)
37193722
)
37203723
)
3721-
(local.set $5
3724+
(local.set $3
37223725
(i32.extend8_s
37233726
(local.get $2)
37243727
)
@@ -3738,33 +3741,33 @@
37383741
)
37393742
)
37403743
)
3741-
(local.set $4
3744+
(local.set $5
37423745
(local.get $6)
37433746
)
37443747
(loop $label
37453748
(if
37463749
(i32.ne
3747-
(local.tee $3
3750+
(local.tee $4
37483751
(i32.load8_u
37493752
(i32.add
37503753
(local.get $1)
3751-
(local.get $4)
3754+
(local.get $5)
37523755
)
37533756
)
37543757
)
37553758
(local.get $2)
37563759
)
37573760
(then
37583761
(local.set $2
3759-
(local.get $3)
3762+
(local.get $4)
37603763
)
37613764
(br $block)
37623765
)
37633766
)
37643767
(br_if $label
3765-
(local.tee $4
3768+
(local.tee $5
37663769
(i32.sub
3767-
(local.get $4)
3770+
(local.get $5)
37683771
(i32.const 1)
37693772
)
37703773
)
@@ -3778,27 +3781,43 @@
37783781
)
37793782
)
37803783
)
3781-
(local.set $4
3784+
(local.set $5
37823785
(local.get $6)
37833786
)
37843787
)
3785-
(local.set $3
3788+
(local.set $4
37863789
(select
37873790
(i32.or
3788-
(local.get $5)
3791+
(local.get $3)
37893792
(i32.const 32)
37903793
)
3791-
(local.get $5)
3794+
(local.get $3)
37923795
(i32.lt_u
37933796
(i32.sub
3794-
(local.get $5)
3797+
(local.get $3)
37953798
(i32.const 65)
37963799
)
37973800
(i32.const 26)
37983801
)
37993802
)
38003803
)
3801-
(local.set $2
3804+
(local.set $3
3805+
(select
3806+
(i32.and
3807+
(local.get $3)
3808+
(i32.const 95)
3809+
)
3810+
(local.get $3)
3811+
(i32.lt_u
3812+
(i32.sub
3813+
(local.get $3)
3814+
(i32.const 97)
3815+
)
3816+
(i32.const 26)
3817+
)
3818+
)
3819+
)
3820+
(local.set $8
38023821
(select
38033822
(i32.or
38043823
(local.tee $2
@@ -3818,37 +3837,63 @@
38183837
)
38193838
)
38203839
)
3840+
(local.set $2
3841+
(select
3842+
(i32.and
3843+
(local.get $2)
3844+
(i32.const 95)
3845+
)
3846+
(local.get $2)
3847+
(i32.lt_u
3848+
(i32.sub
3849+
(local.get $2)
3850+
(i32.const 97)
3851+
)
3852+
(i32.const 26)
3853+
)
3854+
)
3855+
)
38213856
(block $block3
38223857
(block $block4
38233858
(block $block1
38243859
(br_if $block1
38253860
(i32.lt_u
3826-
(local.tee $5
3861+
(local.tee $9
38273862
(i32.sub
38283863
(i32.sub
38293864
(i32.shl
38303865
(memory.size)
38313866
(i32.const 16)
38323867
)
3833-
(local.get $4)
3868+
(local.get $5)
38343869
)
38353870
(i32.const 16)
38363871
)
38373872
)
38383873
(local.get $0)
38393874
)
38403875
)
3841-
(local.set $10
3876+
(local.set $11
3877+
(i8x16.splat
3878+
(local.get $4)
3879+
)
3880+
)
3881+
(local.set $12
38423882
(i8x16.splat
38433883
(local.get $3)
38443884
)
38453885
)
3846-
(local.set $11
3886+
(local.set $13
3887+
(i8x16.splat
3888+
(local.get $8)
3889+
)
3890+
)
3891+
(local.set $14
38473892
(i8x16.splat
38483893
(local.get $2)
38493894
)
38503895
)
3851-
(local.set $8
3896+
(local.set $3
38523897
(i32.add
38533898
(local.get $1)
38543899
(i32.const 1)
@@ -3857,60 +3902,44 @@
38573902
(loop $label2
38583903
(local.set $2
38593904
(i8x16.all_true
3860-
(local.tee $9
3861-
(v128.or
3862-
(v128.bitselect
3863-
(v128.const i32x4 0x20202020 0x20202020 0x20202020 0x20202020)
3864-
(v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000)
3865-
(i8x16.gt_s
3866-
(i8x16.add
3867-
(local.tee $9
3868-
(v128.load align=1
3869-
(local.get $0)
3870-
)
3871-
)
3872-
(v128.const i32x4 0x25252525 0x25252525 0x25252525 0x25252525)
3873-
)
3874-
(v128.const i32x4 0x65656565 0x65656565 0x65656565 0x65656565)
3875-
)
3876-
)
3877-
(local.get $9)
3905+
(local.tee $10
3906+
(v128.load align=1
3907+
(local.get $0)
38783908
)
38793909
)
38803910
)
38813911
)
38823912
(block $block2
38833913
(if
38843914
(v128.any_true
3885-
(local.tee $9
3915+
(local.tee $10
38863916
(v128.and
3887-
(i8x16.eq
3888-
(local.get $11)
3889-
(v128.or
3890-
(v128.bitselect
3891-
(v128.const i32x4 0x20202020 0x20202020 0x20202020 0x20202020)
3892-
(v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000)
3893-
(i8x16.gt_s
3894-
(i8x16.add
3895-
(local.tee $12
3896-
(v128.load align=1
3897-
(i32.add
3898-
(local.get $0)
3899-
(local.get $4)
3900-
)
3901-
)
3902-
)
3903-
(v128.const i32x4 0x25252525 0x25252525 0x25252525 0x25252525)
3917+
(v128.or
3918+
(i8x16.eq
3919+
(local.get $13)
3920+
(local.tee $15
3921+
(v128.load align=1
3922+
(i32.add
3923+
(local.get $0)
3924+
(local.get $5)
39043925
)
3905-
(v128.const i32x4 0x65656565 0x65656565 0x65656565 0x65656565)
39063926
)
39073927
)
3908-
(local.get $12)
3928+
)
3929+
(i8x16.eq
3930+
(local.get $14)
3931+
(local.get $15)
39093932
)
39103933
)
3911-
(i8x16.eq
3912-
(local.get $10)
3913-
(local.get $9)
3934+
(v128.or
3935+
(i8x16.eq
3936+
(local.get $11)
3937+
(local.get $10)
3938+
)
3939+
(i8x16.eq
3940+
(local.get $12)
3941+
(local.get $10)
3942+
)
39143943
)
39153944
)
39163945
)
@@ -3925,7 +3954,7 @@
39253954
(i32.eqz
39263955
(local.tee $2
39273956
(i8x16.bitmask
3928-
(local.get $9)
3957+
(local.get $10)
39293958
)
39303959
)
39313960
)
@@ -3935,7 +3964,7 @@
39353964
(i32.eqz
39363965
(call $strncasecmp
39373966
(i32.add
3938-
(local.tee $3
3967+
(local.tee $4
39393968
(i32.add
39403969
(local.get $0)
39413970
(i32.ctz
@@ -3945,7 +3974,7 @@
39453974
)
39463975
(i32.const 1)
39473976
)
3948-
(local.get $8)
3977+
(local.get $3)
39493978
(local.get $6)
39503979
)
39513980
)
@@ -3979,12 +4008,12 @@
39794008
(i32.const 16)
39804009
)
39814010
)
3982-
(local.get $5)
4011+
(local.get $9)
39834012
)
39844013
)
39854014
)
39864015
)
3987-
(local.set $3
4016+
(local.set $4
39884017
(local.get $0)
39894018
)
39904019
(local.set $2
@@ -3997,7 +4026,7 @@
39974026
(i32.load8_s
39984027
(i32.add
39994028
(local.get $2)
4000-
(local.get $3)
4029+
(local.get $4)
40014030
)
40024031
)
40034032
)
@@ -4007,7 +4036,7 @@
40074036
(i32.eq
40084037
(select
40094038
(i32.or
4010-
(local.tee $4
4039+
(local.tee $5
40114040
(i32.load8_s
40124041
(i32.add
40134042
(local.get $1)
@@ -4017,10 +4046,10 @@
40174046
)
40184047
(i32.const 32)
40194048
)
4020-
(local.get $4)
4049+
(local.get $5)
40214050
(i32.lt_u
40224051
(i32.sub
4023-
(local.get $4)
4052+
(local.get $5)
40244053
(i32.const 65)
40254054
)
40264055
(i32.const 26)
@@ -4055,9 +4084,9 @@
40554084
)
40564085
)
40574086
(else
4058-
(local.set $3
4087+
(local.set $4
40594088
(i32.add
4060-
(local.get $3)
4089+
(local.get $4)
40614090
(i32.const 1)
40624091
)
40634092
)
@@ -4070,11 +4099,11 @@
40704099
)
40714100
(unreachable)
40724101
)
4073-
(local.set $3
4102+
(local.set $4
40744103
(i32.const 0)
40754104
)
40764105
)
4077-
(local.get $3)
4106+
(local.get $4)
40784107
)
40794108
(func $memccpy (param $0 i32) (param $1 i32) (param $2 i32) (param $3 i32) (result i32)
40804109
(memory.copy

sqlite3/libc/string.h

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -616,18 +616,22 @@ char *strcasestr(const char *haystk, const char *needle) {
616616
while (i > 0 && needle[0] == needle[i]) i--;
617617
if (i == 0) i = sn - 1;
618618

619-
const v128_t fst = wasm_i8x16_splat(tolower(needle[0]));
620-
const v128_t lst = wasm_i8x16_splat(tolower(needle[i]));
619+
const v128_t fstl = wasm_i8x16_splat(tolower(needle[0]));
620+
const v128_t fstu = wasm_i8x16_splat(toupper(needle[0]));
621+
const v128_t lstl = wasm_i8x16_splat(tolower(needle[i]));
622+
const v128_t lstu = wasm_i8x16_splat(toupper(needle[i]));
621623

622624
// The last haystk offset for which loading blk_lst is safe.
623625
const char *H =
624626
(char *)(__builtin_wasm_memory_size(0) * PAGESIZE - i - sizeof(v128_t));
625627

626628
while (haystk <= H) {
627-
const v128_t blk_fst = __tolower8x16(wasm_v128_load((v128_t *)(haystk)));
628-
const v128_t blk_lst = __tolower8x16(wasm_v128_load((v128_t *)(haystk + i)));
629-
const v128_t eq_fst = wasm_i8x16_eq(fst, blk_fst);
630-
const v128_t eq_lst = wasm_i8x16_eq(lst, blk_lst);
629+
const v128_t blk_fst = wasm_v128_load((v128_t *)(haystk));
630+
const v128_t blk_lst = wasm_v128_load((v128_t *)(haystk + i));
631+
const v128_t eq_fst =
632+
wasm_i8x16_eq(fstl, blk_fst) | wasm_i8x16_eq(fstu, blk_fst);
633+
const v128_t eq_lst =
634+
wasm_i8x16_eq(lstl, blk_lst) | wasm_i8x16_eq(lstu, blk_lst);
631635

632636
const v128_t cmp = eq_fst & eq_lst;
633637
if (wasm_v128_any_true(cmp)) {

0 commit comments

Comments
 (0)