Skip to content

Commit 5fb5ca6

Browse files
authored
Merge: UTF-8 "Find-All" Iterators for Python 🐍
2 parents 682d2ba + 4b6f40f commit 5fb5ca6

File tree

7 files changed

+1251
-669
lines changed

7 files changed

+1251
-669
lines changed

.git-blame-ignore-revs

Lines changed: 50 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,44 +1,61 @@
1-
6512f1d129aeddc8601c9df7332c135038914b68
2-
fc9e5d61e5fb1c5031f6f10920f6b50e2530de1e
3-
ad2af78f8651870727c5b39e1fea2eff26d71d2f
4-
49e8d9d240993bdf68715a9c87824a032752798d
5-
fc408fa0a0f2d947c610568bd7a5c4a60ecca443
6-
b835051c09a0ecfc420932de444f3c6839610764
1+
085d2d3c8b99e0f90d320dd027040e554e410929
2+
08d0a20d35d3b29a44b9c8a826d53435c3ef839c
3+
0d982a45f842287d7e344f0d8b360f52482017f5
4+
10d829efcb8ed4cfa5f2db4050f8403184484423
5+
14ba3bf3c43408438a7de9ad57118c747c1347b1
76
1ba7982559111d4fc9b58caa7bc7aa1c6e64257c
8-
5b55e19d1378c61da88309b30a38f9cf7c64bf79
9-
be4c63d926c8628451726863e4d14dbd1ea374dd
10-
8b401bd41e4bd9c29c8fad9a5b83d8232efa50c7
7+
1f60e6d7c81f0e285e594eb63fee6119e05a3e69
8+
22e3d1e34d62d68c1e89df7c8bdc201faa18a9de
119
295d49a38d66b08075357ac829ad66d80b5edab0
1210
2a1fcd113d217e3124f6501c38e93a318aca37f0
1311
2f7652141bd8dc3c2c38ab34321567bfcdb91d93
14-
9e3180019acffe5261f0a1713b4ea324dca79ea0
15-
45e57eefd796841cbd14ee7f75ec42b42b5bde0c
16-
66778d6b2b3aa0eed27e32fbdceef79b8c54eda5
17-
c357c3ea756523d3bcc8d8f25068ad08aef5456d
18-
9b1948b3771c21dd56954e5f43301ca8a0b8b1a9
19-
cbfe5c7ac6371047eae88621b092297474d0b82a
20-
085d2d3c8b99e0f90d320dd027040e554e410929
12+
320bddd1eef68d53fdd67ecd2694c677be84b037
2113
3464cb428ae9a8721ab82a8c4bff214aa9ce6254
22-
5d0d2da422c7df96f9613ec843cd47c579a2edce
23-
89c46810c2f9bfafa31f8592339f9a1b45dcc245
14+
36d6ba72687024ee2e4e015b3e6f5a9741ca58cd
2415
3f9c248fbf59add2246055462e8fc19dc9f1693b
25-
e23c35ff2c2d4ccb752f4ffbf9b6f39a1677b532
16+
45e57eefd796841cbd14ee7f75ec42b42b5bde0c
17+
467b4b81cb4bc0e9a64844748a417762378918c9
18+
49e8d9d240993bdf68715a9c87824a032752798d
19+
5b55e19d1378c61da88309b30a38f9cf7c64bf79
20+
5d0d2da422c7df96f9613ec843cd47c579a2edce
21+
6512f1d129aeddc8601c9df7332c135038914b68
22+
66778d6b2b3aa0eed27e32fbdceef79b8c54eda5
23+
74e3b6fce1a94820c26ab0d91efe08a483d1368d
2624
7fdc58fd26e06c41052287d47a9c729c068a95ca
27-
10d829efcb8ed4cfa5f2db4050f8403184484423
28-
d74e5dca2e62eb0078cb2ebacc0dac2b8bb92d54
29-
1f60e6d7c81f0e285e594eb63fee6119e05a3e69
30-
a6768af38b40307fe66364403f141c285b3e164c
31-
08d0a20d35d3b29a44b9c8a826d53435c3ef839c
32-
9e9f2567d052d635722921a1d70ec63d69ec6669
25+
86f5a23a6d54e0f11d04c4d5e9a64e3a558b4308
26+
87c14650b16a57e5b33f41b95835fc9024597845
27+
89c46810c2f9bfafa31f8592339f9a1b45dcc245
28+
89d553041e5618289cba61117f254e3963768ca5
29+
8b401bd41e4bd9c29c8fad9a5b83d8232efa50c7
30+
8cb0742b2d1b31b61fac5272f17017953c6677e6
3331
974ed78822dc0b519dd61bc1c4dc18d59fe4ad15
34-
b007ba571860e1d3737d1478c7f8d66ae1839e36
35-
14ba3bf3c43408438a7de9ad57118c747c1347b1
32+
9b1948b3771c21dd56954e5f43301ca8a0b8b1a9
33+
9e3180019acffe5261f0a1713b4ea324dca79ea0
3634
9e577be71dcd2e20854bf55f08c54854b3e82989
37-
8cb0742b2d1b31b61fac5272f17017953c6677e6
35+
9e9f2567d052d635722921a1d70ec63d69ec6669
36+
a6768af38b40307fe66364403f141c285b3e164c
37+
aa822968f9f9dfb596032d26ccbd3a286a5890a7
38+
ad2af78f8651870727c5b39e1fea2eff26d71d2f
39+
af181210c2446c942ed60c476efa866a148ad965
40+
af3ce4a6bd7cfdd7c438cb8c0238e783c08c5678
41+
b007ba571860e1d3737d1478c7f8d66ae1839e36
42+
b6b7825dce8c71f5e0ad8d01196c881156b6b710
43+
b835051c09a0ecfc420932de444f3c6839610764
3844
bd547453122e9f8565e5be15f137e7b0de37caca
39-
22e3d1e34d62d68c1e89df7c8bdc201faa18a9de
45+
be4c63d926c8628451726863e4d14dbd1ea374dd
46+
c34f94ce7c8b900f5878df383f55b230e7dc15dd
47+
c357c3ea756523d3bcc8d8f25068ad08aef5456d
48+
c811f3ba0cb83b156b2418e41e67405107e611b7
49+
cbfe5c7ac6371047eae88621b092297474d0b82a
50+
cc2b5a6b9cb941becf10ad3631dfa28620a71f10
51+
d74e5dca2e62eb0078cb2ebacc0dac2b8bb92d54
52+
d7e4c584e2f0e81fe3a696bab217ab5004a1ef45
53+
d9690bfa3194e266eeac0b5f3754d8e0f969cc6d
54+
de6ea0dd063b5e0850773a7ab7d4428e3e9ae4f6
55+
e0f8e220876952968eab0a8aee617eaf007f7bbd
56+
e23c35ff2c2d4ccb752f4ffbf9b6f39a1677b532
4057
ecb377541d0c706cf8997faff4f026b07e3f76f3
41-
0d982a45f842287d7e344f0d8b360f52482017f5
42-
467b4b81cb4bc0e9a64844748a417762378918c9
43-
74e3b6fce1a94820c26ab0d91efe08a483d1368d
44-
320bddd1eef68d53fdd67ecd2694c677be84b037
58+
efbfc27692263c3e4155bdd1561b8af36ae6537e
59+
f26ffc8d1ff373938be539e3e7bae0d51431eb8d
60+
fc408fa0a0f2d947c610568bd7a5c4a60ecca443
61+
fc9e5d61e5fb1c5031f6f10920f6b50e2530de1e

.github/workflows/release.yml

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -545,6 +545,12 @@ jobs:
545545
name: Publish JavaScript
546546
needs: versioning
547547
runs-on: ubuntu-24.04
548+
environment:
549+
name: npm-stringzilla
550+
url: https://www.npmjs.com/package/stringzilla
551+
permissions:
552+
contents: read
553+
id-token: write
548554
steps:
549555
- name: Checkout
550556
uses: actions/checkout@v5
@@ -556,16 +562,15 @@ jobs:
556562
uses: actions/setup-node@v5
557563
with:
558564
node-version: 20
565+
registry-url: "https://registry.npmjs.org"
559566
- name: Install dependencies
560-
run: npm install
561-
- name: Clean install dependencies
562-
run: npm install
567+
run: npm ci
563568
- name: Run tests
564569
run: npm test
565570
- name: Publish to NPM
566-
uses: JS-DevTools/npm-publish@v3
567-
with:
568-
token: ${{ secrets.NPM_TOKEN }}
571+
run: npm publish --provenance --access public
572+
env:
573+
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
569574

570575
create_linux_deb_package:
571576
name: Create Debian Package for ${{ matrix.arch }}
@@ -666,7 +671,7 @@ jobs:
666671
persist-credentials: false
667672
ref: "main"
668673
submodules: recursive
669-
674+
670675
- uses: ilammy/msvc-dev-cmd@v1
671676
with:
672677
arch: ${{ matrix.arch }}

README.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -578,6 +578,15 @@ import stringzilla as sz
578578
sz.utf8_case_insensitive_find('Der große Hund', 'GROSSE') # 4 — finds "große" at codepoint 4
579579
sz.utf8_case_insensitive_find('Straße', 'STRASSE') # 0 — ß matches "SS"
580580
sz.utf8_case_insensitive_find('efficient', 'EFFICIENT') # 0 — ffi ligature matches "FFI"
581+
582+
# Iterator for finding ALL matches
583+
haystack = 'Straße STRASSE strasse'
584+
for match in sz.utf8_case_insensitive_find_iter(haystack, 'strasse'):
585+
print(match, match.offset_within(haystack)) # Yields: 'Straße', 'STRASSE', 'strasse'
586+
587+
# Matches are non-overlapping by default; pass include_overlapping=True to include overlapping ones
588+
list(sz.utf8_case_insensitive_find_iter('aaaa', 'aa')) # ['aa', 'aa'] — 2 non-overlapping
589+
list(sz.utf8_case_insensitive_find_iter('aaaa', 'aa', include_overlapping=True)) # 3 matches
581590
```
582591

583592
### Collection-Level Operations

c/stringzilla.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -428,7 +428,7 @@ SZ_DYNAMIC void sz_sha256_state_update(sz_sha256_state_t *state, sz_cptr_t data,
428428
sz_dispatch_table.sha256_state_update(state, data, length);
429429
}
430430

431-
SZ_DYNAMIC void sz_sha256_state_digest(sz_sha256_state_t const *state, sz_u8_t *digest) {
431+
SZ_DYNAMIC void sz_sha256_state_digest(sz_sha256_state_t const *state, sz_u8_t digest[sz_at_least_(32)]) {
432432
sz_dispatch_table.sha256_state_digest(state, digest);
433433
}
434434

@@ -452,7 +452,7 @@ SZ_DYNAMIC void sz_fill(sz_ptr_t target, sz_size_t length, sz_u8_t value) {
452452
sz_dispatch_table.fill(target, length, value);
453453
}
454454

455-
SZ_DYNAMIC void sz_lookup(sz_ptr_t target, sz_size_t length, sz_cptr_t source, sz_cptr_t lut) {
455+
SZ_DYNAMIC void sz_lookup(sz_ptr_t target, sz_size_t length, sz_cptr_t source, char const lut[sz_at_least_(256)]) {
456456
sz_dispatch_table.lookup(target, length, source, lut);
457457
}
458458

0 commit comments

Comments
 (0)