Skip to content

Commit 3306f2c

Browse files
committed
add nextMatches function
1 parent 07a384d commit 3306f2c

File tree

4 files changed

+160
-16
lines changed

4 files changed

+160
-16
lines changed

packages/gearhash-wasm/README.md

Lines changed: 37 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,8 @@ const data = new Uint8Array(1000000); // Example: 1MB of data
1313

1414
// Search for a pattern with a specific mask
1515
const mask = 0x0000d90003530000n; // Example mask as a BigInt
16-
const matchResult = nextMatch(data, mask);
17-
18-
// matchIndex will be the position where the pattern was found
19-
// or -1 if no match was found
16+
const match = nextMatch(data, mask);
17+
const allMatches = nextMatches(data, mask).matches;
2018
```
2119

2220
The `nextMatch` function takes two parameters:
@@ -31,27 +29,56 @@ You can continuously feed data like this:
3129
let hash = 0n;
3230
const mask = 0x0000d90003530000n;
3331

34-
let position = 0;
32+
let length = 0; // extra length not processed
3533
for await (const chunk of dataSource) {
3634
let index = 0;
3735
while (1) {
3836
let match = nextMatch(chunk.subarray(index), mask, hash);
3937

4038
if (match.position !== -1) {
4139
console.log({
42-
position: match.position + position,
40+
length: match.position + length,
4341
hash: match.hash
4442
})
4543

4644
index += match.position;
47-
position = 0;
45+
length = 0;
4846
hash = 0n;
4947
} else {
50-
position += chunk.length - index;
48+
length += chunk.length - index;
hash = match.hash; // carry the rolling hash over to the next chunk
5149
break;
5250
}
5351
}
5452
}
5553

56-
console.log(position, "bytes without a match, ending hash: ", hash);
57-
```
54+
console.log(length, "bytes without a match, ending hash: ", hash);
55+
```
56+
57+
Or, more efficiently, with `nextMatches`, which returns every match in a chunk in a single call:
58+
59+
```javascript
60+
let hash = 0n;
61+
const mask = 0x0000d90003530000n;
62+
63+
let length = 0;
64+
for await (const chunk of dataSource) {
65+
const result = nextMatches(chunk, mask, hash);
66+
let lastPosition = 0;
67+
for (const match of result.matches) {
68+
console.log({
69+
length: match.position + length, // match.position is already relative to the previous match
70+
hash: match.hash
71+
});
72+
73+
length = 0;
74+
lastPosition = match.position;
75+
}
76+
length += result.remaining; // accumulate: a chunk without any match must not discard earlier unmatched bytes
77+
hash = result.hash;
78+
}
79+
80+
console.log(length, "bytes without a match, ending hash: ", hash);
81+
```
82+
83+
## Possible improvements
84+
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
export { DEFAULT_TABLE } from "./table";
2-
export { nextMatch } from "./next-match";
2+
export { nextMatch, nextMatches } from "./next-match";

packages/gearhash-wasm/assembly/next-match.ts

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,26 @@ export function nextMatch(buf: Uint8Array, mask: u64, hash: u64 = 0): MatchResul
2121

2222
return { position: -1, hash }; // Return -1 position to indicate no match found, along with the final hash
2323
}
24+
25+
/**
 * Aggregate result returned by `nextMatches`.
 *
 * - `matches`: one entry per match found, in the order they were found.
 *   Each entry is the value returned by a `nextMatch` call on the part of
 *   the buffer that follows the previous match, so its `position` is
 *   relative to the previous match (or to the start of the buffer for the
 *   first entry), not to the start of the whole buffer.
 * - `hash`: the hash returned by the last `nextMatch` call, i.e. the one
 *   that did not find a match.
 * - `remaining`: number of trailing bytes of the buffer located after the
 *   last match (the whole buffer length when there is no match).
 */
export class NextMatchesResult {
26+
matches: MatchResult[] = [];
27+
hash: u64 = 0;
28+
remaining: i32 = 0;
29+
}
30+
31+
/**
 * Repeatedly applies `nextMatch` to `buf` and collects every match.
 *
 * @param buf  Bytes to scan.
 * @param mask Mask forwarded unchanged to every `nextMatch` call.
 * @param hash Starting hash for the first search only; every search after
 *             a match restarts with a hash of 0.
 * @returns A `NextMatchesResult` whose `matches` hold positions relative to
 *          the previous match, whose `remaining` is the number of bytes
 *          after the last match, and whose `hash` is the hash returned by
 *          the final, unsuccessful `nextMatch` call.
 */
export function nextMatches(buf: Uint8Array, mask: u64, hash: u64 = 0): NextMatchesResult {
32+
const result = new NextMatchesResult();
33+
34+
let match = nextMatch(buf, mask, hash);
35+
// Running offset into `buf`: sum of the relative positions of all matches so far.
let position = 0;
36+
while (match.position !== -1) {
37+
result.matches.push(match);
38+
position += match.position;
39+
// Resume scanning on the view that starts at the accumulated offset, with a fresh hash.
match = nextMatch(buf.subarray(position), mask, 0);
40+
}
41+
42+
// Here `match` is the last call's result, which reported no match (position === -1).
result.remaining = buf.length - position;
43+
result.hash = match.hash;
44+
45+
return result;
46+
}
Lines changed: 99 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import assert from "assert";
2-
import { nextMatch } from "../build/debug.js";
2+
import { nextMatch, nextMatches } from "../build/debug.js";
33

44
// Simple seeded random number generator
55
function seededRandom(seed) {
@@ -12,12 +12,106 @@ function seededRandom(seed) {
1212
// Create seeded random data
1313
const seed = 12345; // Fixed seed for deterministic results
1414
const random = seededRandom(seed);
15-
const randomData = new Uint8Array(1000000).map(() => Math.floor(random() * 256));
15+
const randomData = new Uint8Array(150_000).map(() => Math.floor(random() * 256));
1616

1717
// Test with a known mask
1818
assert.deepStrictEqual(nextMatch(randomData, 0x0000d90003530000n), { position: 459, hash: 9546224108073667431n });
19-
assert.deepStrictEqual(nextMatch(randomData.subarray(128), 0x0000d90003530000n), {
20-
position: 331,
21-
hash: 9546224108073667431n,
19+
assert.deepStrictEqual(nextMatch(randomData.subarray(459), 0x0000d90003530000n), {
20+
position: 3658,
21+
hash: 4043712133052525799n,
2222
});
23+
24+
assert.deepStrictEqual(nextMatches(randomData, 0x0000d90003530000n), {
25+
remaining: 1206,
26+
hash: 18262966296195680063n,
27+
matches: [
28+
{ position: 459, hash: 9546224108073667431n },
29+
{ position: 3658, hash: 4043712133052525799n },
30+
{ position: 2013, hash: 6111702085179831561n },
31+
{ position: 1593, hash: 12901166541873917249n },
32+
{ position: 1566, hash: 7692186462913612151n },
33+
{ position: 211, hash: 16543980755458487441n },
34+
{ position: 1778, hash: 15644384556715661587n },
35+
{ position: 566, hash: 9793366463237592247n },
36+
{ position: 2079, hash: 11221321116171663064n },
37+
{ position: 2940, hash: 1564726223525919786n },
38+
{ position: 809, hash: 15395839328876515337n },
39+
{ position: 946, hash: 10585747199093122759n },
40+
{ position: 854, hash: 4479393852251501569n },
41+
{ position: 436, hash: 15702966577303948694n },
42+
{ position: 2165, hash: 17148900940125069205n },
43+
{ position: 273, hash: 11505890591385615424n },
44+
{ position: 1459, hash: 10774060112464860369n },
45+
{ position: 158, hash: 2233823235057951370n },
46+
{ position: 7, hash: 1983310208686139647n },
47+
{ position: 1926, hash: 4499661659570185271n },
48+
{ position: 1529, hash: 16090517590946392505n },
49+
{ position: 1751, hash: 12536054222087023458n },
50+
{ position: 1222, hash: 334146166487300408n },
51+
{ position: 2230, hash: 6981431015531396608n },
52+
{ position: 826, hash: 11877997991061156988n },
53+
{ position: 33, hash: 8454422284689001989n },
54+
{ position: 1731, hash: 15095819886766624527n },
55+
{ position: 8842, hash: 6362744947164356842n },
56+
{ position: 928, hash: 3627691864743766239n },
57+
{ position: 684, hash: 1137480049753900759n },
58+
{ position: 5301, hash: 10541554813326859395n },
59+
{ position: 2546, hash: 14704288147532701373n },
60+
{ position: 11856, hash: 9653226176528805511n },
61+
{ position: 650, hash: 12714262162290274678n },
62+
{ position: 1346, hash: 2525679969999819421n },
63+
{ position: 353, hash: 2532749299807420736n },
64+
{ position: 1091, hash: 693561665209300041n },
65+
{ position: 729, hash: 11014435606385442344n },
66+
{ position: 1204, hash: 10083883741570968570n },
67+
{ position: 1671, hash: 12308901096302322810n },
68+
{ position: 1362, hash: 13399339535394154305n },
69+
{ position: 1858, hash: 792389713896955383n },
70+
{ position: 2248, hash: 15568664728418446816n },
71+
{ position: 1790, hash: 4328805983976714464n },
72+
{ position: 634, hash: 722305044694988273n },
73+
{ position: 741, hash: 17978970776495983968n },
74+
{ position: 901, hash: 5911861036065769110n },
75+
{ position: 302, hash: 1334790489764850513n },
76+
{ position: 1435, hash: 16174119877357924758n },
77+
{ position: 61, hash: 12103430617785210167n },
78+
{ position: 1, hash: 35334639850667n },
79+
{ position: 2074, hash: 7449519750512442798n },
80+
{ position: 2061, hash: 1805950971475184864n },
81+
{ position: 1612, hash: 5837797879339327135n },
82+
{ position: 3281, hash: 6649572008787195357n },
83+
{ position: 39, hash: 16137242368496690753n },
84+
{ position: 263, hash: 8133543763164586431n },
85+
{ position: 2333, hash: 17019949823094703325n },
86+
{ position: 1160, hash: 8949503946391874147n },
87+
{ position: 641, hash: 18344573417262448121n },
88+
{ position: 2588, hash: 13345294745157777411n },
89+
{ position: 3116, hash: 7832639641689314418n },
90+
{ position: 4671, hash: 13762161036402935807n },
91+
{ position: 276, hash: 10924644382434953404n },
92+
{ position: 4430, hash: 9045519457622973922n },
93+
{ position: 32, hash: 4188636638659752674n },
94+
{ position: 2470, hash: 1184167847892138852n },
95+
{ position: 694, hash: 11699508361075635892n },
96+
{ position: 1703, hash: 9012268790677532920n },
97+
{ position: 47, hash: 6528251874505412319n },
98+
{ position: 2672, hash: 8484789019946020371n },
99+
{ position: 202, hash: 1365160724288031760n },
100+
{ position: 467, hash: 10426152000837661087n },
101+
{ position: 496, hash: 3605417399306471847n },
102+
{ position: 3777, hash: 8410473338876477323n },
103+
{ position: 80, hash: 3693273711429567121n },
104+
{ position: 813, hash: 9224216742837123228n },
105+
{ position: 3115, hash: 5150752707627454542n },
106+
{ position: 806, hash: 8797260981186887018n },
107+
{ position: 4915, hash: 1483374079741560715n },
108+
{ position: 2118, hash: 1742900153494554703n },
109+
{ position: 1515, hash: 4635371751468227093n },
110+
{ position: 2393, hash: 15282968615371427111n },
111+
{ position: 4331, hash: 4659818917792066036n },
112+
{ position: 1188, hash: 3862441883651577693n },
113+
{ position: 2663, hash: 8524789558855117254n },
114+
],
115+
});
116+
23117
console.log("ok");

0 commit comments

Comments
 (0)