Skip to content

Commit fa9d58e

Browse files
cometkim and claude authored
Optimize grapheme cluster boundary checking (#82)
Co-Authored-By: Claude <[email protected]>
1 parent f2018ed commit fa9d58e

File tree

3 files changed

+45
-45
lines changed

3 files changed

+45
-45
lines changed

.changeset/upset-mugs-fix.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"unicode-segmenter": patch
3+
---
4+
5+
Optimize grapheme cluster boundary checking.

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -256,7 +256,7 @@ Since [Hermes doesn't support the `Intl.Segmenter` API](https://github.com/faceb
256256

257257
| Name | Unicode® | ESM? | Size | Size (min) | Size (min+gzip) | Size (min+br) |
258258
|------------------------------|----------|------|----------:|-----------:|----------------:|--------------:|
259-
| `unicode-segmenter/grapheme` | 16.0.0 | ✔️ | 16,003 | 12,153 | 5,065 | 3,775 |
259+
| `unicode-segmenter/grapheme` | 16.0.0 | ✔️ | 16,036 | 12,226 | 5,081 | 3,798 |
260260
| `graphemer` | 15.0.0 | ✖️ | 410,435 | 95,104 | 15,752 | 10,660 |
261261
| `grapheme-splitter` | 10.0.0 | ✖️ | 122,252 | 23,680 | 7,852 | 4,841 |
262262
| `@formatjs/intl-segmenter`* | 15.0.0 | ✖️ | 603,285 | 369,560 | 72,218 | 49,416 |
@@ -272,7 +272,7 @@ Since [Hermes doesn't support the `Intl.Segmenter` API](https://github.com/faceb
272272

273273
| Name | Bytecode size | Bytecode size (gzip)* |
274274
|------------------------------|--------------:|----------------------:|
275-
| `unicode-segmenter/grapheme` | 22,087 | 11,548 |
275+
| `unicode-segmenter/grapheme` | 22,002 | 11,468 |
276276
| `graphemer` | 133,978 | 31,713 |
277277
| `grapheme-splitter` | 63,835 | 19,137 |
278278

src/grapheme.js

Lines changed: 38 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -296,58 +296,53 @@ function isBoundary(catBefore, catAfter, risCount, emoji, incb) {
296296
return true;
297297
}
298298

299-
// GB6
300-
if (
301-
catBefore === 5 &&
302-
(catAfter === 5 || catAfter === 7 || catAfter === 8 || catAfter === 13)
303-
) {
299+
// Most common cases - GB9, GB9a extend rules
300+
if (catAfter === 3 || catAfter === 14 || catAfter === 11) {
304301
return false;
305302
}
306303

307-
// GB7
308-
if (
309-
(catBefore === 7 || catBefore === 13) &&
310-
(catAfter === 12 || catAfter === 13)
311-
) {
312-
return false;
313-
}
314-
315-
// GB8
316-
if (
317-
catAfter === 12 &&
318-
(catBefore === 8 || catBefore === 12)
319-
) {
320-
return false;
321-
}
304+
// GB6 - L x (L | V | LV | LVT)
305+
if (catBefore === 5) {
306+
if (catAfter === 5 || catAfter === 7 || catAfter === 8 || catAfter === 13) {
307+
return false;
308+
}
322309

323-
// GB9
324-
if (catAfter === 3 || catAfter === 14) {
325-
return false;
326-
}
310+
} else {
311+
// GB7 - (LV | V) x (V | T)
312+
if (
313+
(catBefore === 7 || catBefore === 13) &&
314+
(catAfter === 13 || catAfter === 12)
315+
) {
316+
return false;
317+
}
327318

328-
// GB9a
329-
if (catAfter === 11) {
330-
return false;
331-
}
319+
// GB8 - (LVT | T) x T
320+
if (
321+
(catBefore === 8 || catBefore === 12) &&
322+
catAfter === 12
323+
) {
324+
return false;
325+
}
332326

333-
// GB9b
334-
if (catBefore === 9) {
335-
return false;
336-
}
327+
// GB9b
328+
if (catBefore === 9) {
329+
return false;
330+
}
337331

338-
// GB9c
339-
if (catAfter === 0 && incb) {
340-
return false;
341-
}
332+
// GB9c
333+
if (catAfter === 0 && incb) {
334+
return false;
335+
}
342336

343-
// GB11
344-
if (catBefore === 14 && catAfter === 4) {
345-
return !emoji;
346-
}
337+
// GB11
338+
if (catBefore === 14 && catAfter === 4) {
339+
return !emoji;
340+
}
347341

348-
// GB12, GB13
349-
if (catBefore === 10 && catAfter === 10) {
350-
return risCount % 2 === 0;
342+
// GB12, GB13
343+
if (catBefore === 10 && catAfter === 10) {
344+
return risCount % 2 === 0;
345+
}
351346
}
352347

353348
// GB999

0 commit comments

Comments
 (0)