diff --git a/README.md b/README.md index 8143b65..f58aad0 100644 --- a/README.md +++ b/README.md @@ -220,7 +220,7 @@ Since [Hermes doesn't support the `Intl.Segmenter` API](https://github.com/faceb | Name | Unicode® | ESM? | Size | Size (min) | Size (min+gzip) | Size (min+br) | Size (min+zstd) | |------------------------------|----------|------|----------:|-----------:|----------------:|--------------:|----------------:| -| `unicode-segmenter/grapheme` | 16.0.0 | ✔️ | 15,941 | 12,192 | 5,064 | 3,739 | 4,747 | +| `unicode-segmenter/grapheme` | 16.0.0 | ✔️ | 15,921 | 12,189 | 5,060 | 3,773 | 4,746 | | `graphemer` | 15.0.0 | ✖️ ️| 410,435 | 95,104 | 15,752 | 10,660 | 15,911 | | `grapheme-splitter` | 10.0.0 | ✖️ | 122,252 | 23,680 | 7,852 | 4,841 | 6,750 | | `@formatjs/intl-segmenter`* | 15.0.0 | ✖️ | 603,285 | 369,560 | 72,218 | 49,416 | 67,975 | @@ -236,7 +236,7 @@ Since [Hermes doesn't support the `Intl.Segmenter` API](https://github.com/faceb | Name | Bytecode size | Bytecode size (gzip)* | |------------------------------|--------------:|----------------------:| -| `unicode-segmenter/grapheme` | 20,925 | 11,013 | +| `unicode-segmenter/grapheme` | 20,928 | 10,973 | | `graphemer` | 133,978 | 31,713 | | `grapheme-splitter` | 63,835 | 19,137 | diff --git a/src/grapheme.js b/src/grapheme.js index ca4f43c..50a1363 100644 --- a/src/grapheme.js +++ b/src/grapheme.js @@ -103,21 +103,7 @@ export function* graphemeSegments(input) { catBegin = catBefore; } - if (cp >= 2325) { - // Note: Lazily update `consonant` and `linker` state - // which is a extra overhead only for Hindi text. 
- if (!consonant && catBefore === 0) { - consonant = isIndicConjunctConsonant(cp); - } else if (catBefore === 3 /* Extend */) { - // Note: \p{InCB=Linker} is a subset of \p{Extend} - linker = isIndicConjunctLinker(cp); - } - } - - if (cursor < len) { - cp = /** @type {number} */ (input.codePointAt(cursor)); - catAfter = cat(cp, cache); - } else { + if (cursor >= len) { yield { segment: input.slice(index, cursor), index, @@ -129,6 +115,20 @@ export function* graphemeSegments(input) { return; } + // Note: Lazily update `consonant` and `linker` state + // which is an extra overhead only for Hindi text. + if (cp >= 2325) { + if (!consonant && catBefore === 0) { + consonant = isIndicConjunctConsonant(cp); + } else if (catBefore === 3 /* Extend */) { + // Note: \p{InCB=Linker} is a subset of \p{Extend} + linker = isIndicConjunctLinker(cp); + } + } + + cp = /** @type {number} */ (input.codePointAt(cursor)); + catAfter = cat(cp, cache); + if (catBefore === 10 /* Regional_Indicator */) { risCount += 1; } else {