diff --git a/README.md b/README.md index d3849de..f68c701 100644 --- a/README.md +++ b/README.md @@ -220,7 +220,7 @@ Since [Hermes doesn't support the `Intl.Segmenter` API](https://github.com/faceb | Name | Unicode® | ESM? | Size | Size (min) | Size (min+gzip) | Size (min+br) | Size (min+zstd) | |------------------------------|----------|------|----------:|-----------:|----------------:|--------------:|----------------:| -| `unicode-segmenter/grapheme` | 16.0.0 | ✔️ | 15,607 | 12,166 | 5,042 | 3,747 | 4,725 | +| `unicode-segmenter/grapheme` | 16.0.0 | ✔️ | 15,586 | 12,166 | 5,037 | 3,717 | 4,726 | | `graphemer` | 15.0.0 | ✖️ ️| 410,435 | 95,104 | 15,752 | 10,660 | 15,911 | | `grapheme-splitter` | 10.0.0 | ✖️ | 122,252 | 23,680 | 7,852 | 4,841 | 6,750 | | `@formatjs/intl-segmenter`* | 15.0.0 | ✖️ | 603,285 | 369,560 | 72,218 | 49,416 | 67,975 | @@ -236,7 +236,7 @@ Since [Hermes doesn't support the `Intl.Segmenter` API](https://github.com/faceb | Name | Bytecode size | Bytecode size (gzip)* | |------------------------------|--------------:|----------------------:| -| `unicode-segmenter/grapheme` | 20,932 | 10,991 | +| `unicode-segmenter/grapheme` | 21,001 | 11,063 | | `graphemer` | 133,978 | 31,713 | | `grapheme-splitter` | 63,835 | 19,137 | diff --git a/src/grapheme.js b/src/grapheme.js index e16b7fb..3c07060 100644 --- a/src/grapheme.js +++ b/src/grapheme.js @@ -49,7 +49,7 @@ export function* graphemeSegments(input) { if (cp == null) return; /** Current cursor position. */ - let cursor = 0; + let cursor = cp < 0xFFFF ? 1 : 2; /** Total length of the input string. */ let len = input.length; @@ -86,21 +86,7 @@ export function* graphemeSegments(input) { /** Memoize the beginnig code point a the segment. */ let _hd = cp; - while (true) { - cursor += cp < 0xFFFF ? 1 : 2; - - if (cursor >= len) { - yield { - segment: input.slice(index, cursor), - index, - input, - _hd, - _catBegin, - _catEnd: catBefore, - }; - return; - } - + while (cursor < len) { // Note: Lazily update `consonant` and `linker` state // which is a extra overhead only for Hindi text. if (cp >= 2325) { @@ -151,8 +137,20 @@ export function* graphemeSegments(input) { _hd = cp; } + cursor += cp < 0xFFFF ? 1 : 2; catBefore = catAfter; } + + if (index < len) { + yield { + segment: input.slice(index), + index, + input, + _hd, + _catBegin, + _catEnd: catBefore, + }; + } } /**