diff --git a/README.md b/README.md index 5e1f9c9..59be2a6 100644 --- a/README.md +++ b/README.md @@ -220,7 +220,7 @@ Since [Hermes doesn't support the `Intl.Segmenter` API](https://github.com/faceb | Name | Unicode® | ESM? | Size | Size (min) | Size (min+gzip) | Size (min+br) | Size (min+zstd) | |------------------------------|----------|------|----------:|-----------:|----------------:|--------------:|----------------:| -| `unicode-segmenter/grapheme` | 16.0.0 | ✔️ | 15,919 | 12,187 | 5,057 | 3,738 | 4,745 | +| `unicode-segmenter/grapheme` | 16.0.0 | ✔️ | 15,625 | 12,166 | 5,044 | 3,752 | 4,724 | | `graphemer` | 15.0.0 | ✖️ ️| 410,435 | 95,104 | 15,752 | 10,660 | 15,911 | | `grapheme-splitter` | 10.0.0 | ✖️ | 122,252 | 23,680 | 7,852 | 4,841 | 6,750 | | `@formatjs/intl-segmenter`* | 15.0.0 | ✖️ | 603,285 | 369,560 | 72,218 | 49,416 | 67,975 | @@ -236,7 +236,7 @@ Since [Hermes doesn't support the `Intl.Segmenter` API](https://github.com/faceb | Name | Bytecode size | Bytecode size (gzip)* | |------------------------------|--------------:|----------------------:| -| `unicode-segmenter/grapheme` | 20,932 | 10,977 | +| `unicode-segmenter/grapheme` | 20,932 | 10,994 | | `graphemer` | 133,978 | 31,713 | | `grapheme-splitter` | 63,835 | 19,137 | diff --git a/src/grapheme.js b/src/grapheme.js index 8f7b123..a1b5286 100644 --- a/src/grapheme.js +++ b/src/grapheme.js @@ -43,30 +43,30 @@ export { GraphemeCategory }; * @return {GraphemeSegmenter} iterator for grapheme cluster segments */ export function* graphemeSegments(input) { + let cp = input.codePointAt(0); + // do nothing on empty string - if (input === '') { - return; - } + if (cp == null) return; - /** @type {number} Current cursor position. */ + /** Current cursor position. */ let cursor = 0; - /** @type {number} Total length of the input string. */ + /** Total length of the input string. */ let len = input.length; - /** @type {GraphemeCategoryNum | null} Category of codepoint immediately preceding cursor, if known. 
*/ - let catBefore = null; + /** @type {import('./_grapheme_data.js').GraphemeCategoryRange} */ + let cache = [0, 0, 2 /* GC_Control */]; - /** @type {GraphemeCategoryNum | null} Category of codepoint immediately preceding cursor, if known. */ - let catAfter = null; + /** Category of codepoint immediately preceding cursor */ + let catBefore = cat(cp, cache); - /** @type {GraphemeCategoryNum | null} Beginning category of a segment */ - let catBegin = null; + /** Beginning category of a segment */ + let catBegin = catBefore; - /** @type {import('./_grapheme_data.js').GraphemeCategoryRange} */ - let cache = [0, 0, 2 /* GC_Control */]; + /** @type {GraphemeCategoryNum | null} Category of codepoint immediately following cursor. */ + let catAfter = null; - /** @type {number} The number of RIS codepoints preceding `cursor`. */ + /** The number of RIS codepoints preceding `cursor`. */ let risCount = 0; /** Emoji state */ @@ -81,8 +81,6 @@ export function* graphemeSegments(input) { /** InCB=Consonant InCB=Linker x InCB=Consonant */ let incb = false; - let cp = /** @type {number} */ (input.codePointAt(cursor)); - /** Memoize the beginnig code point a the segment. */ let _hd = cp; @@ -91,21 +89,13 @@ export function* graphemeSegments(input) { while (true) { cursor += cp < 0xFFFF ? 
1 : 2; - // Note: Of course the nullish coalescing is useful here, - // but avoid it for aggressive compatibility and perf claim - catBefore = catAfter; - if (catBefore === null) { - catBefore = cat(cp, cache); - catBegin = catBefore; - } - if (cursor >= len) { yield { segment: input.slice(index, cursor), index, input, _hd, - _catBegin: /** @type {typeof catBefore} */ (catBegin), + _catBegin: catBegin, _catEnd: catBefore, }; return; @@ -149,7 +139,7 @@ export function* graphemeSegments(input) { index, input, _hd, - _catBegin: /** @type {typeof catBefore} */ (catBegin), + _catBegin: catBegin, _catEnd: catBefore, }; @@ -160,6 +150,8 @@ export function* graphemeSegments(input) { catBegin = catAfter; _hd = cp; } + + catBefore = catAfter; } }