@@ -45,47 +45,47 @@ const BMP_MAX = 0xFFFF;
4545 * @return {GraphemeSegmenter } iterator for grapheme cluster segments
4646 */
4747export function * graphemeSegments ( input ) {
48- let cp = input . codePointAt ( 0 ) ;
49-
50- // do nothing on empty string
51- if ( cp == null ) return ;
52-
53- /** Current cursor position. */
54- let cursor = cp <= BMP_MAX ? 1 : 2 ;
55-
5648 /** Total length of the input string. */
5749 let len = input . length ;
5850
59- /** Category of codepoint immediately preceding cursor */
60- let catBefore = cat ( cp ) ;
51+ // do nothing on empty string
52+ if ( len === 0 ) return ;
6153
62- /** @type {GraphemeCategoryNum } Category of codepoint immediately preceding cursor. */
63- let catAfter = 0 ;
54+ let cp = /** @type {number }*/ ( input . codePointAt ( 0 ) ) ;
6455
65- /** The number of RIS codepoints preceding `cursor` . */
66- let risCount = 0 ;
56+ /** Memoize the beginning code point of the segment . */
57+ let _hd = cp ;
6758
6859 /**
6960 * Emoji state for GB11: tracks if we've seen Extended_Pictographic followed by Extend* ZWJ
7061 * Only relevant when catBefore === ZWJ && catAfter === Extended_Pictographic
7162 */
7263 let emoji = false ;
7364
65+ /** The number of RI codepoints preceding `cursor`. */
66+ let riCount = 0 ;
67+
7468 /** InCB=Consonant - segment started with Indic consonant */
7569 let consonant = false ;
7670
7771 /** InCB=Linker - seen a linker after consonant */
7872 let linker = false ;
7973
80- let index = 0 ;
74+ /** Category of codepoint immediately preceding cursor */
75+ let catBefore = cat ( cp ) ;
8176
82- /** Beginning category of a segment */
77+ /** Memoize the beginning category of the segment */
8378 let _catBegin = catBefore ;
8479
85- /** Memoize the beginning code point of the segment. */
86- let _hd = cp ;
80+ /** @type {GraphemeCategoryNum } Category of codepoint immediately following cursor. */
81+ let catAfter = 0 ;
82+
83+ let index = 0 ;
84+ let cursor = 0 ;
8785
8886 while ( cursor < len ) {
87+ cursor += cp <= BMP_MAX ? 1 : 2 ;
88+
8989 cp = /** @type {number } */ ( input . codePointAt ( cursor ) ) ;
9090 catAfter = cat ( cp ) ;
9191
@@ -117,8 +117,8 @@ export function* graphemeSegments(input) {
117117 }
118118 // GB12, GB13: RI × RI (odd count means no break)
119119 else if ( catBefore === 10 && catAfter === 10 ) {
120- // risCount is count BEFORE current RI, so odd means this is 2nd, 4th, etc.
121- boundary = risCount ++ % 2 === 1 ;
120+ // riCount is count BEFORE current RI, so odd means this is 2nd, 4th, etc.
121+ boundary = riCount ++ % 2 === 1 ;
122122 }
123123 // GB6: L × (L | V | LV | LVT)
124124 else if ( catBefore === 5 ) {
@@ -150,7 +150,7 @@ export function* graphemeSegments(input) {
150150
151151 // Reset segment state
152152 emoji = false ;
153- risCount = 0 ;
153+ riCount = 0 ;
154154 index = cursor ;
155155 _catBegin = catAfter ;
156156 _hd = cp ;
@@ -181,7 +181,6 @@ export function* graphemeSegments(input) {
181181 }
182182 }
183183
184- cursor += cp <= BMP_MAX ? 1 : 2 ;
185184 catBefore = catAfter ;
186185 }
187186
0 commit comments