@@ -49,20 +49,24 @@ export function* graphemeSegments(input) {
4949 }
5050
5151 const len = input . length ;
52- let cursor = 0 ;
5352 let index = 0 ;
53+ let cp = /** @type {number } */ ( input . codePointAt ( 0 ) ) ;
54+ let cursor = cp <= 0xFFFF ? 1 : 2 ;
5455
55- /** @type {GraphemeCategoryNum | null } Category of codepoint immediately preceding cursor, if known. */
56- let catBefore = null ;
56+ /** Memoize the beginning code point of the segment. */
57+ let _hd = cp ;
58+
59+ /** @type {import('./_grapheme_data.js').GraphemeCategoryRange } */
60+ const cache = [ 0 , 0 , 2 /* GC_Control */ ] ;
61+
62+ /** Category of codepoint immediately preceding cursor, if known. */
63+ let catBefore = cat ( cp , cache ) ;
5764
5966 /** @type {GraphemeCategoryNum | null } Category of codepoint immediately following cursor, if known. */
5966 let catAfter = null ;
6067
61- /** @type {GraphemeCategoryNum | null } Beginning category of a segment */
62- let catBegin = null ;
63-
64- /** @type {import('./_grapheme_data.js').GraphemeCategoryRange } */
65- const cache = [ 0 , 0 , 2 /* GC_Control */ ] ;
68+ /** Beginning category of a segment */
69+ let catBegin = catBefore ;
6670
6771 /** @type {number } The number of RIS codepoints preceding `cursor`. */
6872 let risCount = 0 ;
@@ -79,34 +83,7 @@ export function* graphemeSegments(input) {
7983 /** InCB=Consonant InCB=Linker x InCB=Consonant */
8084 let incb = false ;
8185
82- let cp = /** @type {number } */ ( input . codePointAt ( cursor ) ) ;
83-
84- /** Memoize the beginnig code point a the segment. */
85- let _hd = cp ;
86-
8786 while ( cursor < len ) {
88- cursor += cp <= 0xFFFF ? 1 : 2 ;
89-
90- // Note: Of course the nullish coalescing is useful here,
91- // but avoid it for aggressive compatibility and perf claim
92- catBefore = catAfter ;
93- if ( catBefore === null ) {
94- catBefore = cat ( cp , cache ) ;
95- catBegin = catBefore ;
96- }
97-
98- if ( cursor >= len ) {
99- yield {
100- segment : input . slice ( index , cursor ) ,
101- index,
102- input,
103- _hd,
104- _catBegin : /** @type {typeof catBefore } */ ( catBegin ) ,
105- _catEnd : catBefore ,
106- } ;
107- return ;
108- }
109-
11087 // Note: Lazily update `consonant` and `linker` state
11188 // which is an extra overhead only for Hindi text.
11289 if ( cp >= 2325 ) {
@@ -156,7 +133,23 @@ export function* graphemeSegments(input) {
156133 catBegin = catAfter ;
157134 _hd = cp ;
158135 }
136+
137+ catBefore = catAfter ;
138+ cursor += cp <= 0xFFFF ? 1 : 2 ;
159139 }
140+
141+ if ( index < len ) {
142+ yield {
143+ segment : input . slice ( index ) ,
144+ index,
145+ input,
146+ _hd,
147+ _catBegin : /** @type {typeof catBefore } */ ( catBegin ) ,
148+ _catEnd : catBefore ,
149+ } ;
150+ }
151+
152+
160153}
161154
162155/**
0 commit comments