@@ -82,21 +82,10 @@ export function* graphemeSegments(input) {
8282 /** Beginning category of a segment */
8383 let _catBegin = catBefore ;
8484
85- /** Memoize the beginnig code point a the segment. */
85+ /** Memoize the beginning code point of the segment. */
8686 let _hd = cp ;
8787
8888 while ( cursor < len ) {
89- // Note: Lazily update `consonant` and `linker` state
90- // which is a extra overhead only for Hindi text.
91- if ( cp >= 2325 ) {
92- if ( ! consonant && catBefore === 0 ) {
93- consonant = isIndicConjunctConsonant ( cp ) ;
94- } else if ( catBefore === 3 /* Extend */ ) {
95- // Note: \p{InCB=Linker} is a subset of \p{Extend}
96- linker = isIndicConjunctLinker ( cp ) ;
97- }
98- }
99-
10089 cp = /** @type {number } */ ( input . codePointAt ( cursor ) ) ;
10190 catAfter = cat ( cp ) ;
10291
@@ -110,7 +99,7 @@ export function* graphemeSegments(input) {
11099 ) {
111100 emoji = true ;
112101
113- } else if ( catAfter === 0 /* Any */ && cp >= 2325 ) {
102+ } else if ( catAfter === 0 /* Any */ ) {
114103 // Note: Put GB9c rule checking here to reduce.
115104 incb = consonant && linker && ( consonant = isIndicConjunctConsonant ( cp ) ) ;
116105 // It cannot be both a linker and a consonant.
@@ -134,6 +123,12 @@ export function* graphemeSegments(input) {
134123 index = cursor ;
135124 _catBegin = catAfter ;
136125 _hd = cp ;
126+ } else if ( cp >= 2325 && cp <= 3386 ) {
127+ // Update InCB state only when continuing within a segment
128+ if ( ! consonant && catBefore === 0 )
129+ consonant = isIndicConjunctConsonant ( _hd ) ;
130+ if ( catAfter === 3 /* Extend */ )
131+ linker = linker || isIndicConjunctLinker ( cp ) ;
137132 }
138133
139134 cursor += cp <= BMP_MAX ? 1 : 2 ;
0 commit comments