@@ -82,21 +82,10 @@ export function* graphemeSegments(input) {
8282 /** Beginning category of a segment */
8383 let _catBegin = catBefore ;
8484
85- /** Memoize the beginnig code point a the segment. */
85+ /** Memoize the beginning code point of the segment. */
8686 let _hd = cp ;
8787
8888 while ( cursor < len ) {
89- // Note: Lazily update `consonant` and `linker` state
90- // which is a extra overhead only for Hindi text.
91- if ( cp >= 2325 ) {
92- if ( ! consonant && catBefore === 0 ) {
93- consonant = isIndicConjunctConsonant ( cp ) ;
94- } else if ( catBefore === 3 /* Extend */ ) {
95- // Note: \p{InCB=Linker} is a subset of \p{Extend}
96- linker = isIndicConjunctLinker ( cp ) ;
97- }
98- }
99-
10089 cp = /** @type {number } */ ( input . codePointAt ( cursor ) ) ;
10190 catAfter = cat ( cp ) ;
10291
@@ -110,11 +99,8 @@ export function* graphemeSegments(input) {
11099 ) {
111100 emoji = true ;
112101
113- } else if ( catAfter === 0 /* Any */ && cp >= 2325 ) {
114- // Note: Put GB9c rule checking here to reduce.
115- incb = consonant && linker && ( consonant = isIndicConjunctConsonant ( cp ) ) ;
116- // It cannot be both a linker and a consonant.
117- linker = linker && ! consonant ;
102+ } else if ( catAfter === 0 ) {
103+ incb = consonant && linker && isIndicConjunctConsonant ( cp ) ;
118104 }
119105 }
120106
@@ -134,6 +120,17 @@ export function* graphemeSegments(input) {
134120 index = cursor ;
135121 _catBegin = catAfter ;
136122 _hd = cp ;
123+
124+ } else if ( cp >= 2325 ) {
125+ // Note: Avoid InCB state checking as much as possible.
126+ // Update the InCB state only when continuing within a segment.
127+ if ( ! consonant && catBefore === 0 )
128+ consonant = isIndicConjunctConsonant ( _hd ) ;
129+
130+ if ( consonant && catAfter === 3 )
131+ linker = isIndicConjunctLinker ( cp ) ;
132+ else if ( catAfter === 0 )
133+ linker = false ;
137134 }
138135
139136 cursor += cp <= BMP_MAX ? 1 : 2 ;
0 commit comments