Skip to content

Commit 3d23120

Browse files
committed
Move InCB pattern lookup to bottom
1 parent 298d510 commit 3d23120

File tree

1 file changed

+8
-13
lines changed

1 file changed

+8
-13
lines changed

src/grapheme.js

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -82,21 +82,10 @@ export function* graphemeSegments(input) {
8282
/** Beginning category of a segment */
8383
let _catBegin = catBefore;
8484

85-
/** Memoize the beginnig code point a the segment. */
85+
/** Memoize the beginning code point of the segment. */
8686
let _hd = cp;
8787

8888
while (cursor < len) {
89-
// Note: Lazily update `consonant` and `linker` state
90-
// which is a extra overhead only for Hindi text.
91-
if (cp >= 2325) {
92-
if (!consonant && catBefore === 0) {
93-
consonant = isIndicConjunctConsonant(cp);
94-
} else if (catBefore === 3 /* Extend */) {
95-
// Note: \p{InCB=Linker} is a subset of \p{Extend}
96-
linker = isIndicConjunctLinker(cp);
97-
}
98-
}
99-
10089
cp = /** @type {number} */ (input.codePointAt(cursor));
10190
catAfter = cat(cp);
10291

@@ -110,7 +99,7 @@ export function* graphemeSegments(input) {
11099
) {
111100
emoji = true;
112101

113-
} else if (catAfter === 0 /* Any */ && cp >= 2325) {
102+
} else if (catAfter === 0 /* Any */) {
114103
// Note: Put GB9c rule checking here to reduce overhead.
115104
incb = consonant && linker && (consonant = isIndicConjunctConsonant(cp));
116105
// It cannot be both a linker and a consonant.
@@ -134,6 +123,12 @@ export function* graphemeSegments(input) {
134123
index = cursor;
135124
_catBegin = catAfter;
136125
_hd = cp;
126+
} else if (cp >= 2325 && cp <= 3386) {
127+
// Update InCB state only when continuing within a segment
128+
if (!consonant && catBefore === 0)
129+
consonant = isIndicConjunctConsonant(_hd);
130+
if (catAfter === 3 /* Extend */)
131+
linker = linker || isIndicConjunctLinker(cp);
137132
}
138133

139134
cursor += cp <= BMP_MAX ? 1 : 2;

0 commit comments

Comments
 (0)