Move InCB pattern lookup to bottom

cometkim · cometkim · commit 3d2312077209 · 2025-12-15T03:38:21.000+09:00
diff --git a/src/grapheme.js b/src/grapheme.js
@@ -82,21 +82,10 @@ export function* graphemeSegments(input) {
   /** Beginning category of a segment */
   let _catBegin = catBefore;
 
-  /** Memoize the beginnig code point a the segment. */
+  /** Memoize the beginning code point of the segment. */
   let _hd = cp;
 
   while (cursor < len) {
-    // Note: Lazily update `consonant` and `linker` state
-    // which is a extra overhead only for Hindi text.
-    if (cp >= 2325) {
-      if (!consonant && catBefore === 0) {
-        consonant = isIndicConjunctConsonant(cp);
-      } else if (catBefore === 3 /* Extend */) {
-        // Note: \p{InCB=Linker} is a subset of \p{Extend}
-        linker = isIndicConjunctLinker(cp);
-      }
-    }
-
     cp = /** @type {number} */ (input.codePointAt(cursor));
     catAfter = cat(cp);
 
@@ -110,7 +99,7 @@ export function* graphemeSegments(input) {
       ) {
         emoji = true;
 
-      } else if (catAfter === 0 /* Any */ && cp >= 2325) {
+      } else if (catAfter === 0 /* Any */) {
         // Note: Put GB9c rule checking here to reduce.
         incb = consonant && linker && (consonant = isIndicConjunctConsonant(cp));
         // It cannot be both a linker and a consonant.
@@ -134,6 +123,12 @@ export function* graphemeSegments(input) {
       index = cursor;
       _catBegin = catAfter;
       _hd = cp;
+    } else if (cp >= 2325 && cp <= 3386) {
+      // Update InCB state only when continuing within a segment
+      if (!consonant && catBefore === 0)
+        consonant = isIndicConjunctConsonant(_hd);
+      if (catAfter === 3 /* Extend */)
+        linker = linker || isIndicConjunctLinker(cp);
     }
 
     cursor += cp <= BMP_MAX ? 1 : 2;