@@ -208,34 +208,33 @@ const
208208/**
209209 * Segmented lookup tables for BMP code points.
210210 *
211- * Memory optimization: Skip regions that are almost 100% category 0 (Any):
212- * - 0x3000-0x9FFF (CJK): 28,672 codepoints, only 12 non-Any -> inlined fast path
213- * - 0xE000-0xFDFF (Private Use): 7,680 codepoints, only 1 non-Any -> inlined fast path
211+ * Memory optimization: Skip regions that are almost 100% category {@link GC_Any}:
212+ * - 0x3000-0x9FFF (CJK): 28,672 codepoints, only 12 non-Any ranges -> handled by an inlined fast path
213+ * - 0xE000-0xFDFF (Private Use): 7,680 codepoints, only 1 non-Any range -> very rare, but simple enough to inline
214+ * - 0xFE00-0xFFFF (Specials): 512 codepoints, only 5 ranges -> very rare, falls back to binary search
214215 *
215216 * Cache segments:
216- * - seg0: 0x0080-0x2FFF (12,160 bytes)
217- * - seg1: 0xA000-0xDFFF (16,384 bytes)
218- * - seg2: 0xFE00-0xFFFF (512 bytes)
217+ * - SEG0: 0x0080-0x2FFF (12,160 bytes)
218+ * - SEG1: 0xA000-0xDFFF (16,384 bytes)
219219 *
220- * Total: 29,056 bytes (~28KB)
220+ * Total: 28,544 bytes (~28KB)
221221 */
222- let seg0 = new Uint8Array ( SEG0_MAX - SEG0_MIN + 1 ) ;
223- let seg1 = new Uint8Array ( SEG1_MAX - SEG1_MIN + 1 ) ;
224- let seg2 = new Uint8Array ( BMP_MAX - SEG2_MIN + 1 ) ;
225- let bmpCursor = ( ( ) => {
222+ let SEG0 = new Uint8Array ( SEG0_MAX - SEG0_MIN + 1 ) ;
223+ let SEG1 = new Uint8Array ( SEG1_MAX - SEG1_MIN + 1 ) ;
224+ let SEG_CURSOR = ( ( ) => {
226225 let cursor = 0 ;
227226 while ( cursor < grapheme_ranges . length ) {
228227 let [ start , end , cat ] = grapheme_ranges [ cursor ] ;
229- if ( start > BMP_MAX ) break ;
228+ if ( start > SEG1_MAX ) break ;
230229 cursor ++ ;
231230
232231 // Skip ranges that fall entirely in the gaps between segments (ASCII/CJK fast paths)
233- if ( end < SEG0_MIN || ( start > SEG0_MAX && end < SEG1_MIN ) || ( start > SEG1_MAX && end < SEG2_MIN ) ) continue ;
232+ if ( end < SEG0_MIN || ( start > SEG0_MAX && end < SEG1_MIN ) ) continue ;
234233
235- for ( let cp = start ; cp <= end && cp <= BMP_MAX ; cp ++ ) {
236- if ( cp >= SEG0_MIN && cp <= SEG0_MAX ) seg0 [ cp - SEG0_MIN ] = cat ;
237- else if ( cp >= SEG1_MIN && cp <= SEG1_MAX ) seg1 [ cp - SEG1_MIN ] = cat ;
238- else if ( cp >= SEG2_MIN ) seg2 [ cp - SEG2_MIN ] = cat ;
234+ for ( let cp = start ; cp <= end ; cp ++ ) {
235+ if ( cp >= SEG0_MIN && cp <= SEG0_MAX ) SEG0 [ cp - SEG0_MIN ] = cat ;
236+ else if ( cp >= SEG1_MIN && cp <= SEG1_MAX ) SEG1 [ cp - SEG1_MIN ] = cat ;
237+ else continue ;
239238 }
240239 }
241240 return cursor ;
@@ -256,8 +255,7 @@ function cat(cp) {
256255 // 3. CJK fast path
257256 // 4. Segment 1 cache
258257 // 5. PrivateUse fast path
259- // 6. Segment 2 cache
260- // 7. Non-BMP binary search
258+ // 6. Binary search
261259
262260 // ASCII fast path
263261 if ( cp < SEG0_MIN ) {
@@ -268,7 +266,7 @@ function cat(cp) {
268266 }
269267 // Segment 0
270268 if ( cp <= SEG0_MAX ) {
271- return /** @type {GraphemeCategoryNum } */ ( seg0 [ cp - SEG0_MIN ] ) ;
269+ return /** @type {GraphemeCategoryNum } */ ( SEG0 [ cp - SEG0_MIN ] ) ;
272270 }
273271 // CJK fast path
274272 if ( cp < SEG1_MIN ) {
@@ -282,18 +280,14 @@ function cat(cp) {
282280 }
283281 // Segment 1
284282 if ( cp <= SEG1_MAX ) {
285- return /** @type {GraphemeCategoryNum } */ ( seg1 [ cp - SEG1_MIN ] ) ;
283+ return /** @type {GraphemeCategoryNum } */ ( SEG1 [ cp - SEG1_MIN ] ) ;
286284 }
287285 // Private Use fast path
288286 if ( cp < SEG2_MIN ) {
289287 return cp === 0xFB1E ? 3 : 0 ;
290288 }
291- // Segment 2
292- if ( cp <= BMP_MAX ) {
293- return /** @type {GraphemeCategoryNum } */ ( seg2 [ cp - SEG2_MIN ] ) ;
294- }
295289 // Specials (0xFE00-0xFFFF) and non-BMP: binary search over the remaining ranges
296- let idx = findUnicodeRangeIndex ( cp , grapheme_ranges , bmpCursor ) ;
290+ let idx = findUnicodeRangeIndex ( cp , grapheme_ranges , SEG_CURSOR ) ;
297291 return idx < 0 ? 0 : grapheme_ranges [ idx ] [ 2 ] ;
298292}
299293
0 commit comments