@@ -198,32 +198,32 @@ const
198198 SEG0_MIN = 128 ,
199199 /** 0x2FFF */
200200 SEG0_MAX = 12287 ,
201- /** ( 0x3000 - 0x80) >> 1 */
202- SEG1_OFF = 6080 ,
201+ /** 0x3000 - 0x80 */
202+ SEG1_OFF = 12160 ,
203203 /** 0xA000 */
204204 SEG1_MIN = 40960 ,
205205 /** 0xDFFF */
206206 SEG1_MAX = 57343 ,
207- /** SEG1_OFF + (( 0xE000 - 0xA000) >> 1 ) */
208- SEG2_OFF = 14272 ,
207+ /** SEG1_OFF + (0xE000 - 0xA000) */
208+ SEG2_OFF = 28544 ,
209209 /** 0xFE00 */
210210 SEG2_MIN = 65024 ;
211211
212212/**
213- * Segmented 4-bit packed lookup table for BMP code points.
213+ * Segmented lookup table for BMP code points.
214214 *
215215 * Memory optimization: Skip regions that are almost 100% category 0 (Any):
216216 * - 0x3000-0x9FFF (CJK): 28,672 codepoints, only 12 non-Any -> inlined fast path
217217 * - 0xE000-0xFDFF (Private Use, CJK Compatibility, Presentation Forms): 7,680 codepoints, only 1 non-Any (U+FB1E) -> inlined fast path
218218 *
219219 * Cache segments:
220- * - Segment 0: 0x0080-0x2FFF (12,160 codepoints -> 6,080 bytes)
221- * - Segment 1: 0xA000-0xDFFF (16,384 codepoints -> 8,192 bytes)
222- * - Segment 2: 0xFE00-0xFFFF (512 codepoints -> 256 bytes)
220+ * - Segment 0: 0x0080-0x2FFF (12,160 bytes)
221+ * - Segment 1: 0xA000-0xDFFF (16,384 bytes)
222+ * - Segment 2: 0xFE00-0xFFFF (512 bytes)
223223 *
224- * Total: 14,528 bytes (~14KB )
224+ * Total: 29,056 bytes (~28KB )
225225 */
226- let bmpLookup = new Uint8Array ( 14528 ) ;
226+ let bmpLookup = new Uint8Array ( 29056 ) ;
227227let bmpCursor = ( ( ) => {
228228 let cursor = 0 ;
229229 while ( cursor < grapheme_ranges . length ) {
@@ -236,14 +236,10 @@ let bmpCursor = (() => {
236236
237237 for ( let cp = start ; cp <= end && cp <= BMP_MAX ; cp ++ ) {
238238 let idx = - 1 ;
239- if ( cp >= SEG0_MIN && cp <= SEG0_MAX ) idx = ( cp - SEG0_MIN ) >> 1 ;
240- if ( cp >= SEG1_MIN && cp <= SEG1_MAX ) idx = SEG1_OFF + ( ( cp - SEG1_MIN ) >> 1 ) ;
241- if ( cp >= SEG2_MIN && cp <= BMP_MAX ) idx = SEG2_OFF + ( ( cp - SEG2_MIN ) >> 1 ) ;
242- if ( idx >= 0 ) {
243- bmpLookup [ idx ] = cp & 1
244- ? ( bmpLookup [ idx ] & 0x0F ) | ( cat << 4 )
245- : ( bmpLookup [ idx ] & 0xF0 ) | cat ;
246- }
239+ if ( cp >= SEG0_MIN && cp <= SEG0_MAX ) idx = cp - SEG0_MIN ;
240+ if ( cp >= SEG1_MIN && cp <= SEG1_MAX ) idx = SEG1_OFF + ( cp - SEG1_MIN ) ;
241+ if ( cp >= SEG2_MIN && cp <= BMP_MAX ) idx = SEG2_OFF + ( cp - SEG2_MIN ) ;
242+ if ( idx >= 0 ) bmpLookup [ idx ] = cat ;
247243 }
248244 }
249245 return cursor ;
@@ -258,13 +254,13 @@ let bmpCursor = (() => {
258254 * @return {GraphemeCategoryNum }
259255 */
260256function cat ( cp ) {
261- // Ordered pass by range:
257+ // Ordered pass by range:
262258 // 1. ASCII fast path
263259 // 2. Segment 0 cache
264- // 3. CJK fast path
265- // 4. Segment 1 cache
266- // 5. PrivateUse fast path
267- // 6. Segment 2 cache
260+ // 3. CJK fast path
261+ // 4. Segment 1 cache
262+ // 5. PrivateUse fast path
263+ // 6. Segment 2 cache
268264 // 7. Non-BMP binary search
269265
270266 // ASCII fast path
@@ -274,13 +270,9 @@ function cat(cp) {
274270 if ( cp === 13 ) return 1 ;
275271 return 2 ;
276272 }
277-
278- let byte = 0 , idx = - 1 ;
279273 // Segment 0
280274 if ( cp <= SEG0_MAX ) {
281- idx = ( cp - SEG0_MIN ) >> 1 ;
282- byte = bmpLookup [ idx ] ;
283- return /** @type {GraphemeCategoryNum } */ ( cp & 1 ? byte >> 4 : byte & 0x0F ) ;
275+ return /** @type {GraphemeCategoryNum } */ ( bmpLookup [ cp - SEG0_MIN ] ) ;
284276 }
285277 // CJK fast path
286278 if ( cp < SEG1_MIN ) {
@@ -294,22 +286,18 @@ function cat(cp) {
294286 }
295287 // Segment 1
296288 if ( cp <= SEG1_MAX ) {
297- idx = SEG1_OFF + ( ( cp - SEG1_MIN ) >> 1 ) ;
298- byte = bmpLookup [ idx ] ;
299- return /** @type {GraphemeCategoryNum } */ ( cp & 1 ? byte >> 4 : byte & 0x0F ) ;
289+ return /** @type {GraphemeCategoryNum } */ ( bmpLookup [ SEG1_OFF + ( cp - SEG1_MIN ) ] ) ;
300290 }
301291 // Private Use fast path
302292 if ( cp < SEG2_MIN ) {
303293 return cp === 0xFB1E ? 3 : 0 ;
304294 }
305295 // Segment 2
306296 if ( cp <= BMP_MAX ) {
307- idx = SEG2_OFF + ( ( cp - SEG2_MIN ) >> 1 ) ;
308- byte = bmpLookup [ idx ] ;
309- return /** @type {GraphemeCategoryNum } */ ( cp & 1 ? byte >> 4 : byte & 0x0F ) ;
297+ return /** @type {GraphemeCategoryNum } */ ( bmpLookup [ SEG2_OFF + ( cp - SEG2_MIN ) ] ) ;
310298 }
311299 // Non-BMP
312- idx = findUnicodeRangeIndex ( cp , grapheme_ranges , bmpCursor ) ;
300+ let idx = findUnicodeRangeIndex ( cp , grapheme_ranges , bmpCursor ) ;
313301 return idx < 0 ? 0 : grapheme_ranges [ idx ] [ 2 ] ;
314302}
315303
0 commit comments