diff --git a/.changeset/five-seas-enjoy.md b/.changeset/five-seas-enjoy.md
new file mode 100644
index 0000000..6918112
--- /dev/null
+++ b/.changeset/five-seas-enjoy.md
@@ -0,0 +1,10 @@
+---
+"unicode-segmenter": patch
+---
+
+Optimizing grapheme break category lookup for better runtime trade-offs.
+
+See [issue](https://github.com/cometkim/unicode-segmenter/issues/104) for the explanation.
+
+With this change, the library's constant memory footprint is reduced from 64 KB to 14 KB without performance regressions.
+However, the code size increases slightly due to inlining. It's still relatively small.
diff --git a/README.md b/README.md
index da42d3a..0a857d6 100644
--- a/README.md
+++ b/README.md
@@ -215,7 +215,7 @@ Since [Hermes doesn't support the `Intl.Segmenter` API](https://github.com/faceb
 
 | Name                         | Unicode® | ESM? |   Size    | Size (min) | Size (min+gzip) | Size (min+br) | Size (min+zstd) |
 |------------------------------|----------|------|----------:|-----------:|----------------:|--------------:|----------------:|
-| `unicode-segmenter/grapheme` |   16.0.0 |   ✔️ |    15,449 |     12,108 |           5,060 |         3,738 |           4,744 |
+| `unicode-segmenter/grapheme` |   16.0.0 |   ✔️ |    16,752 |     12,562 |           5,308 |         3,968 |           5,013 |
 | `graphemer`                  |   15.0.0 |   ✖️ ️|   410,435 |     95,104 |          15,752 |        10,660 |          15,911 |
 | `grapheme-splitter`          |   10.0.0 |   ✖️ |   122,254 |     23,682 |           7,852 |         4,802 |           6,753 |
 | `@formatjs/intl-segmenter`*  |   15.0.0 |   ✖️ |   603,510 |    369,673 |          72,273 |        49,530 |          68,027 |
@@ -231,7 +231,7 @@ Since [Hermes doesn't support the `Intl.Segmenter` API](https://github.com/faceb
 
 | Name                         | Bytecode size | Bytecode size (gzip)* |
 |------------------------------|--------------:|----------------------:|
-| `unicode-segmenter/grapheme` |        21,372 |                11,328 |
+| `unicode-segmenter/grapheme` |        26,309 |                13,811 |
 | `graphemer`                  |       134,089 |                31,766 |
 | `grapheme-splitter`          |        63,946 |                19,162 |
 
diff --git a/src/grapheme.js b/src/grapheme.js
index d7f227c..624f1db 100644
--- a/src/grapheme.js
+++ b/src/grapheme.js
@@ -193,21 +193,54 @@ export function* splitGraphemes(text) {
   for (let s of graphemeSegments(text)) yield s.segment;
 }
 
+const
+  /** 0x80 */
+  SEG0_MIN = 128,
+  /** 0x2FFF */
+  SEG0_MAX = 12287,
+  /** 0xA000 */
+  SEG1_MIN = 40960,
+  /** 0xDFFF */
+  SEG1_MAX = 57343;
+
 /**
- * Precompute a fast lookup table for BMP code points (0..0xFFFF)
- * This table maps each code point to its Grapheme_Cluster_Break category.
- * It is generated once at module load time using the grapheme_ranges data.
- * The table is a Uint8Array of length 0x10000 (64KB), which is acceptable in memory.
- * For code points >= 0x10000 we fall back to binary search.
+ * Segmented 4-bit packed lookup tables for BMP code points.
+ *
+ * Memory optimization: Skip regions that are almost 100% category {@link GC_Any}:
+ * - 0x3000-0x9FFF (CJK): 28,672 codepoints, only 12 non-Any ranges -> need to be inlined
+ * - 0xE000-0xFDFF (Private Use): 7,680 codepoints, only 1 non-Any range -> very rare, but quite simple to be inlined
+ * - 0xFE00-0xFFFF (Specials): 512 codepoints, only 5 ranges -> very rare, fall back to binary search
+ *
+ * Cache segments (4-bit packed, 2 categories per byte):
+ * - SEG0: 0x0080-0x2FFF (12,160 codepoints -> 6,080 bytes)
+ * - SEG1: 0xA000-0xDFFF (16,384 codepoints -> 8,192 bytes)
+ *
+ * Total: 14,272 bytes (~14KB)
  */
-let bmpLookup = new Uint8Array(BMP_MAX + 1);
-let bmpCursor = (() => {
+const SEG0 = new Uint8Array((SEG0_MAX - SEG0_MIN + 1) >> 1);
+const SEG1 = new Uint8Array((SEG1_MAX - SEG1_MIN + 1) >> 1);
+const SEG_CURSOR = (() => {
   let cursor = 0;
-  let cp = 0;
-  while (cp <= BMP_MAX) {
-    let range = grapheme_ranges[cursor++];
-    for (cp = range[0]; cp <= range[1]; cp++) {
-      bmpLookup[cp] = range[2];
+  while (cursor < grapheme_ranges.length) {
+    let [start, end, cat] = grapheme_ranges[cursor];
+    if (start > SEG1_MAX) break;
+    cursor++;
+
+    // Skip ranges outside segments (ASCII/CJK/PrivateUse fast paths)
+    if (end < SEG0_MIN || (start > SEG0_MAX && end < SEG1_MIN)) continue;
+
+    for (let cp = start; cp <= end; cp++) {
+      let /** @type {Uint8Array} */ seg, idx = 0;
+
+      if (cp >= SEG0_MIN && cp <= SEG0_MAX) {
+        seg = SEG0; idx = (cp - SEG0_MIN) >> 1;
+      } else if (cp >= SEG1_MIN) {
+        seg = SEG1; idx = (cp - SEG1_MIN) >> 1;
+      } else continue;
+
+      seg[idx] = cp & 1
+        ? (seg[idx] & 0x0F) | (cat << 4)
+        : (seg[idx] & 0xF0) | cat;
     }
   }
   return cursor;
@@ -222,15 +255,41 @@ let bmpCursor = (() => {
  * @return {GraphemeCategoryNum}
  */
 function cat(cp) {
-  // Fast lookup for BMP (0x0000..0xFFFF) using precomputed table
-  if (cp <= BMP_MAX) {
-    return /** @type {GraphemeCategoryNum} */ (bmpLookup[cp]);
+  // ASCII fast path
+  if (cp < SEG0_MIN) {
+    if (cp >= 32) return 0;
+    if (cp === 10) return 6;
+    if (cp === 13) return 1;
+    return 2;
   }
-
-  // Binary search, starting from bmpCursor
-  let index = findUnicodeRangeIndex(cp, grapheme_ranges, bmpCursor);
-  return index < 0 ? 0 : grapheme_ranges[index][2];
-};
+  // Segment 0
+  if (cp <= SEG0_MAX) {
+    let byte = SEG0[(cp - SEG0_MIN) >> 1];
+    return /** @type {GraphemeCategoryNum} */ (cp & 1 ? byte >> 4 : byte & 0x0F);
+  }
+  // CJK fast path
+  if (cp < SEG1_MIN) {
+    if (cp < 0x3030) return cp >= 0x302A ? 3 : 0;
+    if (cp < 0x309B) {
+      if (cp === 0x3030 || cp === 0x303D) return 4;
+      return cp >= 0x3099 ? 3 : 0;
+    }
+    if (cp === 0x3297 || cp === 0x3299) return 4;
+    return 0;
+  }
+  // Segment 1
+  if (cp <= SEG1_MAX) {
+    let byte = SEG1[(cp - SEG1_MIN) >> 1];
+    return /** @type {GraphemeCategoryNum} */ (cp & 1 ? byte >> 4 : byte & 0x0F);
+  }
+  // Private Use fast path
+  if (cp < 0xFE00) {
+    return cp === 0xFB1E ? 3 : 0;
+  }
+  // Specials (0xFE00-0xFFFF) and Non-BMP: binary search
+  let idx = findUnicodeRangeIndex(cp, grapheme_ranges, SEG_CURSOR);
+  return idx < 0 ? 0 : grapheme_ranges[idx][2];
+}
 
 /**
  * @param {number} cp