diff --git a/.changeset/bitter-suits-arrive.md b/.changeset/bitter-suits-arrive.md
new file mode 100644
index 0000000..f61049f
--- /dev/null
+++ b/.changeset/bitter-suits-arrive.md
@@ -0,0 +1,12 @@
+---
+"unicode-segmenter": patch
+---
+
+Improve runtime performance of Unicode text processing.
+
+By using a precomputed lookup table for the grapheme categories of BMP characters, it improves performance by more than 10% for common cases, and even ~30% for some extreme cases.
+
+The lookup table consumes an additional 64 KB of memory, which is acceptable for most JavaScript runtime environments.
+
+This optimization is introduced by OpenCode w/ OpenAI's GPT-OSS-120B. It is the second successful attempt at meaningful optimization in this library.
+(The first one was Claude Code w/ Claude Opus 4.0.)
diff --git a/README.md b/README.md
index 3b49ce3..2444b0f 100644
--- a/README.md
+++ b/README.md
@@ -220,7 +220,7 @@ Since [Hermes doesn't support the `Intl.Segmenter` API](https://github.com/faceb
 
 | Name                         | Unicode® | ESM? |   Size    | Size (min) | Size (min+gzip) | Size (min+br) | Size (min+zstd) |
 |------------------------------|----------|------|----------:|-----------:|----------------:|--------------:|----------------:|
-| `unicode-segmenter/grapheme` |   16.0.0 |   ✔️ |    15,588 |     12,168 |           5,038 |         3,715 |           4,727 |
+| `unicode-segmenter/grapheme` |   16.0.0 |   ✔️ |    15,730 |     12,199 |           5,113 |         3,787 |           4,807 |
 | `graphemer`                  |   15.0.0 |   ✖️ ️|   410,435 |     95,104 |          15,752 |        10,660 |          15,911 |
 | `grapheme-splitter`          |   10.0.0 |   ✖️ |   122,252 |     23,680 |           7,852 |         4,841 |           6,750 |
 | `@formatjs/intl-segmenter`*  |   15.0.0 |   ✖️ |   603,285 |    369,560 |          72,218 |        49,416 |          67,975 |
@@ -236,7 +236,7 @@ Since [Hermes doesn't support the `Intl.Segmenter` API](https://github.com/faceb
 
 | Name                         | Bytecode size | Bytecode size (gzip)* |
 |------------------------------|--------------:|----------------------:|
-| `unicode-segmenter/grapheme` |        21,001 |                11,065 |
+| `unicode-segmenter/grapheme` |        21,435 |                11,351 |
 | `graphemer`                  |       133,978 |                31,713 |
 | `grapheme-splitter`          |        63,835 |                19,137 |
 
@@ -246,16 +246,16 @@ Since [Hermes doesn't support the `Intl.Segmenter` API](https://github.com/faceb
 
 Here is a brief explanation, and you can see [archived benchmark results](benchmark/grapheme/_records).
 
-**Performance in Node.js**: `unicode-segmenter/grapheme` is significantly faster than alternatives.
-- 6\~15x faster than other JavaScript libraries
-- 1.5\~3x faster than WASM binding of the Rust's [unicode-segmentation]
-- 1.5\~3x faster than built-in [`Intl.Segmenter`]
+**Performance in Node.js/Bun/Deno**: `unicode-segmenter/grapheme` has best-in-class performance.
+- 8\~35x faster than other JavaScript libraries.
+- 3\~5x faster than the WASM binding of Rust's [unicode-segmentation].
+- 2\~3x faster than built-in [`Intl.Segmenter`].
 
-**Performance in Bun**: `unicode-segmenter/grapheme` has almost the same performance as the built-in [`Intl.Segmenter`], with no performance degradation compared to other JavaScript libraries.
+**Performance in Browsers**: The performance in browser environments varies greatly due to differences in browser engines, which makes benchmarking inconsistent, but:
+- Still significantly faster than other JavaScript libraries.
+- Generally outperforms the built-in [`Intl.Segmenter`] in most browser environments, except Firefox.
 
-**Performance in Browsers**: The performance in browser environments varies greatly due to differences in browser engines and versions, which makes benchmarking less consistent. Despite these variations, `unicode-segmenter/grapheme` generally outperforms other JavaScript libraries in most environments.
-
-**Performance in React Native**: `unicode-segmenter/grapheme` is significantly faster than alternatives when compiled to Hermes bytecode. It's 3\~8x faster than `graphemer` and 20\~26x faster than `grapheme-splitter`, with the performance gap increasing with input size.
+**Performance in React Native**: `unicode-segmenter/grapheme` is still faster than alternatives when compiled to Hermes bytecode. It's 3\~8x faster than `graphemer` and 20\~26x faster than `grapheme-splitter`, with the performance gap increasing with input size.
 
 **Performance in QuickJS**: `unicode-segmenter/grapheme` is the only usable library in terms of performance.
 
diff --git a/src/core.js b/src/core.js
index 1c4f9cb..8ac4448 100644
--- a/src/core.js
+++ b/src/core.js
@@ -63,9 +63,7 @@ export function decodeUnicodeData(data, cats = '') {
  * @param {CategorizedUnicodeRange<T>[]} ranges
  * @return {number} index of matched unicode range, or -1 if no match
  */
-export function findUnicodeRangeIndex(cp, ranges) {
-  let lo = 0
-    , hi = ranges.length - 1;
+export function findUnicodeRangeIndex(cp, ranges, lo = 0, hi = ranges.length - 1) {
   while (lo <= hi) {
     let mid = lo + hi >>> 1
       , range = ranges[mid];
diff --git a/src/grapheme.js b/src/grapheme.js
index bdbda24..3c6e070 100644
--- a/src/grapheme.js
+++ b/src/grapheme.js
@@ -33,6 +33,8 @@ import { consonant_ranges } from './_incb_data.js';
 
 export { GraphemeCategory };
 
+const BMP_MAX = 0xFFFF;
+
 /**
  * Unicode segmentation by extended grapheme rules.
  *
@@ -49,7 +51,7 @@ export function* graphemeSegments(input) {
   if (cp == null) return;
 
   /** Current cursor position. */
-  let cursor = cp <= 0xFFFF ? 1 : 2;
+  let cursor = cp <= BMP_MAX ? 1 : 2;
 
   /** Total length of the input string. */
   let len = input.length;
@@ -137,7 +139,7 @@ export function* graphemeSegments(input) {
       _hd = cp;
     }
 
-    cursor += cp <= 0xFFFF ? 1 : 2;
+    cursor += cp <= BMP_MAX ? 1 : 2;
     catBefore = catAfter;
   }
 
@@ -194,6 +196,26 @@ export function* splitGraphemes(text) {
   for (let s of graphemeSegments(text)) yield s.segment;
 }
 
+/**
+ * Precomputed Grapheme_Cluster_Break lookup table for BMP code points (0..0xFFFF),
+ * filled once at module load from grapheme_ranges (Uint8Array, 64KB).
+ * bmpCursor is the index of the first range reaching beyond the BMP; cat() starts
+ * its binary search there, so a range must not be skipped merely because the
+ * fill loop has visited it.
+ */
+let bmpLookup = new Uint8Array(BMP_MAX + 1);
+let bmpCursor = (() => {
+  let cursor = 0;
+  for (; cursor < grapheme_ranges.length; cursor++) {
+    let range = grapheme_ranges[cursor];
+    if (range[0] > BMP_MAX) break;
+    // A range may straddle the BMP boundary: fill its BMP part, keep it searchable.
+    bmpLookup.fill(range[2], range[0], Math.min(range[1], BMP_MAX) + 1);
+    if (range[1] > BMP_MAX) break;
+  }
+  return cursor;
+})();
+
 /**
  * `Grapheme_Cluster_Break` property value of a given codepoint
  *
@@ -204,35 +226,26 @@ export function* splitGraphemes(text) {
  * @return {GraphemeCategoryNum}
  */
 function cat(cp, cache) {
-  if (cp < 127) {
-    // Special-case optimization for ascii, except U+007F.  This
-    // improves performance even for many primarily non-ascii texts,
-    // due to use of punctuation and white space characters from the
-    // ascii range.
-    if (cp >= 32) {
-      return 0 /* GC_Any */;
-    } else if (cp === 10) {
-      return 6 /* GC_LF */;
-    } else if (cp === 13) {
-      return 1 /* GC_CR */;
-    } else {
-      return 2 /* GC_Control */;
-    }
-  } else {
-    // If this char isn't within the cached range, update the cache to the
-    // range that includes it.
-    if (cp < cache[0] || cp > cache[1]) {
-      let index = findUnicodeRangeIndex(cp, grapheme_ranges);
-      if (index < 0) {
-        return 0;
-      }
-      let range = grapheme_ranges[index];
-      cache[0] = range[0];
-      cache[1] = range[1];
-      cache[2] = range[2];
-    }
+  // BMP code points (0x0000..0xFFFF) are answered from the precomputed table; the cache is left untouched
+  if (cp <= BMP_MAX) {
+    return /** @type {GraphemeCategoryNum} */ (bmpLookup[cp]);
+  }
+
+  // Reuse the cached range [cache[0], cache[1]] when cp falls inside it
+  if (cp >= cache[0] && cp <= cache[1]) {
     return cache[2];
   }
+
+  // Binary search over grapheme_ranges, with the lower bound set to bmpCursor
+  let index = findUnicodeRangeIndex(cp, grapheme_ranges, bmpCursor);
+  if (index < 0) {
+    return 0 /* GC_Any */;
+  }
+
+  const range = grapheme_ranges[index];
+  cache[0] = range[0];
+  cache[1] = range[1];
+  return (cache[2] = range[2]);
 };
 
 /**
@@ -291,46 +304,43 @@ function isBoundary(catBefore, catAfter, risCount, emoji, incb) {
 
   // GB6 - L x (L | V | LV | LVT)
   if (catBefore === 5) {
-    if (catAfter === 5 || catAfter === 7 || catAfter === 8 || catAfter === 13) {
-      return false;
-    }
+    return !(catAfter === 5 || catAfter === 7 || catAfter === 8 || catAfter === 13);
+  }
 
-  } else {
-    // GB7 - (LV | V) x (V | T)
-    if (
-      (catBefore === 7 || catBefore === 13) &&
-      (catAfter === 13 || catAfter === 12)
-    ) {
-      return false;
-    }
+  // GB7 - (LV | V) x (V | T)
+  if (
+    (catBefore === 7 || catBefore === 13) &&
+    (catAfter === 13 || catAfter === 12)
+  ) {
+    return false;
+  }
 
-    // GB8 - (LVT | T) x T
-    if (
-      (catBefore === 8 || catBefore === 12) &&
-      catAfter === 12
-    ) {
-      return false;
-    }
+  // GB8 - (LVT | T) x T
+  if (
+    (catBefore === 8 || catBefore === 12) &&
+    catAfter === 12
+  ) {
+    return false;
+  }
 
-    // GB9b
-    if (catBefore === 9) {
-      return false;
-    }
+  // GB9b
+  if (catBefore === 9) {
+    return false;
+  }
 
-    // GB9c
-    if (catAfter === 0 && incb) {
-      return false;
-    }
+  // GB9c
+  if (catAfter === 0 && incb) {
+    return false;
+  }
 
-    // GB11
-    if (catBefore === 14 && catAfter === 4) {
-      return !emoji;
-    }
+  // GB11
+  if (catBefore === 14 && catAfter === 4) {
+    return !emoji;
+  }
 
-    // GB12, GB13
-    if (catBefore === 10 && catAfter === 10) {
-      return risCount % 2 === 0;
-    }
+  // GB12, GB13
+  if (catBefore === 10 && catAfter === 10) {
+    return risCount % 2 === 0;
   }
 
   // GB999