Skip to content

Commit f2018ed

Browse files
cometkim and claude authored
Optimize large segments (#80)
Co-Authored-By: Claude <[email protected]>
1 parent 51b4e6b commit f2018ed

File tree

3 files changed

+14
-10
lines changed

3 files changed

+14
-10
lines changed

.changeset/many-days-melt.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
---
2+
"unicode-segmenter": patch
3+
---
4+
5+
Optimize grapheme segmenter.
6+
7+
By eliminating unnecessary string concatenation, this significantly improves performance when creating large segments (e.g. Demonic, Hindi, Flags, Skin tones).
8+
It also reduces the memory footprint by removing the internal segment buffer.

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -256,7 +256,7 @@ Since [Hermes doesn't support the `Intl.Segmenter` API](https://github.com/faceb
256256

257257
| Name | Unicode® | ESM? | Size | Size (min) | Size (min+gzip) | Size (min+br) |
258258
|------------------------------|----------|------|----------:|-----------:|----------------:|--------------:|
259-
| `unicode-segmenter/grapheme` | 16.0.0 | ✔️ | 16,031 | 12,149 | 5,063 | 3,792 |
259+
| `unicode-segmenter/grapheme` | 16.0.0 | ✔️ | 16,003 | 12,153 | 5,065 | 3,775 |
260260
| `graphemer` | 15.0.0 | ✖️ ️| 410,435 | 95,104 | 15,752 | 10,660 |
261261
| `grapheme-splitter` | 10.0.0 | ✖️ | 122,252 | 23,680 | 7,852 | 4,841 |
262262
| `@formatjs/intl-segmenter`* | 15.0.0 | ✖️ | 603,285 | 369,560 | 72,218 | 49,416 |
@@ -272,7 +272,7 @@ Since [Hermes doesn't support the `Intl.Segmenter` API](https://github.com/faceb
272272

273273
| Name | Bytecode size | Bytecode size (gzip)* |
274274
|------------------------------|--------------:|----------------------:|
275-
| `unicode-segmenter/grapheme` | 22,097 | 11,558 |
275+
| `unicode-segmenter/grapheme` | 22,087 | 11,548 |
276276
| `graphemer` | 133,978 | 31,713 |
277277
| `grapheme-splitter` | 63,835 | 19,137 |
278278

src/grapheme.js

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -88,13 +88,10 @@ export function* graphemeSegments(input) {
8888
let _hd = cp;
8989

9090
let index = 0;
91-
let segment = '';
9291

9392
while (true) {
94-
segment += input[cursor++];
95-
if (!isBMP(cp)) {
96-
segment += input[cursor++];
97-
}
93+
let chSize = isBMP(cp) ? 1 : 2;
94+
cursor += chSize;
9895

9996
// Note: Of course the nullish coalescing is useful here,
10097
// but avoid it for aggressive compatibility and perf claim
@@ -120,7 +117,7 @@ export function* graphemeSegments(input) {
120117
catAfter = cat(cp, cache);
121118
} else {
122119
yield {
123-
segment,
120+
segment: input.slice(index, cursor),
124121
index,
125122
input,
126123
_hd,
@@ -150,7 +147,7 @@ export function* graphemeSegments(input) {
150147

151148
if (isBoundary(catBefore, catAfter, risCount, emoji, incb)) {
152149
yield {
153-
segment,
150+
segment: input.slice(index, cursor),
154151
index,
155152
input,
156153
_hd,
@@ -160,7 +157,6 @@ export function* graphemeSegments(input) {
160157

161158
// flush
162159
index = cursor;
163-
segment = '';
164160
emoji = false;
165161
incb = false;
166162
catBegin = catAfter;

0 commit comments

Comments
 (0)