11import stripAnsi from 'strip-ansi' ;
22import { eastAsianWidth } from 'get-east-asian-width' ;
3- import emojiRegex from 'emoji-regex' ;
3+
/**
Logic:
- Segment graphemes to match how terminals render clusters.
- Width rules:
	1. Skip non-printing clusters (Default_Ignorable, Control, pure Mark, lone Surrogates). Tabs are ignored by design.
	2. Emoji clusters are double-width only when VS16 is present, the base has Emoji_Presentation (and not VS15), or the cluster has multiple scalars (flags, ZWJ, keycaps, tags, etc.).
	3. Otherwise use East Asian Width of the cluster's first visible code point, and add widths for trailing Halfwidth/Fullwidth Forms within the same cluster (e.g., dakuten/handakuten/prolonged sound mark).
*/
412
513const segmenter = new Intl . Segmenter ( ) ;
614
7- const defaultIgnorableCodePointRegex = / ^ \p{ Default_Ignorable_Code_Point} $ / u;
15+ // Whole-cluster zero-width
16+ const zeroWidthClusterRegex = / ^ (?: \p{ Default_Ignorable_Code_Point} | \p{ Control} | \p{ Mark} | \p{ Surrogate} ) + $ / v;
17+
18+ // Pick the base scalar if the cluster starts with Prepend/Format/Marks
19+ const leadingNonPrintingRegex = / ^ [ \p{ Default_Ignorable_Code_Point} \p{ Control} \p{ Format} \p{ Mark} \p{ Surrogate} ] + / v;
20+
21+ // RGI emoji sequences
22+ const rgiEmojiRegex = / ^ \p{ RGI_Emoji} $ / v;
23+ // Default emoji presentation (single-scalar emoji without VS16)
24+ const emojiPresentationRegex = / ^ \p{ Emoji_Presentation} $ / v;
25+
26+ function baseVisible ( segment ) {
27+ return segment . replace ( leadingNonPrintingRegex , '' ) ;
28+ }
29+
30+ function isZeroWidthCluster ( segment ) {
31+ return zeroWidthClusterRegex . test ( segment ) ;
32+ }
33+
34+ function isDoubleWidthEmojiCluster ( segment ) {
35+ const visible = baseVisible ( segment ) ;
36+ const baseScalar = visible . codePointAt ( 0 ) ;
37+ const baseChar = String . fromCodePoint ( baseScalar ) ;
38+ const baseIsEmojiPresentation = emojiPresentationRegex . test ( baseChar ) ;
39+ const hasVs16 = segment . includes ( '\uFE0F' ) ;
40+ const hasVs15 = segment . includes ( '\uFE0E' ) ;
41+ const codePointCount = [ ...segment ] . length ;
42+ const multiScalarMeaningful = codePointCount > 1 && ! ( codePointCount === 2 && hasVs15 && ! hasVs16 ) ;
43+
44+ return hasVs16 || ( baseIsEmojiPresentation && ! hasVs15 ) || multiScalarMeaningful ;
45+ }
46+
47+ function trailingHalfwidthWidth ( segment , eastAsianWidthOptions ) {
48+ let extra = 0 ;
49+ if ( segment . length > 1 ) {
50+ for ( const char of segment . slice ( 1 ) ) {
51+ if ( char >= '\uFF00' && char <= '\uFFEF' ) {
52+ extra += eastAsianWidth ( char . codePointAt ( 0 ) , eastAsianWidthOptions ) ;
53+ }
54+ }
55+ }
856
9- export default function stringWidth ( string , options = { } ) {
10- if ( typeof string !== 'string' || string . length === 0 ) {
57+ return extra ;
58+ }
59+
60+ export default function stringWidth ( input , options = { } ) {
61+ if ( typeof input !== 'string' || input . length === 0 ) {
1162 return 0 ;
1263 }
1364
@@ -16,6 +67,8 @@ export default function stringWidth(string, options = {}) {
1667 countAnsiEscapeCodes = false ,
1768 } = options ;
1869
70+ let string = input ;
71+
1972 if ( ! countAnsiEscapeCodes ) {
2073 string = stripAnsi ( string ) ;
2174 }
@@ -27,55 +80,24 @@ export default function stringWidth(string, options = {}) {
2780 let width = 0 ;
2881 const eastAsianWidthOptions = { ambiguousAsWide : ! ambiguousIsNarrow } ;
2982
30- for ( const { segment : character } of segmenter . segment ( string ) ) {
31- const codePoint = character . codePointAt ( 0 ) ;
32-
33- // Ignore control characters
34- if ( codePoint <= 0x1F || ( codePoint >= 0x7F && codePoint <= 0x9F ) ) {
83+ for ( const { segment} of segmenter . segment ( string ) ) {
84+ // Zero-width / non-printing clusters
85+ if ( isZeroWidthCluster ( segment ) ) {
3586 continue ;
3687 }
3788
38- // Ignore zero-width characters
39- if (
40- ( codePoint >= 0x20_0B && codePoint <= 0x20_0F ) // Zero-width space, non-joiner, joiner, left-to-right mark, right-to-left mark
41- || codePoint === 0xFE_FF // Zero-width no-break space
42- ) {
43- continue ;
44- }
45-
46- // Ignore combining characters
47- if (
48- ( codePoint >= 0x3_00 && codePoint <= 0x3_6F ) // Combining diacritical marks
49- || ( codePoint >= 0x1A_B0 && codePoint <= 0x1A_FF ) // Combining diacritical marks extended
50- || ( codePoint >= 0x1D_C0 && codePoint <= 0x1D_FF ) // Combining diacritical marks supplement
51- || ( codePoint >= 0x20_D0 && codePoint <= 0x20_FF ) // Combining diacritical marks for symbols
52- || ( codePoint >= 0xFE_20 && codePoint <= 0xFE_2F ) // Combining half marks
53- ) {
54- continue ;
55- }
56-
57- // Ignore surrogate pairs
58- if ( codePoint >= 0xD8_00 && codePoint <= 0xDF_FF ) {
59- continue ;
60- }
61-
62- // Ignore variation selectors
63- if ( codePoint >= 0xFE_00 && codePoint <= 0xFE_0F ) {
64- continue ;
65- }
66-
67- // This covers some of the above cases, but we still keep them for performance reasons.
68- if ( defaultIgnorableCodePointRegex . test ( character ) ) {
69- continue ;
70- }
71-
72- // TODO: Use `/\p{RGI_Emoji}/v` when targeting Node.js 20.
73- if ( emojiRegex ( ) . test ( character ) ) {
89+ // Emoji width logic
90+ if ( rgiEmojiRegex . test ( segment ) && isDoubleWidthEmojiCluster ( segment ) ) {
7491 width += 2 ;
7592 continue ;
7693 }
7794
95+ // Everything else: EAW of the cluster’s first visible scalar
96+ const codePoint = baseVisible ( segment ) . codePointAt ( 0 ) ;
7897 width += eastAsianWidth ( codePoint , eastAsianWidthOptions ) ;
98+
99+ // Add width for trailing Halfwidth and Fullwidth Forms (e.g., ゙, ゚, ー)
100+ width += trailingHalfwidthWidth ( segment , eastAsianWidthOptions ) ;
79101 }
80102
81103 return width ;
0 commit comments