Skip to content

Commit 0862e85

Browse files
committed
Rewrite wcswidth as a state machine
1 parent 0b9a371 commit 0862e85

File tree

2 files changed

+47
-31
lines changed

2 files changed

+47
-31
lines changed

kitty/screen.c

Lines changed: 46 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -1867,39 +1867,54 @@ screen_wcswidth(PyObject UNUSED *self, PyObject *str) {
18671867
unsigned long ans = 0;
18681868
char_type prev_ch = 0;
18691869
int prev_width = 0;
1870-
bool in_sgr = false;
1870+
typedef enum {NORMAL, IN_SGR, FLAG_PAIR_STARTED} WCSState;
1871+
WCSState state = NORMAL;
18711872
for (i = 0; i < len; i++) {
18721873
char_type ch = PyUnicode_READ(kind, data, i);
1873-
if (in_sgr) {
1874-
if (ch == 'm') in_sgr = false;
1875-
continue;
1876-
}
1877-
if (ch == 0x1b && i + 1 < len && PyUnicode_READ(kind, data, i + 1) == '[') { in_sgr = true; continue; }
1878-
if (ch == 0xfe0f) {
1879-
if (is_emoji_presentation_base(prev_ch) && prev_width == 1) {
1880-
ans += 1;
1881-
prev_width = 2;
1882-
} else prev_width = 0;
1883-
} else if (ch == 0xfe0e) {
1884-
if (is_emoji_presentation_base(prev_ch) && prev_width == 2) {
1885-
ans -= 1;
1886-
prev_width = 1;
1887-
} else prev_width = 0;
1888-
} else if (is_flag_pair(prev_ch, ch)) {
1889-
prev_width = 2;
1890-
} else {
1891-
int w = wcwidth_std(ch);
1892-
switch(w) {
1893-
case -1:
1894-
case 0:
1895-
prev_width = 0; break;
1896-
case 2:
1897-
prev_width = 2; break;
1898-
default:
1899-
prev_width = 1; break;
1900-
}
1901-
ans += prev_width;
1902-
}
1874+
switch(state) {
1875+
case IN_SGR: {
1876+
if (ch == 'm') state = NORMAL;
1877+
} continue;
1878+
1879+
case FLAG_PAIR_STARTED: {
1880+
state = NORMAL;
1881+
if (is_flag_pair(prev_ch, ch)) break;
1882+
} /* fallthrough */
1883+
1884+
case NORMAL: {
1885+
if (ch == 0x1b && i + 1 < len && PyUnicode_READ(kind, data, i + 1) == '[') { state = IN_SGR; continue; }
1886+
switch(ch) {
1887+
case 0xfe0f: {
1888+
if (is_emoji_presentation_base(prev_ch) && prev_width == 1) {
1889+
ans += 1;
1890+
prev_width = 2;
1891+
} else prev_width = 0;
1892+
} break;
1893+
1894+
case 0xfe0e: {
1895+
if (is_emoji_presentation_base(prev_ch) && prev_width == 2) {
1896+
ans -= 1;
1897+
prev_width = 1;
1898+
} else prev_width = 0;
1899+
} break;
1900+
1901+
default: {
1902+
if (is_flag_codepoint(ch)) state = FLAG_PAIR_STARTED;
1903+
int w = wcwidth_std(ch);
1904+
switch(w) {
1905+
case -1:
1906+
case 0:
1907+
prev_width = 0; break;
1908+
case 2:
1909+
prev_width = 2; break;
1910+
default:
1911+
prev_width = 1; break;
1912+
}
1913+
ans += prev_width;
1914+
} break;
1915+
} break; // switch(ch)
1916+
} break; // case NORMAL
1917+
} // switch(state)
19031918
prev_ch = ch;
19041919
}
19051920
return PyLong_FromUnsignedLong(ans);

kitty_tests/datatypes.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -362,6 +362,7 @@ def w(x):
362362
self.ae(wcswidth('\U0001f1e6a'), 3)
363363
self.ae(wcswidth('\U0001F1E6a\U0001F1E8a'), 6)
364364
self.ae(wcswidth('\U0001F1E6\U0001F1E8a'), 3)
365+
self.ae(wcswidth('\U0001F1E6\U0001F1E8\U0001F1E6'), 4)
365366
# Regional indicator symbols (unicode flags) are defined as having
366367
# Emoji_Presentation so must have width 2
367368
self.ae(tuple(map(w, '\U0001f1ee\U0001f1f3')), (2, 2))

0 commit comments

Comments
 (0)