Skip to content

Commit 2896349

Browse files
committed
build(deps): bump unicode-width from =0.1.12 to 0.2.2
1 parent 0805bc8 commit 2896349

17 files changed

Lines changed: 152 additions & 97 deletions

File tree

Cargo.lock

Lines changed: 5 additions & 10 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ sonic-rs = "0.5"
5757
globset = "0.4"
5858
etcetera = "0.11"
5959
arc-swap = "1.9"
60+
memchr = "2"
6061

6162
[workspace.package]
6263
version = "25.7.1"

helix-core/Cargo.toml

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,7 @@ ropey.workspace = true
2424
smallvec = "1.15"
2525
smartstring = "1.0.1"
2626
unicode-segmentation.workspace = true
27-
# unicode-width is changing width definitions
28-
# that both break our logic and disagree with common
29-
# width definitions in terminals, we need to replace it.
30-
# For now lets lock the version to avoid rendering glitches
31-
# when installing without `--locked`
32-
unicode-width = "=0.1.12"
27+
unicode-width = "0.2.2"
3328
unicode-general-category = "1.1"
3429
slotmap.workspace = true
3530
tree-house.workspace = true
@@ -58,6 +53,8 @@ parking_lot.workspace = true
5853
globset.workspace = true
5954
regex-cursor = "0.1.5"
6055

56+
memchr.workspace = true
57+
6158
[dev-dependencies]
6259
quickcheck = { version = "1", default-features = false }
6360
indoc = "2.0.6"

helix-core/src/graphemes.rs

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
//! Based on <https://github.com/cessen/led/blob/c4fa72405f510b7fd16052f90a598c429b3104a6/src/graphemes.rs>
44
use ropey::{str_utils::byte_to_char_idx, RopeSlice};
55
use unicode_segmentation::{GraphemeCursor, GraphemeIncomplete};
6-
use unicode_width::UnicodeWidthStr;
76

87
use std::borrow::Cow;
98
use std::fmt::{self, Debug, Display};
@@ -13,7 +12,7 @@ use std::ptr::NonNull;
1312
use std::{slice, str};
1413

1514
use crate::chars::{char_is_whitespace, char_is_word};
16-
use crate::LineEnding;
15+
use crate::{unicode, LineEnding};
1716

1817
#[inline]
1918
pub fn tab_width_at(visual_x: usize, tab_width: u16) -> usize {
@@ -93,29 +92,30 @@ impl Display for Grapheme<'_> {
9392
}
9493
}
9594

95+
/// Returns the width of the grapheme.
96+
///
97+
/// # Invariant
98+
///
99+
/// This function should only be passed a single grapheme.
100+
#[inline]
96101
#[must_use]
97102
pub fn grapheme_width(g: &str) -> usize {
103+
// ASCII fast-path.
98104
if g.as_bytes()[0] <= 127 {
99-
// Fast-path ascii.
100-
// Point 1: theoretically, ascii control characters should have zero
101-
// width, but in our case we actually want them to have width: if they
102-
// show up in text, we want to treat them as textual elements that can
103-
// be edited. So we can get away with making all ascii single width
104-
// here.
105-
// Point 2: we're only examining the first codepoint here, which means
106-
// we're ignoring graphemes formed with combining characters. However,
107-
// if it starts with ascii, it's going to be a single-width grapeheme
108-
// regardless, so, again, we can get away with that here.
109-
// Point 3: we're only examining the first _byte_. But for utf8, when
110-
// checking for ascii range values only, that works.
105+
// We're only examining the first _byte_, which for UTF-8 is enough when
106+
// checking for ASCII range values only. It does mean we're ignoring
107+
// graphemes formed with combining characters. However, if it starts with
108+
// ASCII, it's going to be a single-width grapheme regardless, so, again,
109+
// we can get away with that here.
110+
//
111+
// Theoretically, ASCII control characters should have zero width, but
112+
// in our case we actually want them to have width: if they show up in
113+
// text, we want to treat them as textual elements that can be edited.
111114
1
112115
} else {
113-
// We use max(1) here because all grapeheme clusters--even illformed
114-
// ones--should have at least some width so they can be edited
115-
// properly.
116-
// TODO properly handle unicode width for all codepoints
117-
// example of where unicode width is currently wrong: 🤦🏼‍♂️ (taken from https://hsivonen.fi/string-length/)
118-
UnicodeWidthStr::width(g).max(1)
116+
// We use `.max(1)` here, because all grapheme clusters -- even ill-formed
117+
// ones -- should have at least some width, so they can be edited properly.
118+
unicode::width(g).max(1)
119119
}
120120
}
121121

helix-core/src/lib.rs

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,64 @@ pub mod unicode {
3838
pub use unicode_general_category as category;
3939
pub use unicode_segmentation as segmentation;
4040
pub use unicode_width as width;
41+
use unicode_width::UnicodeWidthStr;
42+
43+
#[inline]
44+
#[must_use]
45+
pub fn width(s: &str) -> usize {
46+
if s.is_empty() {
47+
return 0;
48+
}
49+
50+
let mut width = s.width();
51+
let chars = s.as_bytes();
52+
// `UnicodeWidthStr::width` assigns a width of 1 to certain control
53+
// sequences at the *string* level.
54+
//
55+
// Notably, the CRLF sequence (`"\r\n"`) is treated as a single unit
56+
// and has a total width of 1, even though `"\r"` and `"\n"` each
57+
// have a width of 1 when measured as separate one-character strings.
58+
//
59+
// This function needs newline and tab characters to contribute zero
60+
// width. We correct for this by subtracting the count of `'\n'` and
61+
// `'\t'` characters from the string width.
62+
//
63+
// NOTE: Subtracting on `\n` works for `\r\n`, as this grapheme only
64+
// counts as 1 width, so just subtracting 1 for the `\n` would zero
65+
// it out, removing its contribution to the width.
66+
for _ in memchr::memchr2_iter(b'\n', b'\t', chars) {
67+
if width == 0 {
68+
break;
69+
}
70+
71+
width -= 1;
72+
}
73+
width
74+
}
75+
76+
#[cfg(test)]
77+
mod test {
78+
use super::width;
79+
80+
#[test]
81+
fn should_have_expected_unicode_width() {
82+
assert_eq!(width("\n"), 0);
83+
assert_eq!(width("\t"), 0);
84+
assert_eq!(width("\r\n"), 0);
85+
assert_eq!(width("\r\n\t"), 0);
86+
assert_eq!(width("\n\t\r\n"), 0);
87+
assert_eq!(width("\n\tH\r\n"), 1);
88+
assert_eq!(width("🤦🏼‍♂️"), 2);
89+
assert_eq!(width("\n🤦🏼‍♂️\n"), 2);
90+
assert_eq!(width("\r\n🤦🏼‍♂️\r\n"), 2);
91+
assert_eq!(width("\t🤦🏼‍♂️\t"), 2);
92+
assert_eq!(width("\n\t🤦🏼‍♂️\t\n"), 2);
93+
assert_eq!(width("\u{200B}"), 0);
94+
assert_eq!(width("▲"), 1);
95+
assert_eq!(width(" ▲ "), 3);
96+
assert_eq!(width("┌"), 1);
97+
}
98+
}
4199
}
42100

43101
pub use helix_loader::find_workspace;

helix-term/src/commands/lsp.rs

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1397,17 +1397,15 @@ fn compute_inlay_hints_for_view(
13971397
// Truncate the hint if too long
13981398
if let Some(limit) = inlay_hints_length_limit {
13991399
// Limit on displayed width
1400-
use helix_core::unicode::{
1401-
segmentation::UnicodeSegmentation, width::UnicodeWidthStr,
1402-
};
1400+
use helix_core::unicode::{self, segmentation::UnicodeSegmentation};
14031401

1404-
let width = label.width();
1402+
let width = unicode::width(&label);
14051403
let limit = limit.get().into();
14061404
if width > limit {
14071405
let mut floor_boundary = 0;
14081406
let mut acc = 0;
14091407
for (i, grapheme_cluster) in label.grapheme_indices(true) {
1410-
acc += grapheme_cluster.width();
1408+
acc += unicode::width(grapheme_cluster);
14111409

14121410
if acc > limit {
14131411
floor_boundary = i;

helix-term/src/ui/editor.rs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,7 @@ use helix_core::{
1919
movement::Direction,
2020
syntax::{self, OverlayHighlights},
2121
text_annotations::TextAnnotations,
22-
unicode::width::UnicodeWidthStr,
23-
visual_offset_from_block, Change, Position, Range, Selection, Transaction,
22+
unicode, visual_offset_from_block, Change, Position, Range, Selection, Transaction,
2423
};
2524
use helix_view::{
2625
annotations::diagnostics::DiagnosticFilter,
@@ -1639,7 +1638,7 @@ impl Component for EditorView {
16391638

16401639
// render status msg
16411640
if let Some((status_msg, severity)) = &cx.editor.status_msg {
1642-
status_msg_width = status_msg.width();
1641+
status_msg_width = unicode::width(status_msg);
16431642
use helix_view::editor::Severity;
16441643
let style = if *severity == Severity::Error {
16451644
cx.editor.theme.get("error")

helix-term/src/ui/prompt.rs

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use crate::compositor::{Component, Compositor, Context, Event, EventResult};
22
use crate::{alt, ctrl, key, shift, ui};
33
use arc_swap::ArcSwap;
4-
use helix_core::syntax;
4+
use helix_core::{syntax, unicode};
55
use helix_view::document::Mode;
66
use helix_view::input::KeyEvent;
77
use helix_view::keyboard::KeyCode;
@@ -13,7 +13,6 @@ use tui::widgets::{Block, Widget};
1313

1414
use helix_core::{
1515
unicode::segmentation::{GraphemeCursor, UnicodeSegmentation},
16-
unicode::width::UnicodeWidthStr,
1716
Position,
1817
};
1918
use helix_view::{
@@ -544,7 +543,7 @@ impl Prompt {
544543
} else {
545544
let line_width = self.line_area.width as usize;
546545

547-
if self.line.width() < line_width {
546+
if unicode::width(&self.line) < line_width {
548547
self.anchor = 0;
549548
} else if self.cursor <= self.anchor {
550549
// Ensure the grapheme under the cursor is in view.
@@ -553,14 +552,14 @@ impl Prompt {
553552
.next_back()
554553
.map(|(i, _)| i)
555554
.unwrap_or_default();
556-
} else if self.line[self.anchor..self.cursor].width() > line_width {
555+
} else if unicode::width(&self.line[self.anchor..self.cursor]) > line_width {
557556
// Set the anchor to the last grapheme cluster before the width is exceeded.
558557
let mut width = 0;
559558
self.anchor = self.line[..self.cursor]
560559
.grapheme_indices(true)
561560
.rev()
562561
.find_map(|(idx, g)| {
563-
width += g.width();
562+
width += unicode::width(g);
564563
if width > line_width {
565564
Some(idx + g.len())
566565
} else {
@@ -571,16 +570,18 @@ impl Prompt {
571570
}
572571

573572
self.truncate_start = self.anchor > 0;
574-
self.truncate_end = self.line[self.anchor..].width() > line_width;
573+
self.truncate_end = unicode::width(&self.line[self.anchor..]) > line_width;
575574

576575
// if we keep inserting characters just before the end ellipsis, we move the anchor
577576
// so that those new characters are displayed
578-
if self.truncate_end && self.line[self.anchor..self.cursor].width() >= line_width {
577+
if self.truncate_end
578+
&& unicode::width(&self.line[self.anchor..self.cursor]) >= line_width
579+
{
579580
// Move the anchor forward by one non-zero-width grapheme.
580581
self.anchor += self.line[self.anchor..]
581582
.grapheme_indices(true)
582583
.find_map(|(idx, g)| {
583-
if g.width() > 0 {
584+
if unicode::width(g) > 0 {
584585
Some(idx + g.len())
585586
} else {
586587
None
@@ -771,11 +772,11 @@ impl Component for Prompt {
771772
.clip_left(self.prompt.len() as u16)
772773
.clip_right(if self.prompt.is_empty() { 2 } else { 0 });
773774

774-
let mut col = area.left() as usize + self.line[self.anchor..self.cursor].width();
775+
let mut col = area.left() as usize + unicode::width(&self.line[self.anchor..self.cursor]);
775776

776777
// ensure the cursor does not go beyond ellipses
777778
if self.truncate_end
778-
&& self.line[self.anchor..self.cursor].width() >= self.line_area.width as usize
779+
&& unicode::width(&self.line[self.anchor..self.cursor]) >= self.line_area.width as usize
779780
{
780781
col -= 1;
781782
}
@@ -784,7 +785,7 @@ impl Component for Prompt {
784785
col += self.line[self.cursor..]
785786
.graphemes(true)
786787
.next()
787-
.map_or(0, |g| g.width());
788+
.map_or(0, unicode::width);
788789
}
789790

790791
let line = area.height as usize - 1;

helix-term/src/ui/statusline.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use helix_core::indent::IndentStyle;
2-
use helix_core::{coords_at_pos, encoding, unicode::width::UnicodeWidthStr, Position};
2+
use helix_core::{coords_at_pos, encoding, unicode, Position};
33
use helix_lsp::lsp::DiagnosticSeverity;
44
use helix_view::document::DEFAULT_LANGUAGE_NAME;
55
use helix_view::{
@@ -176,7 +176,7 @@ where
176176
format!(" {mode_str} ")
177177
} else {
178178
// If not focused, explicitly leave an empty space instead of returning None.
179-
" ".repeat(mode_str.width() + 2)
179+
" ".repeat(unicode::width(mode_str) + 2)
180180
};
181181
let style = if visible && config.color_modes {
182182
match context.editor.mode() {

helix-tui/src/backend/test.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use crate::{
33
buffer::{Buffer, Cell},
44
terminal::Config,
55
};
6-
use helix_core::unicode::width::UnicodeWidthStr;
6+
use helix_core::unicode;
77
use helix_view::graphics::{CursorKind, Rect};
88
use std::{fmt::Write, io};
99

@@ -30,7 +30,7 @@ fn buffer_view(buffer: &Buffer) -> String {
3030
} else {
3131
overwritten.push((x, &c.symbol))
3232
}
33-
skip = std::cmp::max(skip, c.symbol.width()).saturating_sub(1);
33+
skip = std::cmp::max(skip, unicode::width(&c.symbol)).saturating_sub(1);
3434
}
3535
view.push('"');
3636
if !overwritten.is_empty() {

0 commit comments

Comments
 (0)