Skip to content
This repository was archived by the owner on Jul 9, 2025. It is now read-only.

Commit b588307

Browse files
Bug 1956123 - build(rust): shim-upgrade unicode-width 0.1.10 → 0.2.0 r=glandium,supply-chain-reviewers
Differential Revision: https://phabricator.services.mozilla.com/D243188
1 parent 684141f commit b588307

File tree

16 files changed

+29893
-971
lines changed

16 files changed

+29893
-971
lines changed

Cargo.lock

Lines changed: 11 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,9 @@ rure = { path = "third_party/rust/rure" }
220220
# Patch `plist` to work with `indexmap` 2.*
221221
plist = { path = "third_party/rust/plist" }
222222

223+
# Patch `unicode-width` 0.1.* to 0.2.
224+
unicode-width = { path = "build/rust/unicode-width" }
225+
223226
# To-be-published changes.
224227
unicode-bidi = { git = "https://github.com/servo/unicode-bidi", rev = "ca612daf1c08c53abe07327cb3e6ef6e0a760f0c" }
225228
nss-gk-api = { git = "https://github.com/beurdouche/nss-gk-api", rev = "e48a946811ffd64abc78de3ee284957d8d1c0d63" }
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
[package]
2+
name = "unicode-width"
3+
version = "0.1.999"
4+
edition = "2018"
5+
license = "MPL-2.0"
6+
7+
[lib]
8+
path = "lib.rs"
9+
10+
[dependencies.unicode-width]
11+
version = "0.2.0"
12+
default-features = false
13+
14+
[features]
15+
default = ["unicode-width/default"]
16+
cjk = ["unicode-width/cjk"]
17+
no_std = ["unicode-width/no_std"]
18+
rustc-dep-of-std = ["unicode-width/rustc-dep-of-std"]

build/rust/unicode-width/lib.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
/* This Source Code Form is subject to the terms of the Mozilla Public
2+
* License, v. 2.0. If a copy of the MPL was not distributed with this
3+
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4+
5+
pub use unicode_width::*;

supply-chain/imports.lock

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -704,8 +704,8 @@ user-login = "dtolnay"
704704
user-name = "David Tolnay"
705705

706706
[[publisher.unicode-width]]
707-
version = "0.1.10"
708-
when = "2022-09-13"
707+
version = "0.2.0"
708+
when = "2024-09-19"
709709
user-id = 1139
710710
user-login = "Manishearth"
711711
user-name = "Manish Goregaokar"
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
{"files":{"COPYRIGHT":"23860c2a7b5d96b21569afedf033469bab9fe14a1b24a35068b8641c578ce24d","Cargo.toml":"f22e31fb3559e916864820719a09ab3adbf80301440e1702acf827210bbf76df","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"7b63ecd5f1902af1b63729947373683c32745c16a10e8e6292e2e2dcd7e90ae0","README.md":"8a041a4305fb318f5c2cb284046f8480796521d0e829023b0441b5e8469490eb","scripts/unicode.py":"0c53095ef99395338399f9ad218b4481cffcf63774fd61871ed32efb242419f8","src/lib.rs":"38c44436eac069bd8d11203f31ecfef8adfe92da1fce19ba00bdd25aa3fbbe20","src/tables.rs":"c6ddb420c289517bb92973199fd2987b9608f29fc10bb33b5290f39b301ce92f","src/tests.rs":"ff9f331210861ba78040f119a0f6ccfacf5b2ca1ebee430784de0858fad01860"},"package":"c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b"}
1+
{"files":{"COPYRIGHT":"23860c2a7b5d96b21569afedf033469bab9fe14a1b24a35068b8641c578ce24d","Cargo.toml":"58a703b95d64c5cf5e84edb838fc5b3e426b7dcf54092bb5ffa4c88eb566b6ed","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"7b63ecd5f1902af1b63729947373683c32745c16a10e8e6292e2e2dcd7e90ae0","README.md":"bb05cb954f8897cfd99a9d7b73ed41bf3991ec44fe6e1f500b91e51ae3892f4c","benches/benches.rs":"4cfb510cd83882f31eacc974c65acd2b6a0ce659aadf300de921e659ace7d587","scripts/unicode.py":"c905efbb53328aad971fa784027eee6403a04a2c04b1509d117b191bcbb6128f","src/lib.rs":"af71fa2d16706a1f15c9d3807829d8308c11ed4d974263ccf5a37d21a8c33982","src/tables.rs":"54753db78d67856fd202b4b072772c8c8a3f3297332a4f7bd77c84966946ea79","tests/emoji-test.txt":"d876ee249aa28eaa76cfa6dfaa702847a8d13b062aa488d465d0395ee8137ed9","tests/tests.rs":"eb5482283df9aaaaa090b2bbbc52e59779ed7da75d1254a64725f3df897c4804"},"package":"1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd"}

third_party/rust/unicode-width/Cargo.toml

Lines changed: 29 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,31 +10,51 @@
1010
# See Cargo.toml.orig for the original contents.
1111

1212
[package]
13+
edition = "2021"
1314
name = "unicode-width"
14-
version = "0.1.10"
15+
version = "0.2.0"
1516
authors = [
1617
"kwantam <[email protected]>",
1718
"Manish Goregaokar <[email protected]>",
1819
]
19-
exclude = [
20-
"target/*",
21-
"Cargo.lock",
22-
]
20+
build = false
21+
exclude = ["/.github/*"]
22+
autobins = false
23+
autoexamples = false
24+
autotests = false
25+
autobenches = false
2326
description = """
2427
Determine displayed width of `char` and `str` types
2528
according to Unicode Standard Annex #11 rules.
2629
"""
2730
homepage = "https://github.com/unicode-rs/unicode-width"
28-
documentation = "https://unicode-rs.github.io/unicode-width"
2931
readme = "README.md"
3032
keywords = [
3133
"text",
3234
"width",
3335
"unicode",
3436
]
35-
license = "MIT/Apache-2.0"
37+
categories = [
38+
"command-line-interface",
39+
"internationalization",
40+
"no-std::no-alloc",
41+
"text-processing",
42+
]
43+
license = "MIT OR Apache-2.0"
3644
repository = "https://github.com/unicode-rs/unicode-width"
3745

46+
[lib]
47+
name = "unicode_width"
48+
path = "src/lib.rs"
49+
50+
[[test]]
51+
name = "tests"
52+
path = "tests/tests.rs"
53+
54+
[[bench]]
55+
name = "benches"
56+
path = "benches/benches.rs"
57+
3858
[dependencies.compiler_builtins]
3959
version = "0.1"
4060
optional = true
@@ -50,8 +70,8 @@ optional = true
5070
package = "rustc-std-workspace-std"
5171

5272
[features]
53-
bench = []
54-
default = []
73+
cjk = []
74+
default = ["cjk"]
5575
no_std = []
5676
rustc-dep-of-std = [
5777
"std",
Lines changed: 34 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,22 @@
1-
# unicode-width
1+
# `unicode-width`
22

3-
Determine displayed width of `char` and `str` types according to
4-
[Unicode Standard Annex #11][UAX11] rules.
3+
[![Build status](https://github.com/unicode-rs/unicode-width/actions/workflows/rust.yml/badge.svg)](https://github.com/unicode-rs/unicode-width/actions/workflows/rust.yml)
4+
[![crates.io version](https://img.shields.io/crates/v/unicode-width)](https://crates.io/crates/unicode-width)
5+
[![Docs status](https://img.shields.io/docsrs/unicode-width)](https://docs.rs/unicode-width/)
56

6-
[UAX11]: http://www.unicode.org/reports/tr11/
7+
Determine displayed width of `char` and `str` types according to [Unicode Standard Annex #11][UAX11]
8+
and other portions of the Unicode standard.
79

8-
[![Build Status](https://travis-ci.org/unicode-rs/unicode-width.svg)](https://travis-ci.org/unicode-rs/unicode-width)
10+
This crate is `#![no_std]`.
911

10-
[Documentation](https://unicode-rs.github.io/unicode-width/unicode_width/index.html)
12+
[UAX11]: http://www.unicode.org/reports/tr11/
1113

1214
```rust
13-
extern crate unicode_width;
14-
1515
use unicode_width::UnicodeWidthStr;
1616

1717
fn main() {
1818
let teststr = "Hello, world!";
19-
let width = UnicodeWidthStr::width(teststr);
19+
let width = teststr.width();
2020
println!("{}", teststr);
2121
println!("The above string is {} columns wide.", width);
2222
let width = teststr.width_cjk();
@@ -25,27 +25,26 @@ fn main() {
2525
```
2626

2727
**NOTE:** The computed width values may not match the actual rendered column
28-
width. For example, the woman scientist emoji comprises of a woman emoji, a
29-
zero-width joiner and a microscope emoji.
28+
width. For example, many Brahmic scripts like Devanagari have complex rendering rules
29+
which this crate does not currently handle (and will never fully handle, because
30+
the exact rendering depends on the font):
3031

3132
```rust
3233
extern crate unicode_width;
3334
use unicode_width::UnicodeWidthStr;
3435

3536
fn main() {
36-
assert_eq!(UnicodeWidthStr::width("👩"), 2); // Woman
37-
assert_eq!(UnicodeWidthStr::width("🔬"), 2); // Microscope
38-
assert_eq!(UnicodeWidthStr::width("👩‍🔬"), 4); // Woman scientist
37+
assert_eq!("क".width(), 1); // Devanagari letter Ka
38+
assert_eq!("ष".width(), 1); // Devanagari letter Ssa
39+
assert_eq!("क्ष".width(), 2); // Ka + Virama + Ssa
3940
}
4041
```
4142

42-
See [Unicode Standard Annex #11][UAX11] for precise details on what is and isn't
43-
covered by this crate.
44-
45-
## features
46-
47-
unicode-width does not depend on libstd, so it can be used in crates
48-
with the `#![no_std]` attribute.
43+
Additionally, [defective combining character sequences](https://unicode.org/glossary/#defective_combining_character_sequence)
44+
and nonstandard [Korean jamo](https://unicode.org/glossary/#jamo) sequences may
45+
be rendered with a different width than what this crate says. (This is not an
46+
exhaustive list.) For a list of what this crate *does* handle, see
47+
[docs.rs](https://docs.rs/unicode-width/latest/unicode_width/#rules-for-determining-width).
4948

5049
## crates.io
5150

@@ -54,5 +53,18 @@ to your `Cargo.toml`:
5453

5554
```toml
5655
[dependencies]
57-
unicode-width = "0.1.7"
56+
unicode-width = "0.2.0"
5857
```
58+
59+
60+
## Changelog
61+
62+
63+
### 0.2.0
64+
65+
- Treat `\n` as width 1 (#60)
66+
- Treat ambiguous `Modifier_Letter`s as narrow (#63)
67+
- Support `Grapheme_Cluster_Break=Prepend` (#62)
68+
- Support lots of ligatures (#53)
69+
70+
Note: If you are using `unicode-width` for linebreaking, the change treating `\n` as width 1 _may cause behavior changes_. It is recommended that in such cases you feed already line-segmented text to `unicode-width`. In other words, please apply higher-level, control-character-based line-breaking protocols before feeding text to `unicode-width`. Relying on any character producing a stable width in this crate is likely the sign of a bug.
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
#![feature(test)]
11+
12+
extern crate test;
13+
14+
use std::iter;
15+
16+
use test::Bencher;
17+
18+
use unicode_width::{UnicodeWidthChar, UnicodeWidthStr};
19+
20+
/// Benchmarks `UnicodeWidthChar::width`, called with fully-qualified trait
/// syntax, on every character of a 4096-character string of ASCII `'a'`.
///
/// The input string is built once, outside the timed closure; each result is
/// handed to `test::black_box`.
// NOTE(review): the name `cargo` presumably refers to "the crate from cargo",
// as opposed to the sibling `stdlib` benchmark — confirm with upstream history.
#[bench]
21+
fn cargo(b: &mut Bencher) {
22+
let string = iter::repeat('a').take(4096).collect::<String>();
23+
24+
b.iter(|| {
25+
for c in string.chars() {
26+
test::black_box(UnicodeWidthChar::width(c));
27+
}
28+
});
29+
}
30+
31+
/// Benchmarks `c.width()` (method-call syntax) on every character of a
/// 4096-character string of ASCII `'a'`.
///
/// The input string is built once, outside the timed closure; each result is
/// handed to `test::black_box`.
// NOTE(review): the name and the `#[allow(deprecated)]` suggest this once
// measured a standard-library `char::width`; with only `UnicodeWidthChar`
// imported in this file, the call presumably resolves to the same trait method
// as the `cargo` benchmark — confirm.
#[bench]
32+
#[allow(deprecated)]
33+
fn stdlib(b: &mut Bencher) {
34+
let string = iter::repeat('a').take(4096).collect::<String>();
35+
36+
b.iter(|| {
37+
for c in string.chars() {
38+
test::black_box(c.width());
39+
}
40+
});
41+
}
42+
43+
/// Benchmarks `simple_width_if` (the `if`/`else` ASCII fast path defined in
/// this file) on every character of a 4096-character string of ASCII `'a'`.
///
/// The input string is built once, outside the timed closure; each result is
/// handed to `test::black_box`.
#[bench]
44+
fn simple_if(b: &mut Bencher) {
45+
let string = iter::repeat('a').take(4096).collect::<String>();
46+
47+
b.iter(|| {
48+
for c in string.chars() {
49+
test::black_box(simple_width_if(c));
50+
}
51+
});
52+
}
53+
54+
/// Benchmarks `simple_width_match` (the `match`-with-guards ASCII fast path
/// defined in this file) on every character of a 4096-character string of
/// ASCII `'a'`.
///
/// The input string is built once, outside the timed closure; each result is
/// handed to `test::black_box`.
#[bench]
55+
fn simple_match(b: &mut Bencher) {
56+
let string = iter::repeat('a').take(4096).collect::<String>();
57+
58+
b.iter(|| {
59+
for c in string.chars() {
60+
test::black_box(simple_width_match(c));
61+
}
62+
});
63+
}
64+
65+
/// Width lookup with a hand-written ASCII fast path, expressed as nested
/// `if`/`else`.
///
/// For code points below 127:
/// * 32..=126 yield `Some(1)`,
/// * 0 yields `Some(0)`,
/// * 1..=31 yield `None`.
///
/// Code points 127 and above are delegated to `UnicodeWidthChar::width`.
/// This partitions the input the same way as `simple_width_match`; the two
/// differ only in control-flow form, which is what the benchmarks compare.
#[inline]
66+
fn simple_width_if(c: char) -> Option<usize> {
67+
let cu = c as u32;
68+
if cu < 127 {
69+
if cu > 31 {
70+
Some(1)
71+
} else if cu == 0 {
72+
Some(0)
73+
} else {
74+
None
75+
}
76+
} else {
77+
UnicodeWidthChar::width(c)
78+
}
79+
}
80+
81+
/// Width lookup with a hand-written ASCII fast path, expressed as a `match`
/// with guards evaluated top to bottom.
///
/// * 0 yields `Some(0)`,
/// * 1..0x20 yield `None`,
/// * 0x20..0x7f yield `Some(1)`,
/// * everything else (0x7f and above) is delegated to
///   `UnicodeWidthChar::width`.
///
/// Arm order matters: each guard relies on the earlier arms having already
/// excluded the smaller code points.
#[inline]
82+
fn simple_width_match(c: char) -> Option<usize> {
83+
match c as u32 {
84+
cu if cu == 0 => Some(0),
85+
cu if cu < 0x20 => None,
86+
cu if cu < 0x7f => Some(1),
87+
_ => UnicodeWidthChar::width(c),
88+
}
89+
}
90+
91+
/// Benchmarks `UnicodeWidthStr::width` over the whole contents of
/// `bench_data/enwik8`, read once before timing starts.
///
/// If the file cannot be read, `unwrap_or_default()` substitutes an empty
/// string, so the benchmark still runs but measures an empty input instead of
/// failing.
#[bench]
92+
fn enwik8(b: &mut Bencher) {
93+
// To benchmark, download & unzip `enwik8` from https://data.deepai.org/enwik8.zip
94+
let data_path = "bench_data/enwik8";
95+
let string = std::fs::read_to_string(data_path).unwrap_or_default();
96+
b.iter(|| test::black_box(UnicodeWidthStr::width(string.as_str())));
97+
}
98+
99+
/// Benchmarks `UnicodeWidthStr::width` over the whole contents of the jawiki
/// multistream index file under `bench_data/`, read once before timing starts.
///
/// If the file cannot be read, `unwrap_or_default()` substitutes an empty
/// string, so the benchmark still runs but measures an empty input instead of
/// failing.
#[bench]
100+
fn jawiki(b: &mut Bencher) {
101+
// To benchmark, download & extract `jawiki-20220501-pages-articles-multistream-index.txt` from
102+
// https://dumps.wikimedia.org/jawiki/20220501/jawiki-20220501-pages-articles-multistream-index.txt.bz2
103+
let data_path = "bench_data/jawiki-20220501-pages-articles-multistream-index.txt";
104+
let string = std::fs::read_to_string(data_path).unwrap_or_default();
105+
b.iter(|| test::black_box(UnicodeWidthStr::width(string.as_str())));
106+
}
107+
108+
/// Benchmarks `UnicodeWidthStr::width` over the whole contents of
/// `bench_data/emoji-style.txt`, read once before timing starts.
///
/// If the file cannot be read, `unwrap_or_default()` substitutes an empty
/// string, so the benchmark still runs but measures an empty input instead of
/// failing.
#[bench]
109+
fn emoji(b: &mut Bencher) {
110+
// To benchmark, download emoji-style.txt from https://www.unicode.org/emoji/charts/emoji-style.txt
111+
let data_path = "bench_data/emoji-style.txt";
112+
let string = std::fs::read_to_string(data_path).unwrap_or_default();
113+
b.iter(|| test::black_box(UnicodeWidthStr::width(string.as_str())));
114+
}

0 commit comments

Comments
 (0)