Skip to content

Commit 3d5b2b8

Browse files
committed
refactor: Hard-code char::is_control
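According to the Unicode stability policy (https://www.unicode.org/policies/stability_policy.html#Property_Value), the General_Category value `Cc` (Control) is immutable: the set of code points assigned to it will never change. So we can hard-code the ranges to match against in `char::is_control` instead of going through a generated lookup.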
1 parent b7fa8ef commit 3d5b2b8

File tree

4 files changed

+5
-16
lines changed

4 files changed

+5
-16
lines changed

library/core/src/char/methods.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -950,7 +950,11 @@ impl char {
950950
#[stable(feature = "rust1", since = "1.0.0")]
951951
#[inline]
952952
pub fn is_control(self) -> bool {
953-
if self.is_ascii() { self.is_ascii_control() } else { unicode::Cc(self) }
953+
// According to
954+
// https://www.unicode.org/policies/stability_policy.html#Property_Value,
955+
// the set of codepoints in `Cc` will never change. So we can hard-code
956+
// the patterns to match against instead of using a table.
957+
matches!(self, '\0'..='\x1f' | '\x7f'..='\u{9f}')
954958
}
955959

956960
/// Returns `true` if this `char` has the `Grapheme_Extend` property.

library/core/src/unicode/mod.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ pub use unicode_data::conversions;
88
#[rustfmt::skip]
99
pub(crate) use unicode_data::alphabetic::lookup as Alphabetic;
1010
pub(crate) use unicode_data::case_ignorable::lookup as Case_Ignorable;
11-
pub(crate) use unicode_data::cc::lookup as Cc;
1211
pub(crate) use unicode_data::grapheme_extend::lookup as Grapheme_Extend;
1312
pub(crate) use unicode_data::lowercase::lookup as Lowercase;
1413
pub(crate) use unicode_data::lt::lookup as Lt;

library/core/src/unicode/unicode_data.rs

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
///! This file is generated by `./x run src/tools/unicode-table-generator`; do not edit manually!
22
// Alphabetic : 1723 bytes, 142707 codepoints in 755 ranges (U+0000AA - U+0323B0) using skiplist
33
// Case_Ignorable : 1043 bytes, 2744 codepoints in 447 ranges (U+0000A8 - U+0E01F0) using skiplist
4-
// Cc : 0 bytes, 32 codepoints in 1 ranges (U+000080 - U+0000A0) using match
54
// Grapheme_Extend : 887 bytes, 2193 codepoints in 375 ranges (U+000300 - U+0E01F0) using skiplist
65
// Lowercase : 933 bytes, 2543 codepoints in 674 ranges (U+0000AA - U+01E944) using bitset
76
// Lt : 0 bytes, 31 codepoints in 10 ranges (U+0001C5 - U+001FFD) using match
@@ -338,18 +337,6 @@ pub mod case_ignorable {
338337
}
339338
}
340339

341-
#[rustfmt::skip]
342-
pub mod cc {
343-
#[inline]
344-
pub const fn lookup(c: char) -> bool {
345-
debug_assert!(!c.is_ascii());
346-
match c as u32 {
347-
0x80..=0x9f => true,
348-
_ => false,
349-
}
350-
}
351-
}
352-
353340
#[rustfmt::skip]
354341
pub mod grapheme_extend {
355342
use super::ShortOffsetRunHeader;

src/tools/unicode-table-generator/src/main.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,6 @@ static PROPERTIES: &[&str] = &[
9494
"Case_Ignorable",
9595
"Grapheme_Extend",
9696
"White_Space",
97-
"Cc",
9897
"N",
9998
];
10099

0 commit comments

Comments
 (0)