Skip to content

Commit 0f50696

Browse files
committed
Auto merge of #145479 - Kmeakin:km/hardcode-char-is-control, r=joboet
Hard-code `char::is_control`. Split off from #145219. According to https://www.unicode.org/policies/stability_policy.html#Property_Value, the set of codepoints in `Cc` will never change, so we can hard-code the patterns to match against instead of using a table. This doesn't change the generated assembly, since the lookup table is small enough that [LLVM is able to inline the whole search](https://godbolt.org/z/bG8dM37YG). But it does reduce the chance of regressions if LLVM's heuristics change in the future, and it means less generated Rust code checked in to `unicode_data.rs`.
2 parents e95db59 + 1bb9b15 commit 0f50696

File tree

4 files changed

+5
-28
lines changed

4 files changed

+5
-28
lines changed

library/core/src/char/methods.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -950,7 +950,11 @@ impl char {
950950
#[stable(feature = "rust1", since = "1.0.0")]
951951
#[inline]
952952
pub fn is_control(self) -> bool {
953-
unicode::Cc(self)
953+
// According to
954+
// https://www.unicode.org/policies/stability_policy.html#Property_Value,
955+
// the set of codepoints in `Cc` will never change.
956+
// So we can just hard-code the patterns to match against instead of using a table.
957+
matches!(self, '\0'..='\x1f' | '\x7f'..='\u{9f}')
954958
}
955959

956960
/// Returns `true` if this `char` has the `Grapheme_Extend` property.

library/core/src/unicode/mod.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ pub use unicode_data::conversions;
1010

1111
#[rustfmt::skip]
1212
pub(crate) use unicode_data::alphabetic::lookup as Alphabetic;
13-
pub(crate) use unicode_data::cc::lookup as Cc;
1413
pub(crate) use unicode_data::grapheme_extend::lookup as Grapheme_Extend;
1514
pub(crate) use unicode_data::lowercase::lookup as Lowercase;
1615
pub(crate) use unicode_data::n::lookup as N;

library/core/src/unicode/unicode_data.rs

Lines changed: 0 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -358,31 +358,6 @@ pub mod cased {
358358
}
359359
}
360360

361-
#[rustfmt::skip]
362-
pub mod cc {
363-
use super::ShortOffsetRunHeader;
364-
365-
static SHORT_OFFSET_RUNS: [ShortOffsetRunHeader; 1] = [
366-
ShortOffsetRunHeader::new(0, 1114272),
367-
];
368-
static OFFSETS: [u8; 5] = [
369-
0, 32, 95, 33, 0,
370-
];
371-
pub fn lookup(c: char) -> bool {
372-
const {
373-
assert!(SHORT_OFFSET_RUNS.last().unwrap().0 > char::MAX as u32);
374-
let mut i = 0;
375-
while i < SHORT_OFFSET_RUNS.len() {
376-
assert!(SHORT_OFFSET_RUNS[i].start_index() < OFFSETS.len());
377-
i += 1;
378-
}
379-
}
380-
// SAFETY: We just ensured the last element of `SHORT_OFFSET_RUNS` is greater than `std::char::MAX`
381-
// and the start indices of all elements in `SHORT_OFFSET_RUNS` are smaller than `OFFSETS.len()`.
382-
unsafe { super::skip_search(c, &SHORT_OFFSET_RUNS, &OFFSETS) }
383-
}
384-
}
385-
386361
#[rustfmt::skip]
387362
pub mod grapheme_extend {
388363
use super::ShortOffsetRunHeader;

src/tools/unicode-table-generator/src/main.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,6 @@ static PROPERTIES: &[&str] = &[
9292
"Case_Ignorable",
9393
"Grapheme_Extend",
9494
"White_Space",
95-
"Cc",
9695
"N",
9796
];
9897

0 commit comments

Comments
 (0)