Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion library/core/src/char/methods.rs
Original file line number Diff line number Diff line change
Expand Up @@ -985,7 +985,11 @@ impl char {
#[doc(hidden)]
#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
pub fn is_cased(self) -> bool {
// NOTE(review): this is a rendered diff whose +/- markers were lost. The hunk summary
// ("5 additions & 1 deletion") indicates that the single-line `if` directly below is the
// removed version and the multi-line `if` after it is its replacement; only one of the
// two belongs in the real function body.
//
// Removed form: ASCII characters are answered by `is_ascii_alphabetic`, every other
// character by the dedicated `unicode::Cased` table lookup.
if self.is_ascii() { self.is_ascii_alphabetic() } else { unicode::Cased(self) }
// Replacement form: identical ASCII fast path, but the non-ASCII answer is composed from
// three other lookups instead of the `Cased` table.
if self.is_ascii() {
// ASCII: the result is exactly `is_ascii_alphabetic`.
self.is_ascii_alphabetic()
} else {
// Non-ASCII: true as soon as one of the `Lowercase`, `Uppercase` or `Lt` lookups matches
// (`||` short-circuits left to right). `Lt` is presumably the Unicode general category
// Titlecase_Letter -- confirm against the table generator.
// The `Lt` lookup debug-asserts a non-ASCII argument; this branch is only reached for
// non-ASCII `self`.
unicode::Lowercase(self) || unicode::Uppercase(self) || unicode::Lt(self)
}
}

/// Returns `true` if this `char` has the `Case_Ignorable` property.
Expand Down
2 changes: 1 addition & 1 deletion library/core/src/unicode/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@
// for use in alloc, not re-exported in std.
#[rustfmt::skip]
pub use unicode_data::case_ignorable::lookup as Case_Ignorable;
pub use unicode_data::cased::lookup as Cased;
pub use unicode_data::conversions;

#[rustfmt::skip]
pub(crate) use unicode_data::alphabetic::lookup as Alphabetic;
pub(crate) use unicode_data::grapheme_extend::lookup as Grapheme_Extend;
pub(crate) use unicode_data::lowercase::lookup as Lowercase;
pub(crate) use unicode_data::lt::lookup as Lt;
pub(crate) use unicode_data::n::lookup as N;
pub(crate) use unicode_data::uppercase::lookup as Uppercase;
pub(crate) use unicode_data::white_space::lookup as White_Space;
Expand Down
90 changes: 35 additions & 55 deletions library/core/src/unicode/unicode_data.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
//! This file is generated by `./x run src/tools/unicode-table-generator`; do not edit manually!
// Alphabetic : 1723 bytes, 147369 codepoints in 759 ranges (U+0000AA - U+03347A) using skiplist
// Case_Ignorable : 1063 bytes, 2789 codepoints in 459 ranges (U+0000A8 - U+0E01F0) using skiplist
// Cased : 401 bytes, 4580 codepoints in 156 ranges (U+0000AA - U+01F18A) using skiplist
// Grapheme_Extend : 899 bytes, 2232 codepoints in 383 ranges (U+000300 - U+0E01F0) using skiplist
// Lowercase : 943 bytes, 2569 codepoints in 676 ranges (U+0000AA - U+01E944) using bitset
// Lt : 33 bytes, 31 codepoints in 10 ranges (U+0001C5 - U+001FFD) using skiplist
// N : 463 bytes, 1914 codepoints in 145 ranges (U+0000B2 - U+01FBFA) using skiplist
// Uppercase : 799 bytes, 1980 codepoints in 659 ranges (U+0000C0 - U+01F18A) using bitset
// White_Space : 256 bytes, 19 codepoints in 8 ranges (U+000085 - U+003001) using cascading
// to_lower : 11708 bytes
// to_upper : 13656 bytes
// Total : 31911 bytes
// Total : 31543 bytes

#[inline(always)]
const fn bitset_search<
Expand Down Expand Up @@ -336,59 +336,6 @@ pub mod case_ignorable {
}
}

/// Lookup tables and query function for the `Cased` property.
///
/// The generated file header describes this set as 156 ranges spanning
/// U+0000AA - U+01F18A, stored in the "skiplist" encoding.
#[rustfmt::skip]
pub mod cased {
use super::ShortOffsetRunHeader;

// Run headers consumed by `super::skip_search`. The two arguments of `new` are presumably
// (start index into `OFFSETS`, code point prefix sum) -- confirm against
// `ShortOffsetRunHeader::new`. The const block in `lookup_slow` checks these entries.
static SHORT_OFFSET_RUNS: [ShortOffsetRunHeader; 22] = [
ShortOffsetRunHeader::new(0, 4256), ShortOffsetRunHeader::new(51, 5024),
ShortOffsetRunHeader::new(61, 7296), ShortOffsetRunHeader::new(65, 7958),
ShortOffsetRunHeader::new(74, 9398), ShortOffsetRunHeader::new(149, 11264),
ShortOffsetRunHeader::new(151, 42560), ShortOffsetRunHeader::new(163, 43824),
ShortOffsetRunHeader::new(177, 64256), ShortOffsetRunHeader::new(183, 65313),
ShortOffsetRunHeader::new(187, 66560), ShortOffsetRunHeader::new(191, 67456),
ShortOffsetRunHeader::new(213, 68736), ShortOffsetRunHeader::new(221, 71840),
ShortOffsetRunHeader::new(229, 93760), ShortOffsetRunHeader::new(231, 119808),
ShortOffsetRunHeader::new(237, 120486), ShortOffsetRunHeader::new(274, 122624),
ShortOffsetRunHeader::new(297, 122928), ShortOffsetRunHeader::new(303, 125184),
ShortOffsetRunHeader::new(305, 127280), ShortOffsetRunHeader::new(307, 1241482),
];
// Byte table passed to `super::skip_search` together with `SHORT_OFFSET_RUNS`.
// Presumably run lengths that alternate between "not in set" and "in set" -- confirm
// against `skip_search`.
static OFFSETS: [u8; 313] = [
170, 1, 10, 1, 4, 1, 5, 23, 1, 31, 1, 195, 1, 4, 4, 208, 2, 35, 7, 2, 30, 5, 96, 1, 42, 4,
2, 2, 2, 4, 1, 1, 6, 1, 1, 3, 1, 1, 1, 20, 1, 83, 1, 139, 8, 166, 1, 38, 9, 41, 0, 38, 1, 1,
5, 1, 2, 43, 1, 4, 0, 86, 2, 6, 0, 11, 5, 43, 2, 3, 64, 192, 64, 0, 2, 6, 2, 38, 2, 6, 2, 8,
1, 1, 1, 1, 1, 1, 1, 31, 2, 53, 1, 7, 1, 1, 3, 3, 1, 7, 3, 4, 2, 6, 4, 13, 5, 3, 1, 7, 116,
1, 13, 1, 16, 13, 101, 1, 4, 1, 2, 10, 1, 1, 3, 5, 6, 1, 1, 1, 1, 1, 1, 4, 1, 6, 4, 1, 2, 4,
5, 5, 4, 1, 17, 32, 3, 2, 0, 52, 0, 229, 6, 4, 3, 2, 12, 38, 1, 1, 5, 1, 0, 46, 18, 30, 132,
102, 3, 4, 1, 77, 20, 6, 1, 3, 0, 43, 1, 14, 6, 80, 0, 7, 12, 5, 0, 26, 6, 26, 0, 80, 96,
36, 4, 36, 116, 11, 1, 15, 1, 7, 1, 2, 1, 11, 1, 15, 1, 7, 1, 2, 0, 1, 2, 3, 1, 42, 1, 9, 0,
51, 13, 51, 93, 22, 10, 22, 0, 64, 0, 64, 32, 25, 2, 25, 0, 85, 1, 71, 1, 2, 2, 1, 2, 2, 2,
4, 1, 12, 1, 1, 1, 7, 1, 65, 1, 4, 2, 8, 1, 7, 1, 28, 1, 4, 1, 5, 1, 1, 3, 7, 1, 0, 2, 25,
1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 8, 0, 10, 1, 20, 6, 6, 0,
62, 0, 68, 0, 26, 6, 26, 6, 26, 0,
];
/// Returns `true` if `c` is in the `Cased` set.
///
/// Callers are expected to pass a non-ASCII `c`; this is only checked by a
/// `debug_assert!`, so it is not enforced in builds without debug assertions.
#[inline]
pub fn lookup(c: char) -> bool {
debug_assert!(!c.is_ascii());
// 0xaa is the first code point of the set according to the generated file header, so
// anything below it is rejected without touching the tables.
(c as u32) >= 0xaa && lookup_slow(c)
}

// Table-driven path of `lookup`. Marked `#[inline(never)]`, presumably so that only the
// cheap range check above gets inlined at call sites.
#[inline(never)]
fn lookup_slow(c: char) -> bool {
// Evaluated at compile time: a failing assertion is a build error rather than a runtime
// panic. These checks establish the preconditions named in the SAFETY comment below.
const {
assert!(SHORT_OFFSET_RUNS.last().unwrap().0 > char::MAX as u32);
let mut i = 0;
while i < SHORT_OFFSET_RUNS.len() {
assert!(SHORT_OFFSET_RUNS[i].start_index() < OFFSETS.len());
i += 1;
}
}
// SAFETY: We just ensured the last element of `SHORT_OFFSET_RUNS` is greater than `std::char::MAX`
// and the start indices of all elements in `SHORT_OFFSET_RUNS` are smaller than `OFFSETS.len()`.
unsafe { super::skip_search(c, &SHORT_OFFSET_RUNS, &OFFSETS) }
}
}

#[rustfmt::skip]
pub mod grapheme_extend {
use super::ShortOffsetRunHeader;
Expand Down Expand Up @@ -573,6 +520,39 @@ pub mod lowercase {
}
}

/// Lookup tables and query function for the `Lt` property.
///
/// The generated file header describes this set as 31 code points in 10 ranges spanning
/// U+0001C5 - U+001FFD, stored in the "skiplist" encoding. `Lt` is presumably the Unicode
/// general category Titlecase_Letter -- confirm against the table generator.
#[rustfmt::skip]
pub mod lt {
use super::ShortOffsetRunHeader;

// Run headers consumed by `super::skip_search`. The two arguments of `new` are presumably
// (start index into `OFFSETS`, code point prefix sum) -- confirm against
// `ShortOffsetRunHeader::new`. The const block in `lookup_slow` checks these entries.
static SHORT_OFFSET_RUNS: [ShortOffsetRunHeader; 3] = [
ShortOffsetRunHeader::new(0, 453), ShortOffsetRunHeader::new(1, 8072),
ShortOffsetRunHeader::new(9, 1122301),
];
// Byte table passed to `super::skip_search` together with `SHORT_OFFSET_RUNS`.
// Presumably run lengths that alternate between "not in set" and "in set" -- confirm
// against `skip_search`.
static OFFSETS: [u8; 21] = [
0, 1, 2, 1, 2, 1, 38, 1, 0, 8, 8, 8, 8, 8, 12, 1, 15, 1, 47, 1, 0,
];
/// Returns `true` if `c` is in the `Lt` set.
///
/// Callers are expected to pass a non-ASCII `c`; this is only checked by a
/// `debug_assert!`, so it is not enforced in builds without debug assertions.
#[inline]
pub fn lookup(c: char) -> bool {
debug_assert!(!c.is_ascii());
// 0x1c5 is the first code point of the set according to the generated file header, so
// anything below it is rejected without touching the tables.
(c as u32) >= 0x1c5 && lookup_slow(c)
}

// Table-driven path of `lookup`. Marked `#[inline(never)]`, presumably so that only the
// cheap range check above gets inlined at call sites.
#[inline(never)]
fn lookup_slow(c: char) -> bool {
// Evaluated at compile time: a failing assertion is a build error rather than a runtime
// panic. These checks establish the preconditions named in the SAFETY comment below.
const {
assert!(SHORT_OFFSET_RUNS.last().unwrap().0 > char::MAX as u32);
let mut i = 0;
while i < SHORT_OFFSET_RUNS.len() {
assert!(SHORT_OFFSET_RUNS[i].start_index() < OFFSETS.len());
i += 1;
}
}
// SAFETY: We just ensured the last element of `SHORT_OFFSET_RUNS` is greater than `std::char::MAX`
// and the start indices of all elements in `SHORT_OFFSET_RUNS` are smaller than `OFFSETS.len()`.
unsafe { super::skip_search(c, &SHORT_OFFSET_RUNS, &OFFSETS) }
}
}

#[rustfmt::skip]
pub mod n {
use super::ShortOffsetRunHeader;
Expand Down
2 changes: 1 addition & 1 deletion src/tools/unicode-table-generator/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ static PROPERTIES: &[&str] = &[
"Alphabetic",
"Lowercase",
"Uppercase",
"Cased",
"Lt",
"Case_Ignorable",
"Grapheme_Extend",
"White_Space",
Expand Down
Loading