diff --git a/library/core/src/char/methods.rs b/library/core/src/char/methods.rs index 76f54db287079..49132ddd38a63 100644 --- a/library/core/src/char/methods.rs +++ b/library/core/src/char/methods.rs @@ -985,7 +985,11 @@ impl char { #[doc(hidden)] #[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")] pub fn is_cased(self) -> bool { - if self.is_ascii() { self.is_ascii_alphabetic() } else { unicode::Cased(self) } + if self.is_ascii() { + self.is_ascii_alphabetic() + } else { + unicode::Lowercase(self) || unicode::Uppercase(self) || unicode::Lt(self) + } } /// Returns `true` if this `char` has the `Case_Ignorable` property. diff --git a/library/core/src/unicode/mod.rs b/library/core/src/unicode/mod.rs index c71fa754e68fb..8a13c5322cac2 100644 --- a/library/core/src/unicode/mod.rs +++ b/library/core/src/unicode/mod.rs @@ -5,13 +5,13 @@ // for use in alloc, not re-exported in std. #[rustfmt::skip] pub use unicode_data::case_ignorable::lookup as Case_Ignorable; -pub use unicode_data::cased::lookup as Cased; pub use unicode_data::conversions; #[rustfmt::skip] pub(crate) use unicode_data::alphabetic::lookup as Alphabetic; pub(crate) use unicode_data::grapheme_extend::lookup as Grapheme_Extend; pub(crate) use unicode_data::lowercase::lookup as Lowercase; +pub(crate) use unicode_data::lt::lookup as Lt; pub(crate) use unicode_data::n::lookup as N; pub(crate) use unicode_data::uppercase::lookup as Uppercase; pub(crate) use unicode_data::white_space::lookup as White_Space; diff --git a/library/core/src/unicode/unicode_data.rs b/library/core/src/unicode/unicode_data.rs index 3c38b44224f87..95b1ae216d0b1 100644 --- a/library/core/src/unicode/unicode_data.rs +++ b/library/core/src/unicode/unicode_data.rs @@ -1,15 +1,15 @@ //! This file is generated by `./x run src/tools/unicode-table-generator`; do not edit manually! // Alphabetic : 1723 bytes, 147369 codepoints in 759 ranges (U+0000AA - U+03347A) using skiplist // Case_Ignorable : 1063 bytes, 2789 codepoints in 459 ranges (U+0000A8 - U+0E01F0) using skiplist -// Cased : 401 bytes, 4580 codepoints in 156 ranges (U+0000AA - U+01F18A) using skiplist // Grapheme_Extend : 899 bytes, 2232 codepoints in 383 ranges (U+000300 - U+0E01F0) using skiplist // Lowercase : 943 bytes, 2569 codepoints in 676 ranges (U+0000AA - U+01E944) using bitset +// Lt : 33 bytes, 31 codepoints in 10 ranges (U+0001C5 - U+001FFD) using skiplist // N : 463 bytes, 1914 codepoints in 145 ranges (U+0000B2 - U+01FBFA) using skiplist // Uppercase : 799 bytes, 1980 codepoints in 659 ranges (U+0000C0 - U+01F18A) using bitset // White_Space : 256 bytes, 19 codepoints in 8 ranges (U+000085 - U+003001) using cascading // to_lower : 11708 bytes // to_upper : 13656 bytes -// Total : 31911 bytes +// Total : 31543 bytes #[inline(always)] const fn bitset_search< @@ -336,59 +336,6 @@ pub mod case_ignorable { } } -#[rustfmt::skip] -pub mod cased { - use super::ShortOffsetRunHeader; - - static SHORT_OFFSET_RUNS: [ShortOffsetRunHeader; 22] = [ - ShortOffsetRunHeader::new(0, 4256), ShortOffsetRunHeader::new(51, 5024), - ShortOffsetRunHeader::new(61, 7296), ShortOffsetRunHeader::new(65, 7958), - ShortOffsetRunHeader::new(74, 9398), ShortOffsetRunHeader::new(149, 11264), - ShortOffsetRunHeader::new(151, 42560), ShortOffsetRunHeader::new(163, 43824), - ShortOffsetRunHeader::new(177, 64256), ShortOffsetRunHeader::new(183, 65313), - ShortOffsetRunHeader::new(187, 66560), ShortOffsetRunHeader::new(191, 67456), - ShortOffsetRunHeader::new(213, 68736), ShortOffsetRunHeader::new(221, 71840), - ShortOffsetRunHeader::new(229, 93760), ShortOffsetRunHeader::new(231, 119808), - ShortOffsetRunHeader::new(237, 120486), ShortOffsetRunHeader::new(274, 122624), - ShortOffsetRunHeader::new(297, 122928), ShortOffsetRunHeader::new(303, 125184), - ShortOffsetRunHeader::new(305, 127280), ShortOffsetRunHeader::new(307, 1241482), - ]; - static OFFSETS: [u8; 313] = [ - 170, 1, 10, 1, 4, 1, 5, 23, 1, 31, 1, 195, 1, 4, 4, 208, 2, 35, 7, 2, 30, 5, 96, 1, 42, 4, - 2, 2, 2, 4, 1, 1, 6, 1, 1, 3, 1, 1, 1, 20, 1, 83, 1, 139, 8, 166, 1, 38, 9, 41, 0, 38, 1, 1, - 5, 1, 2, 43, 1, 4, 0, 86, 2, 6, 0, 11, 5, 43, 2, 3, 64, 192, 64, 0, 2, 6, 2, 38, 2, 6, 2, 8, - 1, 1, 1, 1, 1, 1, 1, 31, 2, 53, 1, 7, 1, 1, 3, 3, 1, 7, 3, 4, 2, 6, 4, 13, 5, 3, 1, 7, 116, - 1, 13, 1, 16, 13, 101, 1, 4, 1, 2, 10, 1, 1, 3, 5, 6, 1, 1, 1, 1, 1, 1, 4, 1, 6, 4, 1, 2, 4, - 5, 5, 4, 1, 17, 32, 3, 2, 0, 52, 0, 229, 6, 4, 3, 2, 12, 38, 1, 1, 5, 1, 0, 46, 18, 30, 132, - 102, 3, 4, 1, 77, 20, 6, 1, 3, 0, 43, 1, 14, 6, 80, 0, 7, 12, 5, 0, 26, 6, 26, 0, 80, 96, - 36, 4, 36, 116, 11, 1, 15, 1, 7, 1, 2, 1, 11, 1, 15, 1, 7, 1, 2, 0, 1, 2, 3, 1, 42, 1, 9, 0, - 51, 13, 51, 93, 22, 10, 22, 0, 64, 0, 64, 32, 25, 2, 25, 0, 85, 1, 71, 1, 2, 2, 1, 2, 2, 2, - 4, 1, 12, 1, 1, 1, 7, 1, 65, 1, 4, 2, 8, 1, 7, 1, 28, 1, 4, 1, 5, 1, 1, 3, 7, 1, 0, 2, 25, - 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 8, 0, 10, 1, 20, 6, 6, 0, - 62, 0, 68, 0, 26, 6, 26, 6, 26, 0, - ]; - #[inline] - pub fn lookup(c: char) -> bool { - debug_assert!(!c.is_ascii()); - (c as u32) >= 0xaa && lookup_slow(c) - } - - #[inline(never)] - fn lookup_slow(c: char) -> bool { - const { - assert!(SHORT_OFFSET_RUNS.last().unwrap().0 > char::MAX as u32); - let mut i = 0; - while i < SHORT_OFFSET_RUNS.len() { - assert!(SHORT_OFFSET_RUNS[i].start_index() < OFFSETS.len()); - i += 1; - } - } - // SAFETY: We just ensured the last element of `SHORT_OFFSET_RUNS` is greater than `std::char::MAX` - // and the start indices of all elements in `SHORT_OFFSET_RUNS` are smaller than `OFFSETS.len()`. - unsafe { super::skip_search(c, &SHORT_OFFSET_RUNS, &OFFSETS) } - } -} - #[rustfmt::skip] pub mod grapheme_extend { use super::ShortOffsetRunHeader; @@ -573,6 +520,39 @@ pub mod lowercase { } } +#[rustfmt::skip] +pub mod lt { + use super::ShortOffsetRunHeader; + + static SHORT_OFFSET_RUNS: [ShortOffsetRunHeader; 3] = [ + ShortOffsetRunHeader::new(0, 453), ShortOffsetRunHeader::new(1, 8072), + ShortOffsetRunHeader::new(9, 1122301), + ]; + static OFFSETS: [u8; 21] = [ + 0, 1, 2, 1, 2, 1, 38, 1, 0, 8, 8, 8, 8, 8, 12, 1, 15, 1, 47, 1, 0, + ]; + #[inline] + pub fn lookup(c: char) -> bool { + debug_assert!(!c.is_ascii()); + (c as u32) >= 0x1c5 && lookup_slow(c) + } + + #[inline(never)] + fn lookup_slow(c: char) -> bool { + const { + assert!(SHORT_OFFSET_RUNS.last().unwrap().0 > char::MAX as u32); + let mut i = 0; + while i < SHORT_OFFSET_RUNS.len() { + assert!(SHORT_OFFSET_RUNS[i].start_index() < OFFSETS.len()); + i += 1; + } + } + // SAFETY: We just ensured the last element of `SHORT_OFFSET_RUNS` is greater than `std::char::MAX` + // and the start indices of all elements in `SHORT_OFFSET_RUNS` are smaller than `OFFSETS.len()`. + unsafe { super::skip_search(c, &SHORT_OFFSET_RUNS, &OFFSETS) } + } +} + #[rustfmt::skip] pub mod n { use super::ShortOffsetRunHeader; diff --git a/src/tools/unicode-table-generator/src/main.rs b/src/tools/unicode-table-generator/src/main.rs index ded9205ffc4b9..02b668508b292 100644 --- a/src/tools/unicode-table-generator/src/main.rs +++ b/src/tools/unicode-table-generator/src/main.rs @@ -90,7 +90,7 @@ static PROPERTIES: &[&str] = &[ "Alphabetic", "Lowercase", "Uppercase", - "Cased", + "Lt", "Case_Ignorable", "Grapheme_Extend", "White_Space",