|
2 | 2 | // Alphabetic : 1723 bytes, 142707 codepoints in 755 ranges (U+0000AA - U+0323B0) using skiplist
|
3 | 3 | // Case_Ignorable : 1043 bytes, 2744 codepoints in 447 ranges (U+0000A8 - U+0E01F0) using skiplist
|
4 | 4 | // Cased : 403 bytes, 4526 codepoints in 157 ranges (U+0000AA - U+01F18A) using skiplist
|
5 |
| -// Cc : 7 bytes, 32 codepoints in 1 ranges (U+000080 - U+0000A0) using skiplist |
| 5 | +// Cc : 0 bytes, 32 codepoints in 1 ranges (U+000080 - U+0000A0) using match |
6 | 6 | // Grapheme_Extend : 887 bytes, 2193 codepoints in 375 ranges (U+000300 - U+0E01F0) using skiplist
|
7 | 7 | // Lowercase : 933 bytes, 2543 codepoints in 674 ranges (U+0000AA - U+01E944) using bitset
|
8 | 8 | // N : 455 bytes, 1901 codepoints in 143 ranges (U+0000B2 - U+01FBFA) using skiplist
|
9 | 9 | // Uppercase : 797 bytes, 1952 codepoints in 655 ranges (U+0000C0 - U+01F18A) using bitset
|
10 |
| -// White_Space : 256 bytes, 19 codepoints in 8 ranges (U+000085 - U+003001) using cascading |
| 10 | +// White_Space : 0 bytes, 19 codepoints in 8 ranges (U+000085 - U+003001) using match |
11 | 11 | // to_lower : 11484 bytes
|
12 | 12 | // to_upper : 13432 bytes
|
13 |
| -// Total : 31420 bytes |
| 13 | +// Total : 31157 bytes |
14 | 14 |
|
15 | 15 | #[inline(always)]
|
16 | 16 | const fn bitset_search<
|
@@ -393,33 +393,13 @@ pub mod cased {
|
393 | 393 |
|
#[rustfmt::skip]
pub mod cc {
    /// Reports whether the non-ASCII character `c` is in the `Cc` table.
    ///
    /// The table holds a single range, `U+0080..=U+009F`, so a plain range
    /// test replaces any lookup structure and costs no static data.
    ///
    /// Callers are expected to handle ASCII input themselves: this is checked
    /// with a `debug_assert!` only, so in builds without debug assertions an
    /// ASCII `c` simply yields `false`.
    #[inline]
    pub const fn lookup(c: char) -> bool {
        debug_assert!(!c.is_ascii());
        matches!(c as u32, 0x80..=0x9f)
    }
}
|
425 | 405 |
|
@@ -763,25 +743,18 @@ pub mod uppercase {
|
763 | 743 |
|
764 | 744 | #[rustfmt::skip]
|
765 | 745 | pub mod white_space {
|
766 |
| - static WHITESPACE_MAP: [u8; 256] = [ |
767 |
| - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
768 |
| - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
769 |
| - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
770 |
| - 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
771 |
| - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
772 |
| - 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
773 |
| - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
774 |
| - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
775 |
| - 0, 0, 0, 0, 0, 0, 0, 0, 0, |
776 |
| - ]; |
/// Reports whether the non-ASCII character `c` is in the `White_Space` table.
///
/// The table is small enough (19 code points in 8 ranges) to be written as a
/// single pattern, so no static lookup data is needed.
///
/// Callers are expected to handle ASCII input themselves: this is checked
/// with a `debug_assert!` only, so in builds without debug assertions an
/// ASCII `c` simply yields `false`.
#[inline]
pub const fn lookup(c: char) -> bool {
    debug_assert!(!c.is_ascii());
    matches!(
        c as u32,
        0x85
            | 0xa0
            | 0x1680
            | 0x2000..=0x200a
            | 0x2028..=0x2029
            | 0x202f
            | 0x205f
            | 0x3000
    )
}
|
|
0 commit comments