1212
1313#![ allow( missing_docs, non_upper_case_globals, non_snake_case) ]
1414
15- /// Represents a Unicode Version.
16- ///
17- /// See also: <http://www.unicode.org/versions/>
18- #[ derive( Clone , Copy , Debug , Eq , Ord , PartialEq , PartialOrd ) ]
19- pub struct UnicodeVersion {
20- /// Major version.
21- pub major : u32 ,
22-
23- /// Minor version.
24- pub minor : u32 ,
25-
26- /// Micro (or Update) version.
27- pub micro : u32 ,
28-
29- // Private field to keep struct expandable.
30- _priv : ( ) ,
31- }
15+ use version:: UnicodeVersion ;
16+ use bool_trie:: { BoolTrie , SmallBoolTrie } ;
3217
3318/// The version of [Unicode](http://www.unicode.org/) that the Unicode parts of
3419/// `CharExt` and `UnicodeStrPrelude` traits are based on.
@@ -38,76 +23,8 @@ pub const UNICODE_VERSION: UnicodeVersion = UnicodeVersion {
3823 micro : 0 ,
3924 _priv : ( ) ,
4025} ;
41-
42-
43- // BoolTrie is a trie for representing a set of Unicode codepoints. It is
44- // implemented with postfix compression (sharing of identical child nodes),
45- // which gives both compact size and fast lookup.
46- //
47- // The space of Unicode codepoints is divided into 3 subareas, each
48- // represented by a trie with different depth. In the first (0..0x800), there
49- // is no trie structure at all; each u64 entry corresponds to a bitvector
50- // effectively holding 64 bool values.
51- //
52- // In the second (0x800..0x10000), each child of the root node represents a
53- // 64-wide subrange, but instead of storing the full 64-bit value of the leaf,
54- // the trie stores an 8-bit index into a shared table of leaf values. This
55- // exploits the fact that in reasonable sets, many such leaves can be shared.
56- //
57- // In the third (0x10000..0x110000), each child of the root node represents a
58- // 4096-wide subrange, and the trie stores an 8-bit index into a 64-byte slice
59- // of a child tree. Each of these 64 bytes represents an index into the table
60- // of shared 64-bit leaf values. This exploits the sparse structure in the
61- // non-BMP range of most Unicode sets.
62- pub struct BoolTrie {
63- // 0..0x800 (corresponding to 1 and 2 byte utf-8 sequences)
64- r1 : [ u64 ; 32 ] , // leaves
65-
66- // 0x800..0x10000 (corresponding to 3 byte utf-8 sequences)
67- r2 : [ u8 ; 992 ] , // first level
68- r3 : & ' static [ u64 ] , // leaves
69-
70- // 0x10000..0x110000 (corresponding to 4 byte utf-8 sequences)
71- r4 : [ u8 ; 256 ] , // first level
72- r5 : & ' static [ u8 ] , // second level
73- r6 : & ' static [ u64 ] , // leaves
74- }
75-
76- fn trie_range_leaf ( c : usize , bitmap_chunk : u64 ) -> bool {
77- ( ( bitmap_chunk >> ( c & 63 ) ) & 1 ) != 0
78- }
79-
80- fn trie_lookup_range_table ( c : char , r : & ' static BoolTrie ) -> bool {
81- let c = c as usize ;
82- if c < 0x800 {
83- trie_range_leaf ( c, r. r1 [ c >> 6 ] )
84- } else if c < 0x10000 {
85- let child = r. r2 [ ( c >> 6 ) - 0x20 ] ;
86- trie_range_leaf ( c, r. r3 [ child as usize ] )
87- } else {
88- let child = r. r4 [ ( c >> 12 ) - 0x10 ] ;
89- let leaf = r. r5 [ ( ( child as usize ) << 6 ) + ( ( c >> 6 ) & 0x3f ) ] ;
90- trie_range_leaf ( c, r. r6 [ leaf as usize ] )
91- }
92- }
93-
94- pub struct SmallBoolTrie {
95- r1 : & ' static [ u8 ] , // first level
96- r2 : & ' static [ u64 ] , // leaves
97- }
98-
99- impl SmallBoolTrie {
100- fn lookup ( & self , c : char ) -> bool {
101- let c = c as usize ;
102- match self . r1 . get ( c >> 6 ) {
103- Some ( & child) => trie_range_leaf ( c, self . r2 [ child as usize ] ) ,
104- None => false ,
105- }
106- }
107- }
108-
10926pub mod general_category {
110- pub const Cc_table : & ' static super :: SmallBoolTrie = & super :: SmallBoolTrie {
27+ pub const Cc_table : & super :: SmallBoolTrie = & super :: SmallBoolTrie {
11128 r1 : & [
11229 0 , 1 , 0
11330 ] ,
@@ -120,7 +37,7 @@ pub mod general_category {
12037 Cc_table . lookup ( c)
12138 }
12239
123- pub const N_table : & ' static super :: BoolTrie = & super :: BoolTrie {
40+ pub const N_table : & super :: BoolTrie = & super :: BoolTrie {
12441 r1 : [
12542 0x03ff000000000000 , 0x0000000000000000 , 0x0000000000000000 , 0x0000000000000000 ,
12643 0x0000000000000000 , 0x0000000000000000 , 0x0000000000000000 , 0x0000000000000000 ,
@@ -212,13 +129,13 @@ pub mod general_category {
212129 } ;
213130
214131 pub fn N ( c : char ) -> bool {
215- super :: trie_lookup_range_table ( c , N_table )
132+ N_table . lookup ( c )
216133 }
217134
218135}
219136
220137pub mod derived_property {
221- pub const Alphabetic_table : & ' static super :: BoolTrie = & super :: BoolTrie {
138+ pub const Alphabetic_table : & super :: BoolTrie = & super :: BoolTrie {
222139 r1 : [
223140 0x0000000000000000 , 0x07fffffe07fffffe , 0x0420040000000000 , 0xff7fffffff7fffff ,
224141 0xffffffffffffffff , 0xffffffffffffffff , 0xffffffffffffffff , 0xffffffffffffffff ,
@@ -397,10 +314,10 @@ pub mod derived_property {
397314 } ;
398315
399316 pub fn Alphabetic ( c : char ) -> bool {
400- super :: trie_lookup_range_table ( c , Alphabetic_table )
317+ Alphabetic_table . lookup ( c )
401318 }
402319
403- pub const Case_Ignorable_table : & ' static super :: BoolTrie = & super :: BoolTrie {
320+ pub const Case_Ignorable_table : & super :: BoolTrie = & super :: BoolTrie {
404321 r1 : [
405322 0x0400408000000000 , 0x0000000140000000 , 0x0190a10000000000 , 0x0000000000000000 ,
406323 0x0000000000000000 , 0x0000000000000000 , 0x0000000000000000 , 0x0000000000000000 ,
@@ -529,10 +446,10 @@ pub mod derived_property {
529446 } ;
530447
531448 pub fn Case_Ignorable ( c : char ) -> bool {
532- super :: trie_lookup_range_table ( c , Case_Ignorable_table )
449+ Case_Ignorable_table . lookup ( c )
533450 }
534451
535- pub const Cased_table : & ' static super :: BoolTrie = & super :: BoolTrie {
452+ pub const Cased_table : & super :: BoolTrie = & super :: BoolTrie {
536453 r1 : [
537454 0x0000000000000000 , 0x07fffffe07fffffe , 0x0420040000000000 , 0xff7fffffff7fffff ,
538455 0xffffffffffffffff , 0xffffffffffffffff , 0xf7ffffffffffffff , 0xfffffffffffffff0 ,
@@ -628,10 +545,10 @@ pub mod derived_property {
628545 } ;
629546
630547 pub fn Cased ( c : char ) -> bool {
631- super :: trie_lookup_range_table ( c , Cased_table )
548+ Cased_table . lookup ( c )
632549 }
633550
634- pub const Lowercase_table : & ' static super :: BoolTrie = & super :: BoolTrie {
551+ pub const Lowercase_table : & super :: BoolTrie = & super :: BoolTrie {
635552 r1 : [
636553 0x0000000000000000 , 0x07fffffe00000000 , 0x0420040000000000 , 0xff7fffff80000000 ,
637554 0x55aaaaaaaaaaaaaa , 0xd4aaaaaaaaaaab55 , 0xe6512d2a4e243129 , 0xaa29aaaab5555240 ,
@@ -725,10 +642,10 @@ pub mod derived_property {
725642 } ;
726643
727644 pub fn Lowercase ( c : char ) -> bool {
728- super :: trie_lookup_range_table ( c , Lowercase_table )
645+ Lowercase_table . lookup ( c )
729646 }
730647
731- pub const Uppercase_table : & ' static super :: BoolTrie = & super :: BoolTrie {
648+ pub const Uppercase_table : & super :: BoolTrie = & super :: BoolTrie {
732649 r1 : [
733650 0x0000000000000000 , 0x0000000007fffffe , 0x0000000000000000 , 0x000000007f7fffff ,
734651 0xaa55555555555555 , 0x2b555555555554aa , 0x11aed2d5b1dbced6 , 0x55d255554aaaa490 ,
@@ -823,10 +740,10 @@ pub mod derived_property {
823740 } ;
824741
825742 pub fn Uppercase ( c : char ) -> bool {
826- super :: trie_lookup_range_table ( c , Uppercase_table )
743+ Uppercase_table . lookup ( c )
827744 }
828745
829- pub const XID_Continue_table : & ' static super :: BoolTrie = & super :: BoolTrie {
746+ pub const XID_Continue_table : & super :: BoolTrie = & super :: BoolTrie {
830747 r1 : [
831748 0x03ff000000000000 , 0x07fffffe87fffffe , 0x04a0040000000000 , 0xff7fffffff7fffff ,
832749 0xffffffffffffffff , 0xffffffffffffffff , 0xffffffffffffffff , 0xffffffffffffffff ,
@@ -998,10 +915,10 @@ pub mod derived_property {
998915 } ;
999916
1000917 pub fn XID_Continue ( c : char ) -> bool {
1001- super :: trie_lookup_range_table ( c , XID_Continue_table )
918+ XID_Continue_table . lookup ( c )
1002919 }
1003920
1004- pub const XID_Start_table : & ' static super :: BoolTrie = & super :: BoolTrie {
921+ pub const XID_Start_table : & super :: BoolTrie = & super :: BoolTrie {
1005922 r1 : [
1006923 0x0000000000000000 , 0x07fffffe07fffffe , 0x0420040000000000 , 0xff7fffffff7fffff ,
1007924 0xffffffffffffffff , 0xffffffffffffffff , 0xffffffffffffffff , 0xffffffffffffffff ,
@@ -1175,13 +1092,13 @@ pub mod derived_property {
11751092 } ;
11761093
11771094 pub fn XID_Start ( c : char ) -> bool {
1178- super :: trie_lookup_range_table ( c , XID_Start_table )
1095+ XID_Start_table . lookup ( c )
11791096 }
11801097
11811098}
11821099
11831100pub mod property {
1184- pub const Pattern_White_Space_table : & ' static super :: SmallBoolTrie = & super :: SmallBoolTrie {
1101+ pub const Pattern_White_Space_table : & super :: SmallBoolTrie = & super :: SmallBoolTrie {
11851102 r1 : & [
11861103 0 , 1 , 2 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 ,
11871104 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 ,
@@ -1198,7 +1115,7 @@ pub mod property {
11981115 Pattern_White_Space_table . lookup ( c)
11991116 }
12001117
1201- pub const White_Space_table : & ' static super :: SmallBoolTrie = & super :: SmallBoolTrie {
1118+ pub const White_Space_table : & super :: SmallBoolTrie = & super :: SmallBoolTrie {
12021119 r1 : & [
12031120 0 , 1 , 2 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 ,
12041121 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 ,
@@ -1238,11 +1155,11 @@ pub mod conversions {
12381155 }
12391156 }
12401157
1241- fn bsearch_case_table ( c : char , table : & ' static [ ( char , [ char ; 3 ] ) ] ) -> Option < usize > {
1158+ fn bsearch_case_table ( c : char , table : & [ ( char , [ char ; 3 ] ) ] ) -> Option < usize > {
12421159 table. binary_search_by ( |& ( key, _) | key. cmp ( & c) ) . ok ( )
12431160 }
12441161
1245- const to_lowercase_table: & ' static [ ( char , [ char ; 3 ] ) ] = & [
1162+ const to_lowercase_table: & [ ( char , [ char ; 3 ] ) ] = & [
12461163 ( '\u{41}' , [ '\u{61}' , '\0' , '\0' ] ) , ( '\u{42}' , [ '\u{62}' , '\0' , '\0' ] ) , ( '\u{43}' ,
12471164 [ '\u{63}' , '\0' , '\0' ] ) , ( '\u{44}' , [ '\u{64}' , '\0' , '\0' ] ) , ( '\u{45}' , [ '\u{65}' , '\0' ,
12481165 '\0' ] ) , ( '\u{46}' , [ '\u{66}' , '\0' , '\0' ] ) , ( '\u{47}' , [ '\u{67}' , '\0' , '\0' ] ) , ( '\u{48}' ,
@@ -1826,7 +1743,7 @@ pub mod conversions {
18261743 ( '\u{1e920}' , [ '\u{1e942}' , '\0' , '\0' ] ) , ( '\u{1e921}' , [ '\u{1e943}' , '\0' , '\0' ] )
18271744 ] ;
18281745
1829- const to_uppercase_table: & ' static [ ( char , [ char ; 3 ] ) ] = & [
1746+ const to_uppercase_table: & [ ( char , [ char ; 3 ] ) ] = & [
18301747 ( '\u{61}' , [ '\u{41}' , '\0' , '\0' ] ) , ( '\u{62}' , [ '\u{42}' , '\0' , '\0' ] ) , ( '\u{63}' ,
18311748 [ '\u{43}' , '\0' , '\0' ] ) , ( '\u{64}' , [ '\u{44}' , '\0' , '\0' ] ) , ( '\u{65}' , [ '\u{45}' , '\0' ,
18321749 '\0' ] ) , ( '\u{66}' , [ '\u{46}' , '\0' , '\0' ] ) , ( '\u{67}' , [ '\u{47}' , '\0' , '\0' ] ) , ( '\u{68}' ,
0 commit comments