@@ -100,11 +100,11 @@ static PROPERTIES: &[&str] = &[
100100
/// Unicode tables used by this generator: per-property code point ranges plus
/// the two case-conversion maps.
struct UnicodeData {
    /// Pairs of a property name and the code point ranges recorded for it.
    ranges: Vec<(&'static str, Vec<Range<u32>>)>,
    /// Uppercase mappings keyed by the originating code point.
    ///
    /// Each value has exactly three slots; slots that are not needed hold `0`,
    /// so a single-code-point mapping is stored as `[mapped, 0, 0]`.
    to_upper: BTreeMap<u32, [u32; 3]>,
    /// Lowercase mappings keyed by the originating code point, laid out the
    /// same way as `to_upper` (three slots, unused ones set to `0`).
    to_lower: BTreeMap<u32, [u32; 3]>,
}
106106
107- fn to_mapping ( origin : u32 , codepoints : Vec < ucd_parse:: Codepoint > ) -> Option < ( u32 , u32 , u32 ) > {
107+ fn to_mapping ( origin : u32 , codepoints : Vec < ucd_parse:: Codepoint > ) -> Option < [ u32 ; 3 ] > {
108108 let mut a = None ;
109109 let mut b = None ;
110110 let mut c = None ;
@@ -125,7 +125,7 @@ fn to_mapping(origin: u32, codepoints: Vec<ucd_parse::Codepoint>) -> Option<(u32
125125 }
126126 }
127127
128- Some ( ( a. unwrap ( ) , b. unwrap_or ( 0 ) , c. unwrap_or ( 0 ) ) )
128+ Some ( [ a. unwrap ( ) , b. unwrap_or ( 0 ) , c. unwrap_or ( 0 ) ] )
129129}
130130
131131static UNICODE_DIRECTORY : & str = "unicode-downloads" ;
@@ -165,12 +165,12 @@ fn load_data() -> UnicodeData {
165165 if let Some ( mapped) = row. simple_lowercase_mapping
166166 && mapped != row. codepoint
167167 {
168- to_lower. insert ( row. codepoint . value ( ) , ( mapped. value ( ) , 0 , 0 ) ) ;
168+ to_lower. insert ( row. codepoint . value ( ) , [ mapped. value ( ) , 0 , 0 ] ) ;
169169 }
170170 if let Some ( mapped) = row. simple_uppercase_mapping
171171 && mapped != row. codepoint
172172 {
173- to_upper. insert ( row. codepoint . value ( ) , ( mapped. value ( ) , 0 , 0 ) ) ;
173+ to_upper. insert ( row. codepoint . value ( ) , [ mapped. value ( ) , 0 , 0 ] ) ;
174174 }
175175 }
176176
@@ -224,7 +224,7 @@ fn main() {
224224 let ranges_by_property = & unicode_data. ranges ;
225225
226226 if let Some ( path) = test_path {
227- std:: fs:: write ( & path, generate_tests ( ranges_by_property ) . unwrap ( ) ) . unwrap ( ) ;
227+ std:: fs:: write ( & path, generate_tests ( & unicode_data ) . unwrap ( ) ) . unwrap ( ) ;
228228 }
229229
230230 let mut table_file = String :: new ( ) ;
@@ -328,15 +328,15 @@ fn fmt_list<V: std::fmt::Debug>(values: impl IntoIterator<Item = V>) -> String {
328328 out
329329}
330330
331- fn generate_tests ( ranges : & [ ( & str , Vec < Range < u32 > > ) ] ) -> Result < String , fmt:: Error > {
331+ fn generate_tests ( data : & UnicodeData ) -> Result < String , fmt:: Error > {
332332 let mut s = String :: new ( ) ;
333333 writeln ! ( s, "#![feature(core_intrinsics)]" ) ?;
334334 writeln ! ( s, "#![allow(internal_features, dead_code)]" ) ?;
335335 writeln ! ( s, "// ignore-tidy-filelength" ) ?;
336336 writeln ! ( s, "use std::intrinsics;" ) ?;
337337 writeln ! ( s, "mod unicode_data;" ) ?;
338338 writeln ! ( s, "fn main() {{" ) ?;
339- for ( property, ranges) in ranges {
339+ for ( property, ranges) in & data . ranges {
340340 let prop = property. to_lowercase ( ) ;
341341 writeln ! ( s, r#" println!("Testing {prop}");"# ) ?;
342342 writeln ! ( s, " {prop}_true();" ) ?;
@@ -355,6 +355,36 @@ fn generate_tests(ranges: &[(&str, Vec<Range<u32>>)]) -> Result<String, fmt::Err
355355 writeln ! ( s, " }}" ) ?;
356356 }
357357
358+ for ( name, conversion) in [ "to_lower" , "to_upper" ] . iter ( ) . zip ( [ & data. to_lower , & data. to_upper ] )
359+ {
360+ writeln ! ( s, r#" println!("Testing {name}");"# ) ?;
361+ for ( c, mapping) in conversion {
362+ let c = char:: from_u32 ( * c) . unwrap ( ) ;
363+ let mapping = mapping. map ( |c| char:: from_u32 ( c) . unwrap ( ) ) ;
364+ writeln ! (
365+ s,
366+ r#" assert_eq!(unicode_data::conversions::{name}({c:?}), {mapping:?});"#
367+ ) ?;
368+ }
369+ let unmapped: Vec < _ > = ( char:: MIN ..=char:: MAX )
370+ . filter ( |c| !c. is_ascii ( ) )
371+ . map ( u32:: from)
372+ . filter ( |c| !conversion. contains_key ( c) )
373+ . collect ( ) ;
374+ let unmapped_ranges = ranges_from_set ( & unmapped) ;
375+ for range in unmapped_ranges {
376+ let start = char:: from_u32 ( range. start ) . unwrap ( ) ;
377+ let end = char:: from_u32 ( range. end - 1 ) . unwrap ( ) ;
378+ writeln ! ( s, " for c in {start:?}..={end:?} {{" ) ?;
379+ writeln ! (
380+ s,
381+ r#" assert_eq!(unicode_data::conversions::{name}(c), [c, '\0', '\0']);"#
382+ ) ?;
383+
384+ writeln ! ( s, " }}" ) ?;
385+ }
386+ }
387+
358388 writeln ! ( s, "}}" ) ?;
359389 Ok ( s)
360390}
0 commit comments