@@ -100,11 +100,11 @@ static PROPERTIES: &[&str] = &[
100
100
101
101
/// Data loaded by this generator: per-property ranges plus the case-conversion tables.
struct UnicodeData {
102
102
/// One entry per property name, paired with that property's list of `u32` ranges
/// (presumably code point ranges; confirm against `load_data`).
ranges : Vec < ( & ' static str , Vec < Range < u32 > > ) > ,
103
- to_upper : BTreeMap < u32 , ( u32 , u32 , u32 ) > ,
104
- to_lower : BTreeMap < u32 , ( u32 , u32 , u32 ) > ,
103
/// Case-conversion tables keyed by the source code point's `u32` value.
/// Each value holds up to three `u32` code points; unused trailing slots are `0`
/// (simple one-to-one mappings are stored as `[mapped, 0, 0]`).
/// The array form, replacing the earlier 3-tuple, lets `generate_tests` convert
/// a whole entry at once with `mapping.map(..)`.
+ to_upper : BTreeMap < u32 , [ u32 ; 3 ] > ,
104
+ to_lower : BTreeMap < u32 , [ u32 ; 3 ] > ,
105
105
}
106
106
107
- fn to_mapping ( origin : u32 , codepoints : Vec < ucd_parse:: Codepoint > ) -> Option < ( u32 , u32 , u32 ) > {
107
+ fn to_mapping ( origin : u32 , codepoints : Vec < ucd_parse:: Codepoint > ) -> Option < [ u32 ; 3 ] > {
108
108
let mut a = None ;
109
109
let mut b = None ;
110
110
let mut c = None ;
@@ -125,7 +125,7 @@ fn to_mapping(origin: u32, codepoints: Vec<ucd_parse::Codepoint>) -> Option<(u32
125
125
}
126
126
}
127
127
128
- Some ( ( a. unwrap ( ) , b. unwrap_or ( 0 ) , c. unwrap_or ( 0 ) ) )
128
+ Some ( [ a. unwrap ( ) , b. unwrap_or ( 0 ) , c. unwrap_or ( 0 ) ] )
129
129
}
130
130
131
131
static UNICODE_DIRECTORY : & str = "unicode-downloads" ;
@@ -165,12 +165,12 @@ fn load_data() -> UnicodeData {
165
165
if let Some ( mapped) = row. simple_lowercase_mapping
166
166
&& mapped != row. codepoint
167
167
{
168
- to_lower. insert ( row. codepoint . value ( ) , ( mapped. value ( ) , 0 , 0 ) ) ;
168
+ to_lower. insert ( row. codepoint . value ( ) , [ mapped. value ( ) , 0 , 0 ] ) ;
169
169
}
170
170
if let Some ( mapped) = row. simple_uppercase_mapping
171
171
&& mapped != row. codepoint
172
172
{
173
- to_upper. insert ( row. codepoint . value ( ) , ( mapped. value ( ) , 0 , 0 ) ) ;
173
+ to_upper. insert ( row. codepoint . value ( ) , [ mapped. value ( ) , 0 , 0 ] ) ;
174
174
}
175
175
}
176
176
@@ -224,7 +224,7 @@ fn main() {
224
224
let ranges_by_property = & unicode_data. ranges ;
225
225
226
226
if let Some ( path) = test_path {
227
- std:: fs:: write ( & path, generate_tests ( ranges_by_property ) . unwrap ( ) ) . unwrap ( ) ;
227
+ std:: fs:: write ( & path, generate_tests ( & unicode_data ) . unwrap ( ) ) . unwrap ( ) ;
228
228
}
229
229
230
230
let mut table_file = String :: new ( ) ;
@@ -328,15 +328,15 @@ fn fmt_list<V: std::fmt::Debug>(values: impl IntoIterator<Item = V>) -> String {
328
328
out
329
329
}
330
330
331
- fn generate_tests ( ranges : & [ ( & str , Vec < Range < u32 > > ) ] ) -> Result < String , fmt:: Error > {
331
+ fn generate_tests ( data : & UnicodeData ) -> Result < String , fmt:: Error > {
332
332
let mut s = String :: new ( ) ;
333
333
writeln ! ( s, "#![feature(core_intrinsics)]" ) ?;
334
334
writeln ! ( s, "#![allow(internal_features, dead_code)]" ) ?;
335
335
writeln ! ( s, "// ignore-tidy-filelength" ) ?;
336
336
writeln ! ( s, "use std::intrinsics;" ) ?;
337
337
writeln ! ( s, "mod unicode_data;" ) ?;
338
338
writeln ! ( s, "fn main() {{" ) ?;
339
- for ( property, ranges) in ranges {
339
+ for ( property, ranges) in & data . ranges {
340
340
let prop = property. to_lowercase ( ) ;
341
341
writeln ! ( s, r#" println!("Testing {prop}");"# ) ?;
342
342
writeln ! ( s, " {prop}_true();" ) ?;
@@ -355,6 +355,36 @@ fn generate_tests(ranges: &[(&str, Vec<Range<u32>>)]) -> Result<String, fmt::Err
355
355
writeln ! ( s, " }}" ) ?;
356
356
}
357
357
358
+ for ( name, conversion) in [ "to_lower" , "to_upper" ] . iter ( ) . zip ( [ & data. to_lower , & data. to_upper ] )
359
+ {
360
+ writeln ! ( s, r#" println!("Testing {name}");"# ) ?;
361
+ for ( c, mapping) in conversion {
362
+ let c = char:: from_u32 ( * c) . unwrap ( ) ;
363
+ let mapping = mapping. map ( |c| char:: from_u32 ( c) . unwrap ( ) ) ;
364
+ writeln ! (
365
+ s,
366
+ r#" assert_eq!(unicode_data::conversions::{name}({c:?}), {mapping:?});"#
367
+ ) ?;
368
+ }
369
+ let unmapped: Vec < _ > = ( char:: MIN ..=char:: MAX )
370
+ . filter ( |c| !c. is_ascii ( ) )
371
+ . map ( u32:: from)
372
+ . filter ( |c| !conversion. contains_key ( c) )
373
+ . collect ( ) ;
374
+ let unmapped_ranges = ranges_from_set ( & unmapped) ;
375
+ for range in unmapped_ranges {
376
+ let start = char:: from_u32 ( range. start ) . unwrap ( ) ;
377
+ let end = char:: from_u32 ( range. end - 1 ) . unwrap ( ) ;
378
+ writeln ! ( s, " for c in {start:?}..={end:?} {{" ) ?;
379
+ writeln ! (
380
+ s,
381
+ r#" assert_eq!(unicode_data::conversions::{name}(c), [c, '\0', '\0']);"#
382
+ ) ?;
383
+
384
+ writeln ! ( s, " }}" ) ?;
385
+ }
386
+ }
387
+
358
388
writeln ! ( s, "}}" ) ?;
359
389
Ok ( s)
360
390
}
0 commit comments