Skip to content

Commit 3a3f30b

Browse files
committed
refactor: Add tests for case conversions
1 parent ddde353 commit 3a3f30b

File tree

2 files changed

+41
-11
lines changed

2 files changed

+41
-11
lines changed

src/tools/unicode-table-generator/src/case_mapping.rs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -21,11 +21,11 @@ pub(crate) fn generate_case_mapping(data: &UnicodeData) -> (String, [usize; 2])
2121
(file, [lower_size, upper_size])
2222
}
2323

24-
fn generate_tables(case: &str, data: &BTreeMap<u32, (u32, u32, u32)>) -> (String, usize) {
24+
fn generate_tables(case: &str, data: &BTreeMap<u32, [u32; 3]>) -> (String, usize) {
2525
let mut mappings = Vec::with_capacity(data.len());
2626
let mut multis = Vec::new();
2727

28-
for (&key, &(a, b, c)) in data.iter() {
28+
for (&key, &[a, b, c]) in data.iter() {
2929
let key = char::from_u32(key).unwrap();
3030

3131
if key.is_ascii() {

src/tools/unicode-table-generator/src/main.rs

Lines changed: 39 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -100,11 +100,11 @@ static PROPERTIES: &[&str] = &[
100100

101101
struct UnicodeData {
102102
ranges: Vec<(&'static str, Vec<Range<u32>>)>,
103-
to_upper: BTreeMap<u32, (u32, u32, u32)>,
104-
to_lower: BTreeMap<u32, (u32, u32, u32)>,
103+
to_upper: BTreeMap<u32, [u32; 3]>,
104+
to_lower: BTreeMap<u32, [u32; 3]>,
105105
}
106106

107-
fn to_mapping(origin: u32, codepoints: Vec<ucd_parse::Codepoint>) -> Option<(u32, u32, u32)> {
107+
fn to_mapping(origin: u32, codepoints: Vec<ucd_parse::Codepoint>) -> Option<[u32; 3]> {
108108
let mut a = None;
109109
let mut b = None;
110110
let mut c = None;
@@ -125,7 +125,7 @@ fn to_mapping(origin: u32, codepoints: Vec<ucd_parse::Codepoint>) -> Option<(u32
125125
}
126126
}
127127

128-
Some((a.unwrap(), b.unwrap_or(0), c.unwrap_or(0)))
128+
Some([a.unwrap(), b.unwrap_or(0), c.unwrap_or(0)])
129129
}
130130

131131
static UNICODE_DIRECTORY: &str = "unicode-downloads";
@@ -165,12 +165,12 @@ fn load_data() -> UnicodeData {
165165
if let Some(mapped) = row.simple_lowercase_mapping
166166
&& mapped != row.codepoint
167167
{
168-
to_lower.insert(row.codepoint.value(), (mapped.value(), 0, 0));
168+
to_lower.insert(row.codepoint.value(), [mapped.value(), 0, 0]);
169169
}
170170
if let Some(mapped) = row.simple_uppercase_mapping
171171
&& mapped != row.codepoint
172172
{
173-
to_upper.insert(row.codepoint.value(), (mapped.value(), 0, 0));
173+
to_upper.insert(row.codepoint.value(), [mapped.value(), 0, 0]);
174174
}
175175
}
176176

@@ -224,7 +224,7 @@ fn main() {
224224
let ranges_by_property = &unicode_data.ranges;
225225

226226
if let Some(path) = test_path {
227-
std::fs::write(&path, generate_tests(ranges_by_property).unwrap()).unwrap();
227+
std::fs::write(&path, generate_tests(&unicode_data).unwrap()).unwrap();
228228
}
229229

230230
let mut table_file = String::new();
@@ -328,15 +328,15 @@ fn fmt_list<V: std::fmt::Debug>(values: impl IntoIterator<Item = V>) -> String {
328328
out
329329
}
330330

331-
fn generate_tests(ranges: &[(&str, Vec<Range<u32>>)]) -> Result<String, fmt::Error> {
331+
fn generate_tests(data: &UnicodeData) -> Result<String, fmt::Error> {
332332
let mut s = String::new();
333333
writeln!(s, "#![feature(core_intrinsics)]")?;
334334
writeln!(s, "#![allow(internal_features, dead_code)]")?;
335335
writeln!(s, "// ignore-tidy-filelength")?;
336336
writeln!(s, "use std::intrinsics;")?;
337337
writeln!(s, "mod unicode_data;")?;
338338
writeln!(s, "fn main() {{")?;
339-
for (property, ranges) in ranges {
339+
for (property, ranges) in &data.ranges {
340340
let prop = property.to_lowercase();
341341
writeln!(s, r#" println!("Testing {prop}");"#)?;
342342
writeln!(s, " {prop}_true();")?;
@@ -355,6 +355,36 @@ fn generate_tests(ranges: &[(&str, Vec<Range<u32>>)]) -> Result<String, fmt::Err
355355
writeln!(s, " }}")?;
356356
}
357357

358+
for (name, conversion) in ["to_lower", "to_upper"].iter().zip([&data.to_lower, &data.to_upper])
359+
{
360+
writeln!(s, r#" println!("Testing {name}");"#)?;
361+
for (c, mapping) in conversion {
362+
let c = char::from_u32(*c).unwrap();
363+
let mapping = mapping.map(|c| char::from_u32(c).unwrap());
364+
writeln!(
365+
s,
366+
r#" assert_eq!(unicode_data::conversions::{name}({c:?}), {mapping:?});"#
367+
)?;
368+
}
369+
let unmapped: Vec<_> = (char::MIN..=char::MAX)
370+
.filter(|c| !c.is_ascii())
371+
.map(u32::from)
372+
.filter(|c| !conversion.contains_key(c))
373+
.collect();
374+
let unmapped_ranges = ranges_from_set(&unmapped);
375+
for range in unmapped_ranges {
376+
let start = char::from_u32(range.start).unwrap();
377+
let end = char::from_u32(range.end - 1).unwrap();
378+
writeln!(s, " for c in {start:?}..={end:?} {{")?;
379+
writeln!(
380+
s,
381+
r#" assert_eq!(unicode_data::conversions::{name}(c), [c, '\0', '\0']);"#
382+
)?;
383+
384+
writeln!(s, " }}")?;
385+
}
386+
}
387+
358388
writeln!(s, "}}")?;
359389
Ok(s)
360390
}

0 commit comments

Comments
 (0)