Skip to content

Commit fcab139

Browse files
committed
refactor: Include size of case conversion tables
Include the sizes of the `to_lowercase` and `to_uppercase` tables in the total size calculations.
1 parent 5314f24 commit fcab139

File tree

3 files changed, with 42 additions and 18 deletions.

library/core/src/unicode/unicode_data.rs

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,9 @@
88
// N : 457 bytes, 1911 codepoints in 144 ranges (U+000030 - U+01FBFA) using skiplist
99
// Uppercase : 799 bytes, 1978 codepoints in 656 ranges (U+000041 - U+01F18A) using bitset
1010
// White_Space : 256 bytes, 25 codepoints in 10 ranges (U+000009 - U+003001) using cascading
11-
// Total : 6530 bytes
11+
// to_lower : 11484 bytes
12+
// to_upper : 13432 bytes
13+
// Total : 31446 bytes
1214

1315
#[inline(always)]
1416
const fn bitset_search<
@@ -782,7 +784,7 @@ pub mod conversions {
782784
}
783785
}
784786

785-
static LOWERCASE_TABLE: &[(char, u32)] = &[
787+
static LOWERCASE_TABLE: &[(char, u32); 1434] = &[
786788
('\u{c0}', 224), ('\u{c1}', 225), ('\u{c2}', 226), ('\u{c3}', 227), ('\u{c4}', 228),
787789
('\u{c5}', 229), ('\u{c6}', 230), ('\u{c7}', 231), ('\u{c8}', 232), ('\u{c9}', 233),
788790
('\u{ca}', 234), ('\u{cb}', 235), ('\u{cc}', 236), ('\u{cd}', 237), ('\u{ce}', 238),
@@ -1132,11 +1134,11 @@ pub mod conversions {
11321134
('\u{1e921}', 125251),
11331135
];
11341136

1135-
static LOWERCASE_TABLE_MULTI: &[[char; 3]] = &[
1137+
static LOWERCASE_TABLE_MULTI: &[[char; 3]; 1] = &[
11361138
['i', '\u{307}', '\u{0}'],
11371139
];
11381140

1139-
static UPPERCASE_TABLE: &[(char, u32)] = &[
1141+
static UPPERCASE_TABLE: &[(char, u32); 1526] = &[
11401142
('\u{b5}', 924), ('\u{df}', 4194304), ('\u{e0}', 192), ('\u{e1}', 193), ('\u{e2}', 194),
11411143
('\u{e3}', 195), ('\u{e4}', 196), ('\u{e5}', 197), ('\u{e6}', 198), ('\u{e7}', 199),
11421144
('\u{e8}', 200), ('\u{e9}', 201), ('\u{ea}', 202), ('\u{eb}', 203), ('\u{ec}', 204),
@@ -1509,7 +1511,7 @@ pub mod conversions {
15091511
('\u{1e941}', 125215), ('\u{1e942}', 125216), ('\u{1e943}', 125217),
15101512
];
15111513

1512-
static UPPERCASE_TABLE_MULTI: &[[char; 3]] = &[
1514+
static UPPERCASE_TABLE_MULTI: &[[char; 3]; 102] = &[
15131515
['S', 'S', '\u{0}'], ['\u{2bc}', 'N', '\u{0}'], ['J', '\u{30c}', '\u{0}'],
15141516
['\u{399}', '\u{308}', '\u{301}'], ['\u{3a5}', '\u{308}', '\u{301}'],
15151517
['\u{535}', '\u{552}', '\u{0}'], ['H', '\u{331}', '\u{0}'], ['T', '\u{308}', '\u{0}'],

src/tools/unicode-table-generator/src/case_mapping.rs

Lines changed: 29 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6,20 +6,22 @@ use crate::{UnicodeData, fmt_list};
66

77
const INDEX_MASK: u32 = 1 << 22;
88

9-
pub(crate) fn generate_case_mapping(data: &UnicodeData) -> String {
9+
pub(crate) fn generate_case_mapping(data: &UnicodeData) -> (String, [usize; 2]) {
1010
let mut file = String::new();
1111

1212
write!(file, "const INDEX_MASK: u32 = 0x{INDEX_MASK:x};").unwrap();
1313
file.push_str("\n\n");
1414
file.push_str(HEADER.trim_start());
1515
file.push('\n');
16-
file.push_str(&generate_tables("LOWER", &data.to_lower));
16+
let (lower_tables, lower_size) = generate_tables("LOWER", &data.to_lower);
17+
file.push_str(&lower_tables);
1718
file.push_str("\n\n");
18-
file.push_str(&generate_tables("UPPER", &data.to_upper));
19-
file
19+
let (upper_tables, upper_size) = generate_tables("UPPER", &data.to_upper);
20+
file.push_str(&upper_tables);
21+
(file, [lower_size, upper_size])
2022
}
2123

22-
fn generate_tables(case: &str, data: &BTreeMap<u32, (u32, u32, u32)>) -> String {
24+
fn generate_tables(case: &str, data: &BTreeMap<u32, (u32, u32, u32)>) -> (String, usize) {
2325
let mut mappings = Vec::with_capacity(data.len());
2426
let mut multis = Vec::new();
2527

@@ -46,16 +48,31 @@ fn generate_tables(case: &str, data: &BTreeMap<u32, (u32, u32, u32)>) -> String
4648
}
4749

4850
let mut tables = String::new();
49-
50-
write!(tables, "static {}CASE_TABLE: &[(char, u32)] = &[{}];", case, fmt_list(mappings))
51-
.unwrap();
51+
let mut size = 0;
52+
53+
size += size_of_val(mappings.as_slice());
54+
write!(
55+
tables,
56+
"static {}CASE_TABLE: &[(char, u32); {}] = &[{}];",
57+
case,
58+
mappings.len(),
59+
fmt_list(mappings),
60+
)
61+
.unwrap();
5262

5363
tables.push_str("\n\n");
5464

55-
write!(tables, "static {}CASE_TABLE_MULTI: &[[char; 3]] = &[{}];", case, fmt_list(multis))
56-
.unwrap();
57-
58-
tables
65+
size += size_of_val(multis.as_slice());
66+
write!(
67+
tables,
68+
"static {}CASE_TABLE_MULTI: &[[char; 3]; {}] = &[{}];",
69+
case,
70+
multis.len(),
71+
fmt_list(multis),
72+
)
73+
.unwrap();
74+
75+
(tables, size)
5976
}
6077

6178
struct CharEscape(char);

src/tools/unicode-table-generator/src/main.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,11 @@ fn main() {
269269
));
270270
total_bytes += emitter.bytes_used;
271271
}
272+
let (conversions, sizes) = case_mapping::generate_case_mapping(&unicode_data);
273+
for (name, size) in ["to_lower", "to_upper"].iter().zip(sizes) {
274+
table_file.push_str(&format!("// {:16}: {:5} bytes\n", name, size));
275+
total_bytes += size;
276+
}
272277
table_file.push_str(&format!("// {:16}: {:5} bytes\n", "Total", total_bytes));
273278

274279
// Include the range search function
@@ -280,7 +285,7 @@ fn main() {
280285

281286
table_file.push('\n');
282287

283-
modules.push((String::from("conversions"), case_mapping::generate_case_mapping(&unicode_data)));
288+
modules.push((String::from("conversions"), conversions));
284289

285290
for (name, contents) in modules {
286291
table_file.push_str("#[rustfmt::skip]\n");

0 commit comments

Comments
 (0)