Skip to content

Commit cf7fa87

Browse files
committed
refactor: rewrite ranges_from_set
The `merge_ranges` function was very complicated and hard to understand. Fortunately, we can use `slice::chunk_by` to achieve the same thing.
1 parent fcab139 commit cf7fa87

File tree

1 file changed

+17
-66
lines changed
  • src/tools/unicode-table-generator/src

1 file changed

+17
-66
lines changed

src/tools/unicode-table-generator/src/main.rs

Lines changed: 17 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -187,33 +187,19 @@ fn load_data() -> UnicodeData {
187187
}
188188
}
189189

190-
let mut properties: HashMap<&'static str, Vec<Range<u32>>> = properties
190+
let mut properties: Vec<(&'static str, Vec<Range<u32>>)> = properties
191191
.into_iter()
192-
.map(|(k, v)| {
193-
(
194-
k,
195-
v.into_iter()
196-
.flat_map(|codepoints| match codepoints {
197-
Codepoints::Single(c) => c
198-
.scalar()
199-
.map(|ch| ch as u32..ch as u32 + 1)
200-
.into_iter()
201-
.collect::<Vec<_>>(),
202-
Codepoints::Range(c) => c
203-
.into_iter()
204-
.flat_map(|c| c.scalar().map(|ch| ch as u32..ch as u32 + 1))
205-
.collect::<Vec<_>>(),
206-
})
207-
.collect::<Vec<Range<u32>>>(),
208-
)
192+
.map(|(prop, codepoints)| {
193+
let codepoints = codepoints
194+
.into_iter()
195+
.flatten()
196+
.flat_map(|cp| cp.scalar())
197+
.map(u32::from)
198+
.collect::<Vec<_>>();
199+
(prop, ranges_from_set(&codepoints))
209200
})
210201
.collect();
211202

212-
for ranges in properties.values_mut() {
213-
merge_ranges(ranges);
214-
}
215-
216-
let mut properties = properties.into_iter().collect::<Vec<_>>();
217203
properties.sort_by_key(|p| p.0);
218204
UnicodeData { ranges: properties, to_lower, to_upper }
219205
}
@@ -402,48 +388,13 @@ fn generate_asserts(s: &mut String, property: &str, points: &[u32], truthy: bool
402388
}
403389
}
404390

391+
/// Group the elements of `set` into contiguous ranges
405392
fn ranges_from_set(set: &[u32]) -> Vec<Range<u32>> {
406-
let mut ranges = set.iter().map(|e| (*e)..(*e + 1)).collect::<Vec<Range<u32>>>();
407-
merge_ranges(&mut ranges);
408-
ranges
409-
}
410-
411-
fn merge_ranges(ranges: &mut Vec<Range<u32>>) {
412-
loop {
413-
let mut new_ranges = Vec::new();
414-
let mut idx_iter = 0..(ranges.len() - 1);
415-
let mut should_insert_last = true;
416-
while let Some(idx) = idx_iter.next() {
417-
let cur = ranges[idx].clone();
418-
let next = ranges[idx + 1].clone();
419-
if cur.end == next.start {
420-
if idx_iter.next().is_none() {
421-
// We're merging the last element
422-
should_insert_last = false;
423-
}
424-
new_ranges.push(cur.start..next.end);
425-
} else {
426-
// We're *not* merging the last element
427-
should_insert_last = true;
428-
new_ranges.push(cur);
429-
}
430-
}
431-
if should_insert_last {
432-
new_ranges.push(ranges.last().unwrap().clone());
433-
}
434-
if new_ranges.len() == ranges.len() {
435-
*ranges = new_ranges;
436-
break;
437-
} else {
438-
*ranges = new_ranges;
439-
}
440-
}
441-
442-
let mut last_end = None;
443-
for range in ranges {
444-
if let Some(last) = last_end {
445-
assert!(range.start > last, "{range:?}");
446-
}
447-
last_end = Some(range.end);
448-
}
393+
set.chunk_by(|a, b| a + 1 == *b)
394+
.map(|chunk| {
395+
let start = *chunk.first().unwrap();
396+
let end = *chunk.last().unwrap();
397+
start..(end + 1)
398+
})
399+
.collect()
449400
}

0 commit comments

Comments
 (0)