@@ -187,33 +187,19 @@ fn load_data() -> UnicodeData {
187187 }
188188 }
189189
190- let mut properties: HashMap < & ' static str , Vec < Range < u32 > > > = properties
190+ let mut properties: Vec < ( & ' static str , Vec < Range < u32 > > ) > = properties
191191 . into_iter ( )
192- . map ( |( k, v) | {
193- (
194- k,
195- v. into_iter ( )
196- . flat_map ( |codepoints| match codepoints {
197- Codepoints :: Single ( c) => c
198- . scalar ( )
199- . map ( |ch| ch as u32 ..ch as u32 + 1 )
200- . into_iter ( )
201- . collect :: < Vec < _ > > ( ) ,
202- Codepoints :: Range ( c) => c
203- . into_iter ( )
204- . flat_map ( |c| c. scalar ( ) . map ( |ch| ch as u32 ..ch as u32 + 1 ) )
205- . collect :: < Vec < _ > > ( ) ,
206- } )
207- . collect :: < Vec < Range < u32 > > > ( ) ,
208- )
192+ . map ( |( prop, codepoints) | {
193+ let codepoints = codepoints
194+ . into_iter ( )
195+ . flatten ( )
196+ . flat_map ( |cp| cp. scalar ( ) )
197+ . map ( u32:: from)
198+ . collect :: < Vec < _ > > ( ) ;
199+ ( prop, ranges_from_set ( & codepoints) )
209200 } )
210201 . collect ( ) ;
211202
212- for ranges in properties. values_mut ( ) {
213- merge_ranges ( ranges) ;
214- }
215-
216- let mut properties = properties. into_iter ( ) . collect :: < Vec < _ > > ( ) ;
217203 properties. sort_by_key ( |p| p. 0 ) ;
218204 UnicodeData { ranges : properties, to_lower, to_upper }
219205}
@@ -402,48 +388,13 @@ fn generate_asserts(s: &mut String, property: &str, points: &[u32], truthy: bool
402388 }
403389}
404390
/// Group the elements of `set` into contiguous ranges.
///
/// Only neighbouring elements are compared: a run is extended exactly when the
/// next element is one greater than the previous one. `set` is therefore
/// expected to be sorted in ascending order and free of duplicates; elements
/// that are out of order or repeated simply start a new range.
///
/// Every run `first, first + 1, ..., last` is returned as the half-open range
/// `first..last + 1`. An empty slice yields an empty vector.
///
/// # Panics
///
/// Panics if a run ends at `u32::MAX`, because the exclusive end of such a
/// range is not representable as a `u32`.
fn ranges_from_set(set: &[u32]) -> Vec<Range<u32>> {
    // `checked_add` keeps the adjacency test free of overflow: `u32::MAX` has
    // no successor, so it can never be followed by a "next" element.
    set.chunk_by(|prev, next| prev.checked_add(1) == Some(*next))
        .map(|run| {
            let first = *run.first().expect("chunk_by never yields an empty chunk");
            let last = *run.last().expect("chunk_by never yields an empty chunk");
            // Fail loudly instead of silently wrapping to 0 in release builds.
            let end = last
                .checked_add(1)
                .expect("a half-open u32 range cannot include u32::MAX");
            first..end
        })
        .collect()
}
0 commit comments