@@ -187,33 +187,19 @@ fn load_data() -> UnicodeData {
187
187
}
188
188
}
189
189
190
- let mut properties: HashMap < & ' static str , Vec < Range < u32 > > > = properties
190
+ let mut properties: Vec < ( & ' static str , Vec < Range < u32 > > ) > = properties
191
191
. into_iter ( )
192
- . map ( |( k, v) | {
193
- (
194
- k,
195
- v. into_iter ( )
196
- . flat_map ( |codepoints| match codepoints {
197
- Codepoints :: Single ( c) => c
198
- . scalar ( )
199
- . map ( |ch| ch as u32 ..ch as u32 + 1 )
200
- . into_iter ( )
201
- . collect :: < Vec < _ > > ( ) ,
202
- Codepoints :: Range ( c) => c
203
- . into_iter ( )
204
- . flat_map ( |c| c. scalar ( ) . map ( |ch| ch as u32 ..ch as u32 + 1 ) )
205
- . collect :: < Vec < _ > > ( ) ,
206
- } )
207
- . collect :: < Vec < Range < u32 > > > ( ) ,
208
- )
192
+ . map ( |( prop, codepoints) | {
193
+ let codepoints = codepoints
194
+ . into_iter ( )
195
+ . flatten ( )
196
+ . flat_map ( |cp| cp. scalar ( ) )
197
+ . map ( u32:: from)
198
+ . collect :: < Vec < _ > > ( ) ;
199
+ ( prop, ranges_from_set ( & codepoints) )
209
200
} )
210
201
. collect ( ) ;
211
202
212
- for ranges in properties. values_mut ( ) {
213
- merge_ranges ( ranges) ;
214
- }
215
-
216
- let mut properties = properties. into_iter ( ) . collect :: < Vec < _ > > ( ) ;
217
203
properties. sort_by_key ( |p| p. 0 ) ;
218
204
UnicodeData { ranges : properties, to_lower, to_upper }
219
205
}
@@ -402,48 +388,13 @@ fn generate_asserts(s: &mut String, property: &str, points: &[u32], truthy: bool
402
388
}
403
389
}
404
390
391
/// Groups the elements of `set` into contiguous half-open ranges.
///
/// Every maximal run of consecutive values (`n`, `n + 1`, `n + 2`, ...) found in
/// `set` becomes a single `start..end` range, where `end` is one past the last
/// value of the run. An empty `set` yields an empty `Vec`.
///
/// Only neighbouring elements are compared, so the returned ranges are sorted
/// and non-overlapping only when `set` itself is sorted in ascending order and
/// free of duplicates.
///
/// # Panics
///
/// Panics if `set` contains `u32::MAX`, because the exclusive end of its range
/// cannot be represented as a `u32`.
fn ranges_from_set(set: &[u32]) -> Vec<Range<u32>> {
    // `checked_add` keeps the adjacency test from overflowing (and, in release
    // builds, from wrapping `u32::MAX` around to `0`).
    set.chunk_by(|a, b| a.checked_add(1) == Some(*b))
        .map(|chunk| {
            let start = *chunk.first().expect("chunk_by never yields an empty chunk");
            let last = *chunk.last().expect("chunk_by never yields an empty chunk");
            // Ranges are half-open, so the end is one past the last element.
            let end = last
                .checked_add(1)
                .expect("u32::MAX cannot be the last element of a half-open range");
            start..end
        })
        .collect()
}
0 commit comments