Skip to content

Commit f52c712

Browse files
committed
Improve union algorithm to reuse allocated storage, if possible
1 parent 45f6c4b commit f52c712

File tree

1 file changed

+81
-37
lines changed

1 file changed

+81
-37
lines changed

regex-syntax/src/hir/interval.rs

Lines changed: 81 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -133,27 +133,38 @@ impl<I: Interval> IntervalSet<I> {
133133
return;
134134
}
135135

136-
// No way to know what the new size will be, so for now we assume that
137-
// in typical cases, the union of a set of classes won't have many
138-
// overlaps.
139-
let mut ranges =
140-
Vec::with_capacity(self.ranges.len() + other.ranges.len());
141-
142-
let merged =
143-
MergeIter::new(self.ranges.iter(), other.ranges.iter()).copied();
144-
145-
let final_range = merged.reduce(|range, next_range| {
146-
range.union_right(&next_range).unwrap_or_else(|| {
147-
ranges.push(range);
148-
next_range
149-
})
150-
});
151-
152-
if let Some(final_range) = final_range {
153-
ranges.push(final_range);
136+
// If our allocated capacity is sufficient to hold both ourself and
137+
// the new range, we just merge the ranges in-place, then canonicalize.
138+
if self.ranges.capacity() >= self.ranges.len() + other.ranges.len() {
139+
merge_sorted_into(&mut self.ranges, other.ranges.iter().copied());
140+
union_sorted(&mut self.ranges)
141+
}
142+
// Otherwise, build a new vector by merging the two ranges and unioning
143+
// them as we go
144+
else {
145+
// No way to know what the new size will be, so for now we assume that
146+
// in typical cases, the union of a set of classes won't have many
147+
// overlaps.
148+
let mut ranges =
149+
Vec::with_capacity(self.ranges.len() + other.ranges.len());
150+
151+
let final_range =
152+
MergeIter::new(self.ranges.iter(), other.ranges.iter())
153+
.copied()
154+
.reduce(|range, next_range| {
155+
range.union_right(&next_range).unwrap_or_else(|| {
156+
ranges.push(range);
157+
next_range
158+
})
159+
});
160+
161+
if let Some(final_range) = final_range {
162+
ranges.push(final_range);
163+
}
164+
165+
self.ranges = ranges;
154166
}
155167

156-
self.ranges = ranges;
157168
self.folded = self.folded && other.folded;
158169
}
159170

@@ -398,24 +409,7 @@ impl<I: Interval> IntervalSet<I> {
398409

399410
self.ranges.sort_unstable();
400411
assert!(!self.ranges.is_empty());
401-
402-
// `merge_idx` is the range into which we're merging contiguous ranges.
403-
let mut merge_idx = 0;
404-
405-
for i in 1..self.ranges.len() {
406-
if let Some(union) =
407-
self.ranges[merge_idx].union_right(&self.ranges[i])
408-
{
409-
self.ranges[merge_idx] = union;
410-
} else {
411-
merge_idx += 1;
412-
self.ranges[merge_idx] = self.ranges[i];
413-
}
414-
}
415-
416-
// At this point, `merge_idx` is the index of the last range that was
417-
// merged into, so we truncate.
418-
self.ranges.truncate(merge_idx + 1);
412+
union_sorted(&mut self.ranges)
419413
}
420414

421415
/// Returns true if and only if this class is in a canonical ordering.
@@ -699,4 +693,54 @@ where
699693
}
700694
}
701695

696+
/// Merges the sorted sequence `others` into the sorted vector `dest`, in
/// place, so that `dest` ends up holding every element of both inputs in
/// sorted order.
///
/// `dest` is first grown by `others.len()` placeholder values (produced via
/// `Default`), then filled from the back: for each incoming item, taken
/// largest first, every not-yet-placed element of `dest` that is strictly
/// greater is moved to the end of the unfilled region, and the item is
/// written just in front of them. Elements of `dest` that compare equal to
/// an incoming item stay ahead of it.
///
/// Both inputs must already be sorted in ascending order; the result is
/// unspecified otherwise.
fn merge_sorted_into<T: Default + Ord>(
    dest: &mut Vec<T>,
    others: impl DoubleEndedIterator<Item = T> + ExactSizeIterator,
) {
    // Number of original `dest` elements that have not been moved yet; they
    // always occupy `dest[..unplaced]`.
    let mut unplaced = dest.len();
    // One past the next slot to fill. `write_idx - unplaced` always equals
    // the number of `others` items still to insert, so the swap below never
    // targets a slot that holds live data.
    let mut write_idx = dest.len() + others.len();

    dest.resize_with(write_idx, Default::default);

    for new_item in others.rev() {
        // Shift every remaining element that is `> new_item` rightward.
        // The cursors move only when an element is actually relocated, so
        // the first element that is `<= new_item` stays where it is.
        while unplaced > 0 && dest[unplaced - 1] > new_item {
            unplaced -= 1;
            write_idx -= 1;
            dest.swap(unplaced, write_idx);
        }

        // Then place the new item directly in front of the shifted ones.
        write_idx -= 1;
        dest[write_idx] = new_item;
    }
}
726+
727+
// Given a sorted list of intervals, union them together into a canonical form.
728+
fn union_sorted(ranges: &mut Vec<impl Interval>) {
729+
// `merge_idx` is the range into which we're merging contiguous ranges.
730+
let mut merge_idx = 0;
731+
732+
for i in 1..ranges.len() {
733+
if let Some(union) = ranges[merge_idx].union_right(&ranges[i]) {
734+
ranges[merge_idx] = union;
735+
} else {
736+
merge_idx += 1;
737+
ranges[merge_idx] = ranges[i];
738+
}
739+
}
740+
741+
// At this point, `merge_idx` is the index of the last range that was
742+
// merged into, so we truncate.
743+
ranges.truncate(merge_idx + 1);
744+
}
745+
702746
// Tests for interval sets are written in src/hir.rs against the public API.

0 commit comments

Comments
 (0)