Skip to content

Commit 84cfc36

Browse files
Consolidation consolidation (#552)
* Make consolidate_into a trait method
* Streaming consolidate_into
1 parent 63212f9 commit 84cfc36

File tree

2 files changed

+47
-49
lines changed

2 files changed

+47
-49
lines changed

src/consolidation.rs

Lines changed: 44 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,41 @@ pub trait ConsolidateLayout: Container {
255255

256256
/// Compare two items by key to sort containers.
257257
fn cmp(item1: &Self::Item<'_>, item2: &Self::Item<'_>) -> Ordering;
258+
259+
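/// Consolidate `self` into `target`: drains `self`, sorts the items by key, sums the diffs of equal keys, and pushes each key whose accumulated diff is non-zero.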
260+
fn consolidate_into(&mut self, target: &mut Self) {
261+
// Sort input data
262+
let mut permutation = Vec::with_capacity(self.len());
263+
permutation.extend(self.drain());
264+
permutation.sort_by(|a, b| Self::cmp(a, b));
265+
266+
// Iterate over the data, accumulating diffs for like keys.
267+
let mut iter = permutation.drain(..);
268+
if let Some(item) = iter.next() {
269+
270+
let (k, d) = Self::into_parts(item);
271+
let mut prev_key = k;
272+
let mut prev_diff = d.into_owned();
273+
274+
for item in iter {
275+
let (next_key, next_diff) = Self::into_parts(item);
276+
if next_key == prev_key {
277+
prev_diff.plus_equals(&next_diff);
278+
}
279+
else {
280+
if !prev_diff.is_zero() {
281+
target.push_with_diff(prev_key, prev_diff);
282+
}
283+
prev_key = next_key;
284+
prev_diff = next_diff.into_owned();
285+
}
286+
}
287+
288+
if !prev_diff.is_zero() {
289+
target.push_with_diff(prev_key, prev_diff);
290+
}
291+
}
292+
}
258293
}
259294

260295
impl<D, T, R> ConsolidateLayout for Vec<(D, T, R)>
@@ -278,6 +313,12 @@ where
278313
fn push_with_diff(&mut self, (data, time): Self::Key<'_>, diff: Self::DiffOwned) {
279314
self.push((data, time, diff));
280315
}
316+
317+
/// Consolidate `self` in place via `consolidate_updates`, then swap its contents with `target`.
318+
fn consolidate_into(&mut self, target: &mut Self) {
319+
consolidate_updates(self);
320+
std::mem::swap(self, target);
321+
}
281322
}
282323

283324
impl<K, V, T, R> ConsolidateLayout for FlatStack<TupleABCRegion<TupleABRegion<K, V>, T, R>>
@@ -308,49 +349,6 @@ where
308349
}
309350
}
310351

311-
/// Consolidate the supplied container.
312-
pub fn consolidate_container<C: ConsolidateLayout>(container: &mut C, target: &mut C) {
313-
// Sort input data
314-
let mut permutation = Vec::with_capacity(container.len());
315-
permutation.extend(container.drain());
316-
permutation.sort_by(|a, b| C::cmp(a, b));
317-
318-
// Consolidate sorted data.
319-
let mut previous: Option<(C::Key<'_>, C::DiffOwned)> = None;
320-
// TODO: We should ensure that `target` has sufficient capacity, but `Container` doesn't
321-
// offer a suitable API.
322-
for item in permutation.drain(..) {
323-
let (key, diff) = C::into_parts(item);
324-
match &mut previous {
325-
// Initial iteration, remember key and diff.
326-
// TODO: Opportunity for GatCow for diff.
327-
None => previous = Some((key, diff.into_owned())),
328-
Some((prevkey, d)) => {
329-
// Second and following iteration, compare and accumulate or emit.
330-
if key == *prevkey {
331-
// Keys match, keep accumulating.
332-
d.plus_equals(&diff);
333-
} else {
334-
// Keys don't match, write down result if non-zero.
335-
if !d.is_zero() {
336-
// Unwrap because we checked for `Some` above.
337-
let (prevkey, diff) = previous.take().unwrap();
338-
target.push_with_diff(prevkey, diff);
339-
}
340-
// Remember current key and diff as `previous`
341-
previous = Some((key, diff.into_owned()));
342-
}
343-
}
344-
}
345-
}
346-
// Write any residual data, if non-zero.
347-
if let Some((previtem, d)) = previous {
348-
if !d.is_zero() {
349-
target.push_with_diff(previtem, d);
350-
}
351-
}
352-
}
353-
354352

355353

356354
#[cfg(test)]
@@ -445,11 +443,11 @@ mod tests {
445443
}
446444

447445
#[test]
448-
fn test_consolidate_container() {
446+
fn test_consolidate_into() {
449447
let mut data = vec![(1, 1, 1), (2, 1, 1), (1, 1, -1)];
450448
let mut target = Vec::default();
451449
data.sort();
452-
consolidate_container(&mut data, &mut target);
450+
data.consolidate_into(&mut target);
453451
assert_eq!(target, [(2, 1, 1)]);
454452
}
455453

@@ -477,7 +475,7 @@ mod tests {
477475
data2.extend((0..LEN).map(|i| (i/4, 1, -2isize + ((i % 4) as isize))));
478476
data.sort_by(|x,y| x.0.cmp(&y.0));
479477
let start = std::time::Instant::now();
480-
consolidate_container(&mut data, &mut target);
478+
data.consolidate_into(&mut target);
481479
duration += start.elapsed();
482480

483481
consolidate_updates(&mut data2);

src/trace/implementations/chunker.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use std::collections::VecDeque;
44
use timely::Container;
55
use timely::container::columnation::{Columnation, TimelyStack};
66
use timely::container::{ContainerBuilder, PushInto, SizableContainer};
7-
use crate::consolidation::{consolidate_updates, consolidate_container, ConsolidateLayout};
7+
use crate::consolidation::{consolidate_updates, ConsolidateLayout};
88
use crate::difference::Semigroup;
99

1010
/// Chunk a stream of vectors into chains of vectors.
@@ -269,7 +269,7 @@ where
269269
self.pending.push(item);
270270
if self.pending.at_capacity() {
271271
let starting_len = self.pending.len();
272-
consolidate_container(&mut self.pending, &mut self.empty);
272+
self.pending.consolidate_into(&mut self.empty);
273273
std::mem::swap(&mut self.pending, &mut self.empty);
274274
self.empty.clear();
275275
if self.pending.len() > starting_len / 2 {
@@ -300,7 +300,7 @@ where
300300

301301
fn finish(&mut self) -> Option<&mut Self::Container> {
302302
if !self.pending.is_empty() {
303-
consolidate_container(&mut self.pending, &mut self.empty);
303+
self.pending.consolidate_into(&mut self.empty);
304304
std::mem::swap(&mut self.pending, &mut self.empty);
305305
self.empty.clear();
306306
if !self.pending.is_empty() {

0 commit comments

Comments
 (0)