Skip to content

Commit da18a35

Browse files
Demonstrate container input batching (#556)
1 parent 5976a2f commit da18a35

File tree

1 file changed

+25
-7
lines changed

1 file changed

+25
-7
lines changed

examples/columnar.rs

Lines changed: 25 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
//! Wordcount based on `columnar`.
22
33
use {
4-
timely::container::CapacityContainerBuilder,
4+
timely::container::{Container, CapacityContainerBuilder},
55
timely::dataflow::channels::pact::ExchangeCore,
66
timely::dataflow::InputHandleCore,
77
timely::dataflow::ProbeHandle,
@@ -43,8 +43,8 @@ fn main() {
4343
let data = data_input.to_stream(scope);
4444
let keys = keys_input.to_stream(scope);
4545

46-
let data_pact = ExchangeCore::<ColumnBuilder<((String,()),u64,i64)>,_>::new_core(|x: &((&str,()),&u64,&i64)| (x.0).0.as_bytes().iter().sum::<u8>() as u64);
47-
let keys_pact = ExchangeCore::<ColumnBuilder<((String,()),u64,i64)>,_>::new_core(|x: &((&str,()),&u64,&i64)| (x.0).0.as_bytes().iter().sum::<u8>() as u64);
46+
let data_pact = ExchangeCore::<ColumnBuilder<((String,()),u64,i64)>,_>::new_core(|x: &((&str,()),&u64,&i64)| (x.0).0.as_bytes().iter().map(|x| *x as u64).sum::<u64>() as u64);
47+
let keys_pact = ExchangeCore::<ColumnBuilder<((String,()),u64,i64)>,_>::new_core(|x: &((&str,()),&u64,&i64)| (x.0).0.as_bytes().iter().map(|x| *x as u64).sum::<u64>() as u64);
4848

4949
let data = arrange_core::<_,_,Col2KeyBatcher<_,_,_>, ColKeyBuilder<_,_,_>, ColKeySpine<_,_,_>>(&data, data_pact, "Data");
5050
let keys = arrange_core::<_,_,Col2KeyBatcher<_,_,_>, ColKeyBuilder<_,_,_>, ColKeySpine<_,_,_>>(&keys, keys_pact, "Keys");
@@ -54,16 +54,25 @@ fn main() {
5454

5555
});
5656

57+
// Resources for placing input data in containers.
58+
use std::fmt::Write;
59+
let mut buffer = String::default();
60+
let mut container = Container::default();
61+
5762
// Load up data in batches.
5863
let mut counter = 0;
5964
while counter < 10 * keys {
6065
let mut i = worker.index();
6166
let time = *data_input.time();
6267
while i < size {
6368
let val = (counter + i) % keys;
64-
data_input.send(((&format!("{:?}", val), ()), time, 1));
69+
write!(buffer, "{:?}", val).unwrap();
70+
container.push(((&buffer, ()), time, 1));
71+
buffer.clear();
6572
i += worker.peers();
6673
}
74+
data_input.send_batch(&mut container);
75+
container.clear();
6776
counter += size;
6877
data_input.advance_to(data_input.time() + 1);
6978
keys_input.advance_to(keys_input.time() + 1);
@@ -80,9 +89,13 @@ fn main() {
8089
let time = *data_input.time();
8190
while i < size {
8291
let val = (queries + i) % keys;
83-
data_input.send(((&format!("{:?}", val), ()), time, 1));
92+
write!(buffer, "{:?}", val).unwrap();
93+
container.push(((&buffer, ()), time, 1));
94+
buffer.clear();
8495
i += worker.peers();
8596
}
97+
data_input.send_batch(&mut container);
98+
container.clear();
8699
queries += size;
87100
data_input.advance_to(data_input.time() + 1);
88101
keys_input.advance_to(keys_input.time() + 1);
@@ -293,7 +306,7 @@ mod builder {
293306
let words = self.current.borrow().length_in_words();
294307
let round = (words + ((1 << 18) - 1)) & !((1 << 18) - 1);
295308
if round - words < round / 10 {
296-
let mut alloc = Vec::with_capacity(round);
309+
let mut alloc = Vec::with_capacity(8 * words);
297310
columnar::bytes::serialization::encode(&mut alloc, self.current.borrow().as_bytes());
298311
self.pending.push_back(Column::Align(alloc.into_boxed_slice()));
299312
self.current.clear();
@@ -328,7 +341,12 @@ mod builder {
328341
#[inline]
329342
fn finish(&mut self) -> Option<&mut Self::Container> {
330343
if !self.current.is_empty() {
331-
self.pending.push_back(Column::Typed(std::mem::take(&mut self.current)));
344+
use columnar::Container;
345+
let words = self.current.borrow().length_in_words();
346+
let mut alloc = Vec::with_capacity(8 * words);
347+
columnar::bytes::serialization::encode(&mut alloc, self.current.borrow().as_bytes());
348+
self.pending.push_back(Column::Align(alloc.into_boxed_slice()));
349+
self.current.clear();
332350
}
333351
self.empty = self.pending.pop_front();
334352
self.empty.as_mut()

0 commit comments

Comments (0)