Skip to content

Commit 47e0722

Browse files
Update columnar (#611)
1 parent 2398b79 commit 47e0722

File tree

3 files changed

+55
-55
lines changed

3 files changed

+55
-55
lines changed

communication/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ license = "MIT"
1717
default = ["getopts"]
1818

1919
[dependencies]
20-
columnar = "0.1"
20+
columnar = "0.2"
2121
getopts = { version = "0.2.21", optional = true }
2222
byteorder = "1.5"
2323
serde = { version = "1.0", features = ["derive"] }

timely/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ default = ["getopts"]
1919
getopts = ["getopts-dep", "timely_communication/getopts"]
2020

2121
[dependencies]
22-
columnar = "0.1"
22+
columnar = "0.2"
2323
getopts-dep = { package = "getopts", version = "0.2.21", optional = true }
2424
bincode = { version = "1.0" }
2525
byteorder = "1.5"

timely/examples/columnar.rs

Lines changed: 53 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ struct WordCount {
1919

2020
fn main() {
2121

22-
type Container = Column<<WordCount as columnar::Columnar>::Container>;
22+
type Container = Column<WordCount>;
2323

2424
use columnar::Len;
2525

@@ -55,7 +55,7 @@ fn main() {
5555
)
5656
.container::<Container>()
5757
.unary_frontier(
58-
ExchangeCore::<ColumnBuilder<<WordCount as columnar::Columnar>::Container>,_>::new_core(|x: &WordCountReference<&str,&i64>| x.text.len() as u64),
58+
ExchangeCore::<ColumnBuilder<WordCount>,_>::new_core(|x: &WordCountReference<&str,&i64>| x.text.len() as u64),
5959
"WordCount",
6060
|_capability, _info| {
6161
let mut queues = HashMap::new();
@@ -114,12 +114,15 @@ fn main() {
114114
pub use container::Column;
115115
mod container {
116116

117+
use columnar::Columnar;
118+
use columnar::Container as FooBozzle;
119+
117120
use timely_bytes::arc::Bytes;
118121

119122
/// A container based on a columnar store, encoded in aligned bytes.
120-
pub enum Column<C> {
123+
pub enum Column<C: Columnar> {
121124
/// The typed variant of the container.
122-
Typed(C),
125+
Typed(C::Container),
123126
/// The binary variant of the container.
124127
Bytes(Bytes),
125128
/// Relocated, aligned binary data, if `Bytes` doesn't work for some reason.
@@ -129,11 +132,11 @@ mod container {
129132
Align(Box<[u64]>),
130133
}
131134

132-
impl<C: Default> Default for Column<C> {
133-
fn default() -> Self { Self::Typed(C::default()) }
135+
impl<C: Columnar> Default for Column<C> {
136+
fn default() -> Self { Self::Typed(Default::default()) }
134137
}
135138

136-
impl<C: Clone> Clone for Column<C> {
139+
impl<C: Columnar> Clone for Column<C> where C::Container: Clone {
137140
fn clone(&self) -> Self {
138141
match self {
139142
Column::Typed(t) => Column::Typed(t.clone()),
@@ -148,64 +151,55 @@ mod container {
148151
}
149152
}
150153

151-
use columnar::{Clear, Len, Index, bytes::{AsBytes, FromBytes}};
154+
use columnar::{Clear, Len, Index, AsBytes, FromBytes};
152155
use columnar::bytes::serialization::decode;
153156
use columnar::common::IterOwn;
154157

155158
use timely::Container;
156-
impl<C: AsBytes + Clear + Len + Clone + Default + 'static> Container for Column<C>
157-
where
158-
for<'a> C::Borrowed<'a> : Len + Index,
159-
{
159+
impl<C: Columnar> Container for Column<C> {
160160
fn len(&self) -> usize {
161161
match self {
162162
Column::Typed(t) => t.len(),
163-
Column::Bytes(b) => <C::Borrowed<'_> as FromBytes>::from_bytes(&mut decode(bytemuck::cast_slice(b))).len(),
164-
Column::Align(a) => <C::Borrowed<'_> as FromBytes>::from_bytes(&mut decode(a)).len(),
163+
Column::Bytes(b) => <<C::Container as columnar::Container<C>>::Borrowed<'_> as FromBytes>::from_bytes(&mut decode(bytemuck::cast_slice(b))).len(),
164+
Column::Align(a) => <<C::Container as columnar::Container<C>>::Borrowed<'_> as FromBytes>::from_bytes(&mut decode(a)).len(),
165165
}
166166
}
167167
// This sets the `Bytes` variant to be an empty `Typed` variant, appropriate for pushing into.
168168
fn clear(&mut self) {
169169
match self {
170170
Column::Typed(t) => t.clear(),
171-
Column::Bytes(_) => *self = Column::Typed(C::default()),
172-
Column::Align(_) => *self = Column::Typed(C::default()),
171+
Column::Bytes(_) => *self = Column::Typed(Default::default()),
172+
Column::Align(_) => *self = Column::Typed(Default::default()),
173173
}
174174
}
175175

176-
type ItemRef<'a> = <C::Borrowed<'a> as Index>::Ref where Self: 'a;
177-
type Iter<'a> = IterOwn<C::Borrowed<'a>>;
176+
type ItemRef<'a> = C::Ref<'a>;
177+
type Iter<'a> = IterOwn<<C::Container as columnar::Container<C>>::Borrowed<'a>>;
178178
fn iter<'a>(&'a self) -> Self::Iter<'a> {
179179
match self {
180-
Column::Typed(t) => <C::Borrowed<'a> as FromBytes>::from_bytes(&mut t.as_bytes().map(|(_, x)| x)).into_iter(),
181-
Column::Bytes(b) => <C::Borrowed<'a> as FromBytes>::from_bytes(&mut decode(bytemuck::cast_slice(b))).into_iter(),
182-
Column::Align(a) => <C::Borrowed<'a> as FromBytes>::from_bytes(&mut decode(a)).into_iter(),
180+
Column::Typed(t) => t.borrow().into_iter(),
181+
Column::Bytes(b) => <<C::Container as columnar::Container<C>>::Borrowed<'a> as FromBytes>::from_bytes(&mut decode(bytemuck::cast_slice(b))).into_iter(),
182+
Column::Align(a) => <<C::Container as columnar::Container<C>>::Borrowed<'a> as FromBytes>::from_bytes(&mut decode(a)).into_iter(),
183183
}
184184
}
185185

186-
type Item<'a> = <C::Borrowed<'a> as Index>::Ref where Self: 'a;
187-
type DrainIter<'a> = IterOwn<C::Borrowed<'a>>;
186+
type Item<'a> = C::Ref<'a>;
187+
type DrainIter<'a> = IterOwn<<C::Container as columnar::Container<C>>::Borrowed<'a>>;
188188
fn drain<'a>(&'a mut self) -> Self::DrainIter<'a> {
189189
match self {
190-
Column::Typed(t) => <C::Borrowed<'a> as FromBytes>::from_bytes(&mut t.as_bytes().map(|(_, x)| x)).into_iter(),
191-
Column::Bytes(b) => <C::Borrowed<'a> as FromBytes>::from_bytes(&mut decode(bytemuck::cast_slice(b))).into_iter(),
192-
Column::Align(a) => <C::Borrowed<'a> as FromBytes>::from_bytes(&mut decode(a)).into_iter(),
190+
Column::Typed(t) => t.borrow().into_iter(),
191+
Column::Bytes(b) => <<C::Container as columnar::Container<C>>::Borrowed<'a> as FromBytes>::from_bytes(&mut decode(bytemuck::cast_slice(b))).into_iter(),
192+
Column::Align(a) => <<C::Container as columnar::Container<C>>::Borrowed<'a> as FromBytes>::from_bytes(&mut decode(a)).into_iter(),
193193
}
194194
}
195195
}
196196

197197
use timely::container::SizableContainer;
198-
impl<C: AsBytes + Clear + Len + Clone + Default + 'static> SizableContainer for Column<C>
199-
where
200-
for<'a> C::Borrowed<'a> : Len + Index,
201-
{
198+
impl<C: Columnar> SizableContainer for Column<C> {
202199
fn at_capacity(&self) -> bool {
203200
match self {
204201
Self::Typed(t) => {
205-
let length_in_bytes: usize =
206-
t.as_bytes()
207-
.map(|(_, x)| 8 * (1 + (x.len()/8) + if x.len() % 8 == 0 { 0 } else { 1 }))
208-
.sum();
202+
let length_in_bytes = t.borrow().length_in_words() * 8;
209203
length_in_bytes >= (1 << 20)
210204
},
211205
Self::Bytes(_) => true,
@@ -216,9 +210,10 @@ mod container {
216210
}
217211

218212
use timely::container::PushInto;
219-
impl<C: columnar::Push<T>, T> PushInto<T> for Column<C> {
213+
impl<C: Columnar, T> PushInto<T> for Column<C> where C::Container: columnar::Push<T> {
220214
#[inline]
221215
fn push_into(&mut self, item: T) {
216+
use columnar::Push;
222217
match self {
223218
Column::Typed(t) => t.push(item),
224219
Column::Align(_) | Column::Bytes(_) => {
@@ -231,7 +226,7 @@ mod container {
231226
}
232227

233228
use timely::dataflow::channels::ContainerBytes;
234-
impl<C: columnar::bytes::AsBytes> ContainerBytes for Column<C> {
229+
impl<C: Columnar> ContainerBytes for Column<C> {
235230
fn from_bytes(bytes: timely::bytes::arc::Bytes) -> Self {
236231
// Our expectation / hope is that `bytes` is `u64` aligned and sized.
237232
// If the alignment is borked, we can relocate. If the size is borked,
@@ -251,7 +246,7 @@ mod container {
251246
fn length_in_bytes(&self) -> usize {
252247
match self {
253248
// We'll need one u64 for the length, then the length rounded up to a multiple of 8.
254-
Column::Typed(t) => 8 * t.length_in_words(),
249+
Column::Typed(t) => 8 * t.borrow().length_in_words(),
255250
Column::Bytes(b) => b.len(),
256251
Column::Align(a) => 8 * a.len(),
257252
}
@@ -260,10 +255,11 @@ mod container {
260255
fn into_bytes<W: ::std::io::Write>(&self, writer: &mut W) {
261256
match self {
262257
Column::Typed(t) => {
258+
use columnar::Container;
263259
// Columnar data is serialized as a sequence of `u64` values, with each `[u8]` slice
264260
// serialized as first its length in bytes, and then as many `u64` values as needed.
265261
// Padding should be added, but only for alignment; no specific values are required.
266-
for (align, bytes) in t.as_bytes() {
262+
for (align, bytes) in t.borrow().as_bytes() {
267263
assert!(align <= 8);
268264
let length: u64 = bytes.len().try_into().unwrap();
269265
writer.write_all(bytemuck::cast_slice(std::slice::from_ref(&length))).unwrap();
@@ -284,42 +280,49 @@ use builder::ColumnBuilder;
284280
mod builder {
285281

286282
use std::collections::VecDeque;
287-
use columnar::{Clear, Len, Index, bytes::AsBytes};
283+
use columnar::{Columnar, Clear, Len, AsBytes, Push};
288284
use super::Column;
289285

290286
/// A container builder for `Column<C>`.
291-
#[derive(Default)]
292-
pub struct ColumnBuilder<C> {
287+
pub struct ColumnBuilder<C: Columnar> {
293288
/// Container that we're writing to.
294-
current: C,
289+
current: C::Container,
295290
/// Empty allocation.
296291
empty: Option<Column<C>>,
297292
/// Completed containers pending to be sent.
298293
pending: VecDeque<Column<C>>,
299294
}
300295

301296
use timely::container::PushInto;
302-
impl<C: columnar::Push<T> + Clear + AsBytes, T> PushInto<T> for ColumnBuilder<C> {
297+
impl<C: Columnar, T> PushInto<T> for ColumnBuilder<C> where C::Container: columnar::Push<T> {
303298
#[inline]
304299
fn push_into(&mut self, item: T) {
305300
self.current.push(item);
306301
// If there is less than 10% slop with 2MB backing allocations, mint a container.
307-
let words = self.current.length_in_words();
302+
use columnar::Container;
303+
let words = self.current.borrow().length_in_words();
308304
let round = (words + ((1 << 18) - 1)) & !((1 << 18) - 1);
309305
if round - words < round / 10 {
310306
let mut alloc = Vec::with_capacity(round);
311-
columnar::bytes::serialization::encode(&mut alloc, self.current.as_bytes());
307+
columnar::bytes::serialization::encode(&mut alloc, self.current.borrow().as_bytes());
312308
self.pending.push_back(Column::Align(alloc.into_boxed_slice()));
313309
self.current.clear();
314310
}
315311
}
316312
}
317313

314+
impl<C: Columnar> Default for ColumnBuilder<C> {
315+
fn default() -> Self {
316+
ColumnBuilder {
317+
current: Default::default(),
318+
empty: None,
319+
pending: Default::default(),
320+
}
321+
}
322+
}
323+
318324
use timely::container::{ContainerBuilder, LengthPreservingContainerBuilder};
319-
impl<C: AsBytes + Clear + Len + Clone + Default + 'static> ContainerBuilder for ColumnBuilder<C>
320-
where
321-
for<'a> C::Borrowed<'a> : Len + Index,
322-
{
325+
impl<C: Columnar> ContainerBuilder for ColumnBuilder<C> where C::Container: Clone {
323326
type Container = Column<C>;
324327

325328
#[inline]
@@ -342,8 +345,5 @@ mod builder {
342345
}
343346
}
344347

345-
impl<C: AsBytes + Clear + Len + Clone + Default + 'static> LengthPreservingContainerBuilder for ColumnBuilder<C>
346-
where
347-
for<'a> C::Borrowed<'a> : Len + Index,
348-
{ }
348+
impl<C: Columnar> LengthPreservingContainerBuilder for ColumnBuilder<C> where C::Container: Clone { }
349349
}

0 commit comments

Comments
 (0)