Skip to content

Commit f54796a

Browse files
Remove abomonation to reduce unsoundness (#575)
* Remove abomonation to reduce unsoundness * Add missing Serialize/Deserialize derives * Tidy zero_copy, and leave a note explaining that it isn't
1 parent ca17c86 commit f54796a

File tree

20 files changed

+67
-195
lines changed

20 files changed

+67
-195
lines changed

.github/workflows/test.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,4 +16,3 @@ jobs:
1616
# clutter target/debug/deps with multiple copies of things.
1717
run: for file in $(find mdbook -name '*.md'); do rustdoc --test $file -L ./target/debug/deps; done
1818
- run: cargo test
19-
- run: cargo test --features bincode

communication/Cargo.toml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,10 @@ default = ["getopts"]
1818

1919
[dependencies]
2020
getopts = { version = "0.2.14", optional = true }
21-
bincode = { version = "1.0", optional = true }
21+
bincode = { version = "1.0" }
2222
byteorder = "1.5"
2323
serde_derive = "1.0"
2424
serde = "1.0"
25-
abomonation = "0.7"
26-
abomonation_derive = "0.5"
2725
timely_bytes = { path = "../bytes", version = "0.12" }
2826
timely_logging = { path = "../logging", version = "0.12" }
2927
crossbeam-channel = "0.5.0"

communication/src/allocator/zero_copy/mod.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
11
//! Allocators based on serialized data which avoid copies.
22
//!
3-
//! These allocators are based on `Abomonation` serialization, and its ability to deserialized
4-
//! typed Rust data in-place. They surface references to data, often ultimately referencing the
3+
//! These allocators were based on `Abomonation` serialization, and its ability to deserialize
4+
//! typed Rust data in-place. They surfaced references to data, often ultimately referencing the
55
//! raw binary data they initially received.
6+
//!
7+
//! For the moment, they no longer use Abomonation due to its unsafety, and instead rely on the
8+
//! use of `Message::from_bytes` which .. could .. use Abomonation or something safer, but uses
9+
//! `bincode` as of this writing.
610
711
pub mod bytes_slab;
812
pub mod bytes_exchange;

communication/src/allocator/zero_copy/push_pull.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ impl<T:Data> Pull<Message<T>> for Puller<T> {
9090
self.receiver
9191
.borrow_mut()
9292
.pop_front()
93-
.map(|bytes| unsafe { Message::from_bytes(bytes) });
93+
.map(Message::from_bytes);
9494

9595
&mut self.current
9696
}
@@ -134,7 +134,7 @@ impl<T:Data> Pull<Message<T>> for PullerInner<T> {
134134
self.receiver
135135
.borrow_mut()
136136
.pop_front()
137-
.map(|bytes| unsafe { Message::from_bytes(bytes) });
137+
.map(Message::from_bytes);
138138

139139
&mut self.current
140140
}

communication/src/lib.rs

Lines changed: 1 addition & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,7 @@
88
//! receive endpoint. Messages sent into a send endpoint will eventually be received by the corresponding worker,
99
//! if it receives often enough. The point-to-point channels are each FIFO, but with no fairness guarantees.
1010
//!
11-
//! To be communicated, a type must implement the [`Serialize`](serde::Serialize) trait when using the
12-
//! `bincode` feature or the [`Abomonation`](abomonation::Abomonation) trait when not.
11+
//! To be communicated, a type must implement the [`Serialize`](serde::Serialize) trait.
1312
//!
1413
//! Channel endpoints also implement a lower-level `push` and `pull` interface (through the [`Push`](Push) and [`Pull`](Pull)
1514
//! traits), which is used for more precise control of resources.
@@ -77,14 +76,9 @@
7776

7877
#[cfg(feature = "getopts")]
7978
extern crate getopts;
80-
#[cfg(feature = "bincode")]
8179
extern crate bincode;
82-
#[cfg(feature = "bincode")]
8380
extern crate serde;
8481

85-
extern crate abomonation;
86-
#[macro_use] extern crate abomonation_derive;
87-
8882
extern crate timely_bytes as bytes;
8983
extern crate timely_logging as logging_core;
9084

@@ -97,26 +91,15 @@ pub mod buzzer;
9791

9892
use std::any::Any;
9993

100-
#[cfg(feature = "bincode")]
10194
use serde::{Serialize, Deserialize};
102-
#[cfg(not(feature = "bincode"))]
103-
use abomonation::Abomonation;
10495

10596
pub use allocator::Generic as Allocator;
10697
pub use allocator::Allocate;
10798
pub use initialize::{initialize, initialize_from, Config, WorkerGuards};
10899
pub use message::Message;
109100

110101
/// A composite trait for types that may be used with channels.
111-
#[cfg(not(feature = "bincode"))]
112-
pub trait Data : Send+Sync+Any+Abomonation+'static { }
113-
#[cfg(not(feature = "bincode"))]
114-
impl<T: Send+Sync+Any+Abomonation+'static> Data for T { }
115-
116-
/// A composite trait for types that may be used with channels.
117-
#[cfg(feature = "bincode")]
118102
pub trait Data : Send+Sync+Any+Serialize+for<'a>Deserialize<'a>+'static { }
119-
#[cfg(feature = "bincode")]
120103
impl<T: Send+Sync+Any+Serialize+for<'a>Deserialize<'a>+'static> Data for T { }
121104

122105
/// Pushing elements of type `T`.

communication/src/logging.rs

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
//! Configuration and events for communication logging.
22
3+
use serde::{Serialize, Deserialize};
4+
35
/// Configuration information about a communication thread.
4-
#[derive(Abomonation, Debug, PartialEq, Eq, Hash, Clone, Copy)]
6+
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy, Serialize, Deserialize)]
57
pub struct CommunicationSetup {
68
/// True when this is a send thread (or the receive thread).
79
pub sender: bool,
@@ -12,7 +14,7 @@ pub struct CommunicationSetup {
1214
}
1315

1416
/// Various communication events.
15-
#[derive(Abomonation, Debug, PartialEq, Eq, Hash, Clone, Copy)]
17+
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy, Serialize, Deserialize)]
1618
pub enum CommunicationEvent {
1719
/// An observed message.
1820
Message(MessageEvent),
@@ -21,7 +23,7 @@ pub enum CommunicationEvent {
2123
}
2224

2325
/// An observed message.
24-
#[derive(Abomonation, Debug, PartialEq, Eq, Hash, Clone, Copy)]
26+
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy, Serialize, Deserialize)]
2527
pub struct MessageEvent {
2628
/// true for send event, false for receive event
2729
pub is_send: bool,
@@ -30,7 +32,7 @@ pub struct MessageEvent {
3032
}
3133

3234
/// Starting or stopping communication threads.
33-
#[derive(Abomonation, Debug, PartialEq, Eq, Hash, Clone, Copy)]
35+
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy, Serialize, Deserialize)]
3436
pub struct StateEvent {
3537
/// Is the thread a send (vs a recv) thread.
3638
pub send: bool,

communication/src/message.rs

Lines changed: 0 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
33
use std::sync::Arc;
44
use bytes::arc::Bytes;
5-
use abomonation;
65
use crate::Data;
76

87
/// Either an immutable or mutable reference.
@@ -68,8 +67,6 @@ pub struct Message<T> {
6867

6968
/// Possible returned representations from a channel.
7069
enum MessageContents<T> {
71-
/// Binary representation. Only available as a reference.
72-
Binary(abomonation::abomonated::Abomonated<T, Bytes>),
7370
/// Rust typed instance. Available for ownership.
7471
Owned(T),
7572
/// Atomic reference counted. Only available as a reference.
@@ -88,15 +85,13 @@ impl<T> Message<T> {
8885
/// Destructures and returns any typed data.
8986
pub fn if_typed(self) -> Option<T> {
9087
match self.payload {
91-
MessageContents::Binary(_) => None,
9288
MessageContents::Owned(typed) => Some(typed),
9389
MessageContents::Arc(_) => None,
9490
}
9591
}
9692
/// Returns a mutable reference, if typed.
9793
pub fn if_mut(&mut self) -> Option<&mut T> {
9894
match &mut self.payload {
99-
MessageContents::Binary(_) => None,
10095
MessageContents::Owned(typed) => Some(typed),
10196
MessageContents::Arc(_) => None,
10297
}
@@ -108,54 +103,12 @@ impl<T> Message<T> {
108103
/// data are serialized binary data.
109104
pub fn as_ref_or_mut(&mut self) -> RefOrMut<T> {
110105
match &mut self.payload {
111-
MessageContents::Binary(bytes) => { RefOrMut::Ref(bytes) },
112106
MessageContents::Owned(typed) => { RefOrMut::Mut(typed) },
113107
MessageContents::Arc(typed) => { RefOrMut::Ref(typed) },
114108
}
115109
}
116110
}
117111

118-
// These methods require `T` to implement `Abomonation`, for serialization functionality.
119-
#[cfg(not(feature = "bincode"))]
120-
impl<T: Data> Message<T> {
121-
/// Wrap bytes as a message.
122-
///
123-
/// # Safety
124-
///
125-
/// This method is unsafe, in that `Abomonated::new()` is unsafe: it presumes that
126-
/// the binary data can be safely decoded, which is unsafe for e.g. UTF8 data and
127-
/// enumerations (perhaps among many other types).
128-
pub unsafe fn from_bytes(bytes: Bytes) -> Self {
129-
let abomonated = abomonation::abomonated::Abomonated::new(bytes).expect("Abomonated::new() failed.");
130-
Message { payload: MessageContents::Binary(abomonated) }
131-
}
132-
133-
/// The number of bytes required to serialize the data.
134-
pub fn length_in_bytes(&self) -> usize {
135-
match &self.payload {
136-
MessageContents::Binary(bytes) => { bytes.as_bytes().len() },
137-
MessageContents::Owned(typed) => { abomonation::measure(typed) },
138-
MessageContents::Arc(typed) =>{ abomonation::measure::<T>(&**typed) } ,
139-
}
140-
}
141-
142-
/// Writes the binary representation into `writer`.
143-
pub fn into_bytes<W: ::std::io::Write>(&self, writer: &mut W) {
144-
match &self.payload {
145-
MessageContents::Binary(bytes) => {
146-
writer.write_all(bytes.as_bytes()).expect("Message::into_bytes(): write_all failed.");
147-
},
148-
MessageContents::Owned(typed) => {
149-
unsafe { abomonation::encode(typed, writer).expect("Message::into_bytes(): Abomonation::encode failed"); }
150-
},
151-
MessageContents::Arc(typed) => {
152-
unsafe { abomonation::encode(&**typed, writer).expect("Message::into_bytes(): Abomonation::encode failed"); }
153-
},
154-
}
155-
}
156-
}
157-
158-
#[cfg(feature = "bincode")]
159112
impl<T: Data> Message<T> {
160113
/// Wrap bytes as a message.
161114
pub fn from_bytes(bytes: Bytes) -> Self {
@@ -166,7 +119,6 @@ impl<T: Data> Message<T> {
166119
/// The number of bytes required to serialize the data.
167120
pub fn length_in_bytes(&self) -> usize {
168121
match &self.payload {
169-
MessageContents::Binary(bytes) => { bytes.as_bytes().len() },
170122
MessageContents::Owned(typed) => {
171123
::bincode::serialized_size(&typed).expect("bincode::serialized_size() failed") as usize
172124
},
@@ -179,9 +131,6 @@ impl<T: Data> Message<T> {
179131
/// Writes the binary representation into `writer`.
180132
pub fn into_bytes<W: ::std::io::Write>(&self, writer: &mut W) {
181133
match &self.payload {
182-
MessageContents::Binary(bytes) => {
183-
writer.write_all(bytes.as_bytes()).expect("Message::into_bytes(): write_all failed.");
184-
},
185134
MessageContents::Owned(typed) => {
186135
::bincode::serialize_into(writer, &typed).expect("bincode::serialize_into() failed");
187136
},
@@ -197,7 +146,6 @@ impl<T> ::std::ops::Deref for Message<T> {
197146
fn deref(&self) -> &Self::Target {
198147
// TODO: In principle we have already decoded, but let's go again
199148
match &self.payload {
200-
MessageContents::Binary(bytes) => { bytes },
201149
MessageContents::Owned(typed) => { typed },
202150
MessageContents::Arc(typed) => { typed },
203151
}
@@ -208,7 +156,6 @@ impl<T: Clone> Message<T> {
208156
/// Produces a typed instance of the wrapped element.
209157
pub fn into_typed(self) -> T {
210158
match self.payload {
211-
MessageContents::Binary(bytes) => bytes.clone(),
212159
MessageContents::Owned(instance) => instance,
213160
// TODO: Could attempt `Arc::try_unwrap()` here.
214161
MessageContents::Arc(instance) => (*instance).clone(),
@@ -218,7 +165,6 @@ impl<T: Clone> Message<T> {
218165
pub fn as_mut(&mut self) -> &mut T {
219166

220167
let cloned: Option<T> = match &self.payload {
221-
MessageContents::Binary(bytes) => Some((*bytes).clone()),
222168
MessageContents::Owned(_) => None,
223169
// TODO: Could attempt `Arc::try_unwrap()` here.
224170
MessageContents::Arc(typed) => Some((**typed).clone()),

communication/src/networking.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ use std::thread::sleep;
99
use std::time::Duration;
1010

1111
use byteorder::{ReadBytesExt, WriteBytesExt};
12+
use serde::{Deserialize, Serialize};
1213

1314
// This constant is sent along immediately after establishing a TCP stream, so
1415
// that it is easy to sniff out Timely traffic when it is multiplexed with
@@ -21,7 +22,7 @@ type ByteOrder = byteorder::BigEndian;
2122
/// Framing data for each `Vec<u8>` transmission, indicating a typed channel, the source and
2223
/// destination workers, and the length in bytes.
2324
// *Warning*: Adding, removing and altering fields requires adjusting the implementation below!
24-
#[derive(Abomonation, Debug, PartialEq, Eq, Hash, Clone, Copy)]
25+
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy, Serialize, Deserialize)]
2526
pub struct MessageHeader {
2627
/// index of channel.
2728
pub channel: usize,

mdbook/src/chapter_4/chapter_4_5.md

Lines changed: 13 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -17,33 +17,21 @@ struct YourStruct { .. }
1717

1818
## The `ExchangeData` trait
1919

20-
The `ExchangeData` trait is more complicated, and is established in the `communication/` module. There are two options for this trait, which are determined by whether you use the `--bincode` feature at compilation, or not.
20+
The `ExchangeData` trait is more complicated, and is established in the `communication/` module. The trait is a synonym for
2121

22-
* If you use `--bincode` then the trait is a synonym for
23-
24-
```rust,ignore
25-
Send+Sync+Any+serde::Serialize+for<'a>serde::Deserialize<'a>+'static
26-
```
27-
28-
where `serde` is Rust's most popular serialization and deserialization crate. A great many types implement these traits. If your types does not, you should add these decorators to their definition:
29-
30-
```rust,ignore
31-
#[derive(Serialize, Deserialize)]
32-
```
33-
34-
You must include the `serde` crate, and if not on Rust 2018 the `serde_derive` crate.
35-
36-
The downside to the `--bincode` flag is that deserialization will always involve a clone of the data, which has the potential to adversely impact performance. For example, if you have structures that contain lots of strings, timely dataflow will create allocations for each string even if you do not plan to use all of them.
22+
```rust,ignore
23+
Send+Sync+Any+serde::Serialize+for<'a>serde::Deserialize<'a>+'static
24+
```
3725

38-
* If you do not use the `--bincode` feature, then the `Serialize` and `Deserialize` requirements are replaced by `Abomonation`, from the `abomonation` crate. This trait allows in-place deserialization, but is implemented for fewer types, and has the potential to be a bit scarier (due to in-place pointer correction).
26+
where `serde` is Rust's most popular serialization and deserialization crate. A great many types implement these traits. If your types do not, you should add these decorators to their definitions:
3927

40-
Your types likely do not implement `Abomonation` by default, but you can similarly use
28+
```rust,ignore
29+
#[derive(Serialize, Deserialize)]
30+
```
4131

42-
```rust,ignore
43-
#[derive(Abomonation)]
44-
```
32+
You must include the `serde` crate, and if not on Rust 2018 the `serde_derive` crate.
4533

46-
You must include the `abomonation` and `abomonation_derive` crate for this to work correctly.
34+
The downside is that deserialization will always involve a clone of the data, which has the potential to adversely impact performance. For example, if you have structures that contain lots of strings, timely dataflow will create allocations for each string even if you do not plan to use all of them.
4735

4836
## An example
4937

@@ -140,7 +128,7 @@ impl<D> TreeNode<D> {
140128

141129
We get a new error. A not especially helpful error. It says that it cannot find an `exchange` method, or more specifically that one exists but it doesn't apply to our type at hand. This is because the data need to satisfy the `ExchangeData` trait but do not. It would be better if this were clearer in the error messages, I agree.
142130

143-
We can fix the problem two ways. First, if you would like to use `bincode`, then we update the source like so:
131+
The fix is to update the source like so:
144132

145133
```rust,ignore
146134
#[macro_use]
@@ -154,10 +142,10 @@ struct TreeNode<D> {
154142
}
155143
```
156144

157-
and make sure to include the `serde_derive` and `serde` crates. Now when we run things (notice the `--features` flag) we see:
145+
and make sure to include the `serde_derive` and `serde` crates.
158146

159147
```ignore
160-
Echidnatron% cargo run --example types --features bincode
148+
Echidnatron% cargo run --example types
161149
Finished dev [unoptimized + debuginfo] target(s) in 0.07s
162150
Running `target/debug/examples/types`
163151
seen: TreeNode { data: 0, children: [] }

timely/Cargo.toml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,15 +17,13 @@ license = "MIT"
1717

1818
[features]
1919
default = ["getopts"]
20-
bincode= ["timely_communication/bincode"]
2120
getopts = ["getopts-dep", "timely_communication/getopts"]
2221

2322
[dependencies]
2423
getopts-dep = { package = "getopts", version = "0.2.14", optional = true }
24+
bincode = { version = "1.0" }
2525
serde = "1.0"
2626
serde_derive = "1.0"
27-
abomonation = "0.7.3"
28-
abomonation_derive = "0.5"
2927
timely_bytes = { path = "../bytes", version = "0.12" }
3028
timely_logging = { path = "../logging", version = "0.12" }
3129
timely_communication = { path = "../communication", version = "0.12", default-features = false }

0 commit comments

Comments
 (0)