Skip to content

Commit 157b80d

Browse files
authored
Panic with consistent message on TCP failure (#486)
To allow downstream crates to sniff out these panics and downgrade them, if appropriate.
1 parent 9548bf9 commit 157b80d

File tree

1 file changed

+30
-15
lines changed
  • communication/src/allocator/zero_copy

1 file changed

+30
-15
lines changed

communication/src/allocator/zero_copy/tcp.rs

Lines changed: 30 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
//!
22
3-
use std::io::{Read, Write};
3+
use std::io::{self, Read, Write};
44
use std::net::TcpStream;
55
use crossbeam_channel::{Sender, Receiver};
66

@@ -13,12 +13,22 @@ use logging_core::Logger;
1313

1414
use crate::logging::{CommunicationEvent, CommunicationSetup, MessageEvent, StateEvent};
1515

16+
fn tcp_panic(context: &'static str, cause: io::Error) -> ! {
17+
// NOTE: some downstream crates sniff out "timely communication error:" from
18+
// the panic message. Avoid removing or rewording this message if possible.
19+
// It'd be nice to instead use `panic_any` here with a structured error
20+
// type, but the panic message for `panic_any` is no good (Box<dyn Any>).
21+
panic!("timely communication error: {}: {}", context, cause)
22+
}
23+
1624
/// Repeatedly reads from a TcpStream and carves out messages.
1725
///
1826
/// The intended communication pattern is a sequence of (header, message)^* for valid
1927
/// messages, followed by a header for a zero length message indicating the end of stream.
20-
/// If the stream ends without being shut down, the receive thread panics in an attempt to
21-
/// take down the computation and cause the failures to cascade.
28+
///
29+
/// If the stream ends without being shut down, or if reading from the stream fails, the
30+
/// receive thread panics with a message that starts with "timely communication error:"
31+
/// in an attempt to take down the computation and cause the failures to cascade.
2232
pub fn recv_loop(
2333
mut reader: TcpStream,
2434
targets: Vec<Receiver<MergeQueue>>,
@@ -56,15 +66,16 @@ pub fn recv_loop(
5666

5767
// Attempt to read some more bytes into self.buffer.
5868
let read = match reader.read(&mut buffer.empty()) {
69+
Err(x) => tcp_panic("reading data", x),
70+
Ok(n) if n == 0 => {
71+
tcp_panic(
72+
"reading data",
73+
std::io::Error::new(std::io::ErrorKind::UnexpectedEof, "socket closed"),
74+
);
75+
}
5976
Ok(n) => n,
60-
Err(x) => {
61-
// We don't expect this, as socket closure results in Ok(0) reads.
62-
println!("Error: {:?}", x);
63-
0
64-
},
6577
};
6678

67-
assert!(read > 0);
6879
buffer.make_valid(read);
6980

7081
// Consume complete messages from the front of self.buffer.
@@ -89,7 +100,7 @@ pub fn recv_loop(
89100
panic!("Clean shutdown followed by data.");
90101
}
91102
buffer.ensure_capacity(1);
92-
if reader.read(&mut buffer.empty()).expect("read failure") > 0 {
103+
if reader.read(&mut buffer.empty()).unwrap_or_else(|e| tcp_panic("reading EOF", e)) > 0 {
93104
panic!("Clean shutdown followed by data.");
94105
}
95106
}
@@ -111,6 +122,10 @@ pub fn recv_loop(
111122
///
112123
/// The intended communication pattern is a sequence of (header, message)^* for valid
113124
/// messages, followed by a header for a zero length message indicating the end of stream.
125+
///
126+
/// If writing to the stream fails, the send thread panics with a message that starts with
127+
/// "timely communication error:" in an attempt to take down the computation and cause the
128+
/// failures to cascade.
114129
pub fn send_loop(
115130
// TODO: Maybe we don't need BufWriter with consolidation in writes.
116131
writer: TcpStream,
@@ -148,7 +163,7 @@ pub fn send_loop(
148163
// still be a signal incoming.
149164
//
150165
// We could get awoken by more data, a channel closing, or spuriously perhaps.
151-
writer.flush().expect("Failed to flush writer.");
166+
writer.flush().unwrap_or_else(|e| tcp_panic("flushing writer", e));
152167
sources.retain(|source| !source.is_complete());
153168
if !sources.is_empty() {
154169
std::thread::park();
@@ -167,7 +182,7 @@ pub fn send_loop(
167182
}
168183
});
169184

170-
writer.write_all(&bytes[..]).expect("Write failure in send_loop.");
185+
writer.write_all(&bytes[..]).unwrap_or_else(|e| tcp_panic("writing data", e));
171186
}
172187
}
173188
}
@@ -182,9 +197,9 @@ pub fn send_loop(
182197
length: 0,
183198
seqno: 0,
184199
};
185-
header.write_to(&mut writer).expect("Failed to write header!");
186-
writer.flush().expect("Failed to flush writer.");
187-
writer.get_mut().shutdown(::std::net::Shutdown::Write).expect("Write shutdown failed");
200+
header.write_to(&mut writer).unwrap_or_else(|e| tcp_panic("writing data", e));
201+
writer.flush().unwrap_or_else(|e| tcp_panic("flushing writer", e));
202+
writer.get_mut().shutdown(::std::net::Shutdown::Write).unwrap_or_else(|e| tcp_panic("shutting down writer", e));
188203
logger.as_mut().map(|logger| logger.log(MessageEvent { is_send: true, header }));
189204

190205
// Log the send thread's end.

0 commit comments

Comments
 (0)