Skip to content

Commit 5b40e0c

Browse files
Merge pull request #142 from marshallpierce/mp/string-writer
Add EncoderStringWriter
2 parents 2dc0296 + 8b1ae22 commit 5b40e0c

File tree

7 files changed

+283
-40
lines changed

7 files changed

+283
-40
lines changed

RELEASE-NOTES.md

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,13 @@
1+
# Next
2+
3+
- `Config` methods are now `const fn`
4+
- Added `EncoderStringWriter` to allow encoding directly to a String
5+
- `EncoderWriter` now owns its delegate writer rather than keeping a reference to it (though passing a `&mut W` still works, since `&mut W` itself implements `Write`)
6+
- As a consequence, it is now possible to extract the delegate writer from an `EncoderWriter` via `finish()`, which returns `Result<W>` instead of `Result<()>`.
7+
18
# 0.12.2
29

3-
Add `BinHex` alphabet
10+
- Add `BinHex` alphabet
411

512
# 0.12.1
613

benches/benchmarks.rs

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,32 @@ fn do_encode_bench_stream(b: &mut Bencher, &size: &usize) {
123123
});
124124
}
125125

126+
fn do_encode_bench_string_stream(b: &mut Bencher, &size: &usize) {
127+
let mut v: Vec<u8> = Vec::with_capacity(size);
128+
fill(&mut v);
129+
130+
b.iter(|| {
131+
let mut stream_enc = write::EncoderStringWriter::new(TEST_CONFIG);
132+
stream_enc.write_all(&v).unwrap();
133+
stream_enc.flush().unwrap();
134+
let _ = stream_enc.into_inner();
135+
});
136+
}
137+
138+
fn do_encode_bench_string_reuse_buf_stream(b: &mut Bencher, &size: &usize) {
139+
let mut v: Vec<u8> = Vec::with_capacity(size);
140+
fill(&mut v);
141+
142+
let mut buf = String::new();
143+
b.iter(|| {
144+
buf.clear();
145+
let mut stream_enc = write::EncoderStringWriter::from(&mut buf, TEST_CONFIG);
146+
stream_enc.write_all(&v).unwrap();
147+
stream_enc.flush().unwrap();
148+
let _ = stream_enc.into_inner();
149+
});
150+
}
151+
126152
fn fill(v: &mut Vec<u8>) {
127153
let cap = v.capacity();
128154
// weak randomness is plenty; we just want to not be completely friendly to the branch predictor
@@ -147,6 +173,8 @@ fn encode_benchmarks(byte_sizes: &[usize]) -> ParameterizedBenchmark<usize> {
147173
.with_function("encode_reuse_buf", do_encode_bench_reuse_buf)
148174
.with_function("encode_slice", do_encode_bench_slice)
149175
.with_function("encode_reuse_buf_stream", do_encode_bench_stream)
176+
.with_function("encode_string_stream", do_encode_bench_string_stream)
177+
.with_function("encode_string_reuse_buf_stream", do_encode_bench_string_reuse_buf_stream)
150178
}
151179

152180
fn decode_benchmarks(byte_sizes: &[usize]) -> ParameterizedBenchmark<usize> {

examples/make_tables.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,8 +164,14 @@ fn print_decode_table(alphabet: &[u8], const_name: &str, indent_depth: usize) {
164164
}
165165

166166
fn check_alphabet(alphabet: &[u8]) {
167+
// ensure there are exactly 64 characters and that all of them are distinct
167168
assert_eq!(64, alphabet.len());
168169
let mut set: HashSet<u8> = HashSet::new();
169170
set.extend(alphabet);
170171
assert_eq!(64, set.len());
172+
173+
// must be ASCII to be valid as single UTF-8 bytes
174+
for &b in alphabet {
175+
assert!(b <= 0x7F_u8);
176+
}
171177
}

src/write/encoder.rs

Lines changed: 63 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -25,27 +25,24 @@ const MIN_ENCODE_CHUNK_SIZE: usize = 3;
2525
/// use std::io::Write;
2626
///
2727
/// // use a vec as the simplest possible `Write` -- in real code this is probably a file, etc.
28-
/// let mut wrapped_writer = Vec::new();
29-
/// {
30-
/// let mut enc = base64::write::EncoderWriter::new(
31-
/// &mut wrapped_writer, base64::STANDARD);
28+
/// let mut enc = base64::write::EncoderWriter::new(Vec::new(), base64::STANDARD);
3229
///
33-
/// // handle errors as you normally would
34-
/// enc.write_all(b"asdf").unwrap();
35-
/// // could leave this out to be called by Drop, if you don't care
36-
/// // about handling errors
37-
/// enc.finish().unwrap();
30+
/// // handle errors as you normally would
31+
/// enc.write_all(b"asdf").unwrap();
3832
///
39-
/// }
33+
/// // could leave this out to be called by Drop, if you don't care
34+
/// // about handling errors or getting the delegate writer back
35+
/// let delegate = enc.finish().unwrap();
4036
///
4137
/// // base64 was written to the writer
42-
/// assert_eq!(b"YXNkZg==", &wrapped_writer[..]);
38+
/// assert_eq!(b"YXNkZg==", &delegate[..]);
4339
///
4440
/// ```
4541
///
4642
/// # Panics
4743
///
48-
/// Calling `write()` after `finish()` is invalid and will panic.
44+
/// Calling `write()` (or related methods) or `finish()` after `finish()` has completed without
45+
/// error is invalid and will panic.
4946
///
5047
/// # Errors
5148
///
@@ -56,10 +53,12 @@ const MIN_ENCODE_CHUNK_SIZE: usize = 3;
5653
///
5754
/// It has some minor performance loss compared to encoding slices (a couple percent).
5855
/// It does not do any heap allocation.
59-
pub struct EncoderWriter<'a, W: 'a + Write> {
56+
pub struct EncoderWriter<W: Write> {
6057
config: Config,
61-
/// Where encoded data is written to
62-
w: &'a mut W,
58+
/// Where encoded data is written to. It's an `Option` so that `finish()` can move the writer
59+
/// out and return it to the caller. `None` implies that `finish()` has been called
60+
/// successfully, in which case there is nothing left for `Drop` to write.
61+
delegate: Option<W>,
6362
/// Holds a partial chunk, if any, after the last `write()`, so that we may then fill the chunk
6463
/// with the next `write()`, encode it, then proceed with the rest of the input normally.
6564
extra_input: [u8; MIN_ENCODE_CHUNK_SIZE],
@@ -70,13 +69,11 @@ pub struct EncoderWriter<'a, W: 'a + Write> {
7069
output: [u8; BUF_SIZE],
7170
/// How much of `output` is occupied with encoded data that couldn't be written last time
7271
output_occupied_len: usize,
73-
/// True iff padding / partial last chunk has been written.
74-
finished: bool,
7572
/// panic safety: don't write again in destructor if writer panicked while we were writing to it
7673
panicked: bool,
7774
}
7875

79-
impl<'a, W: Write> fmt::Debug for EncoderWriter<'a, W> {
76+
impl<W: Write> fmt::Debug for EncoderWriter<W> {
8077
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
8178
write!(
8279
f,
@@ -89,38 +86,58 @@ impl<'a, W: Write> fmt::Debug for EncoderWriter<'a, W> {
8986
}
9087
}
9188

92-
impl<'a, W: Write> EncoderWriter<'a, W> {
89+
impl<W: Write> EncoderWriter<W> {
9390
/// Create a new encoder that will write to the provided delegate writer `w`.
94-
pub fn new(w: &'a mut W, config: Config) -> EncoderWriter<'a, W> {
91+
pub fn new(w: W, config: Config) -> EncoderWriter<W> {
9592
EncoderWriter {
9693
config,
97-
w,
94+
delegate: Some(w),
9895
extra_input: [0u8; MIN_ENCODE_CHUNK_SIZE],
9996
extra_input_occupied_len: 0,
10097
output: [0u8; BUF_SIZE],
10198
output_occupied_len: 0,
102-
finished: false,
10399
panicked: false,
104100
}
105101
}
106102

107103
/// Encode all remaining buffered data and write it, including any trailing incomplete input
108104
/// triples and associated padding.
109105
///
110-
/// Once this succeeds, no further writes can be performed, as that would produce invalid
111-
/// base64.
106+
/// Once this succeeds, no further writes or calls to this method are allowed; they will panic.
112107
///
113-
/// This may write to the delegate writer multiple times if the delegate writer does not accept all input provided
114-
/// to its `write` each invocation.
108+
/// This may write to the delegate writer multiple times if the delegate writer does not accept
109+
/// all input provided to its `write` each invocation.
110+
///
111+
/// If you don't care about error handling, it is not necessary to call this function, as the
112+
/// equivalent finalization is done by the Drop impl.
113+
///
114+
/// Returns the writer that this was constructed around.
115115
///
116116
/// # Errors
117117
///
118-
/// The first error that is not of [`ErrorKind::Interrupted`] will be returned.
119-
pub fn finish(&mut self) -> Result<()> {
120-
if self.finished {
121-
return Ok(());
118+
/// The first error that is not of `ErrorKind::Interrupted` will be returned.
119+
pub fn finish(&mut self) -> Result<W> {
120+
// If we could consume self in finish(), we wouldn't have to worry about this case, but
121+
// finish() is retryable in the face of I/O errors, so we can't consume here.
122+
if self.delegate.is_none() {
123+
panic!("Encoder has already had finish() called")
122124
};
123125

126+
self.write_final_leftovers()?;
127+
128+
let writer = self.delegate.take().expect("Writer must be present");
129+
130+
Ok(writer)
131+
}
132+
133+
/// Write any remaining buffered data to the delegate writer.
134+
fn write_final_leftovers(&mut self) -> Result<()> {
135+
if self.delegate.is_none() {
136+
// finish() has already successfully called this, and we are now in drop() with a None
137+
// writer, so just no-op
138+
return Ok(());
139+
}
140+
124141
self.write_all_encoded_output()?;
125142

126143
if self.extra_input_occupied_len > 0 {
@@ -138,7 +155,6 @@ impl<'a, W: Write> EncoderWriter<'a, W> {
138155
self.extra_input_occupied_len = 0;
139156
}
140157

141-
self.finished = true;
142158
Ok(())
143159
}
144160

@@ -152,7 +168,11 @@ impl<'a, W: Write> EncoderWriter<'a, W> {
152168
/// that no write took place.
153169
fn write_to_delegate(&mut self, current_output_len: usize) -> Result<()> {
154170
self.panicked = true;
155-
let res = self.w.write(&self.output[..current_output_len]);
171+
let res = self
172+
.delegate
173+
.as_mut()
174+
.expect("Writer must be present")
175+
.write(&self.output[..current_output_len]);
156176
self.panicked = false;
157177

158178
res.map(|consumed| {
@@ -197,7 +217,7 @@ impl<'a, W: Write> EncoderWriter<'a, W> {
197217
}
198218
}
199219

200-
impl<'a, W: Write> Write for EncoderWriter<'a, W> {
220+
impl<W: Write> Write for EncoderWriter<W> {
201221
/// Encode input and then write to the delegate writer.
202222
///
203223
/// Under non-error circumstances, this returns `Ok` with the value being the number of bytes
@@ -215,7 +235,7 @@ impl<'a, W: Write> Write for EncoderWriter<'a, W> {
215235
///
216236
/// Any errors emitted by the delegate writer are returned.
217237
fn write(&mut self, input: &[u8]) -> Result<usize> {
218-
if self.finished {
238+
if self.delegate.is_none() {
219239
panic!("Cannot write more after calling finish()");
220240
}
221241

@@ -339,17 +359,23 @@ impl<'a, W: Write> Write for EncoderWriter<'a, W> {
339359

340360
/// Because this is usually treated as OK to call multiple times, it will *not* flush any
341361
/// incomplete chunks of input or write padding.
362+
/// # Errors
363+
///
364+
/// The first error that is not of [`ErrorKind::Interrupted`] will be returned.
342365
fn flush(&mut self) -> Result<()> {
343366
self.write_all_encoded_output()?;
344-
self.w.flush()
367+
self.delegate
368+
.as_mut()
369+
.expect("Writer must be present")
370+
.flush()
345371
}
346372
}
347373

348-
impl<'a, W: Write> Drop for EncoderWriter<'a, W> {
374+
impl<W: Write> Drop for EncoderWriter<W> {
349375
fn drop(&mut self) {
350376
if !self.panicked {
351377
// like `BufWriter`, ignore errors during drop
352-
let _ = self.finish();
378+
let _ = self.write_final_leftovers();
353379
}
354380
}
355381
}

0 commit comments

Comments
 (0)