Skip to content

Commit a7e1f47

Browse files
Merge pull request #224 from marshallpierce/mina86-a
Simplify chunk_encoder, take 2
2 parents a3f692b + 96b29d8 commit a7e1f47

File tree

2 files changed

+20
-80
lines changed

2 files changed

+20
-80
lines changed

RELEASE-NOTES.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
- Implement `source` instead of `cause` on Error types
44
- Roll back MSRV to 1.48.0 so Debian can continue to live in a time warp
5+
- Slightly faster chunked encoding for short inputs
56

67
# 0.21.2
78

@@ -17,7 +18,6 @@
1718
- `Engine.internal_decode` return type changed
1819
- Update MSRV to 1.60.0
1920

20-
2121
# 0.21.0
2222

2323
## Migration

src/chunked_encoder.rs

Lines changed: 19 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
1+
use crate::{
2+
encode::add_padding,
3+
engine::{Config, Engine},
4+
};
15
#[cfg(any(feature = "alloc", feature = "std", test))]
26
use alloc::string::String;
3-
use core::cmp;
47
#[cfg(any(feature = "alloc", feature = "std", test))]
58
use core::str;
69

7-
use crate::encode::add_padding;
8-
use crate::engine::{Config, Engine};
9-
1010
/// The output mechanism for ChunkedEncoder's encoded bytes.
1111
pub trait Sink {
1212
type Error;
@@ -15,72 +15,37 @@ pub trait Sink {
1515
fn write_encoded_bytes(&mut self, encoded: &[u8]) -> Result<(), Self::Error>;
1616
}
1717

18-
const BUF_SIZE: usize = 1024;
19-
2018
/// A base64 encoder that emits encoded bytes in chunks without heap allocation.
2119
pub struct ChunkedEncoder<'e, E: Engine + ?Sized> {
2220
engine: &'e E,
23-
max_input_chunk_len: usize,
2421
}
2522

2623
impl<'e, E: Engine + ?Sized> ChunkedEncoder<'e, E> {
2724
pub fn new(engine: &'e E) -> ChunkedEncoder<'e, E> {
28-
ChunkedEncoder {
29-
engine,
30-
max_input_chunk_len: max_input_length(BUF_SIZE, engine.config().encode_padding()),
31-
}
25+
ChunkedEncoder { engine }
3226
}
3327

3428
pub fn encode<S: Sink>(&self, bytes: &[u8], sink: &mut S) -> Result<(), S::Error> {
35-
let mut encode_buf: [u8; BUF_SIZE] = [0; BUF_SIZE];
36-
let mut input_index = 0;
37-
38-
while input_index < bytes.len() {
39-
// either the full input chunk size, or it's the last iteration
40-
let input_chunk_len = cmp::min(self.max_input_chunk_len, bytes.len() - input_index);
41-
42-
let chunk = &bytes[input_index..(input_index + input_chunk_len)];
43-
44-
let mut b64_bytes_written = self.engine.internal_encode(chunk, &mut encode_buf);
45-
46-
input_index += input_chunk_len;
47-
let more_input_left = input_index < bytes.len();
48-
49-
if self.engine.config().encode_padding() && !more_input_left {
50-
// no more input, add padding if needed. Buffer will have room because
51-
// max_input_length leaves room for it.
52-
b64_bytes_written +=
53-
add_padding(b64_bytes_written, &mut encode_buf[b64_bytes_written..]);
29+
const BUF_SIZE: usize = 1024;
30+
const CHUNK_SIZE: usize = BUF_SIZE / 4 * 3;
31+
32+
let mut buf = [0; BUF_SIZE];
33+
for chunk in bytes.chunks(CHUNK_SIZE) {
34+
let mut len = self.engine.internal_encode(chunk, &mut buf);
35+
if chunk.len() != CHUNK_SIZE && self.engine.config().encode_padding() {
36+
// Final, potentially partial, chunk.
37+
// Padding only has to be considered for a partial chunk: the length of a full chunk
38+
// is a multiple of 3, so its encoded output never needs padding.
39+
// Pad output to multiple of four bytes if required by config.
40+
len += add_padding(len, &mut buf[len..]);
5441
}
55-
56-
sink.write_encoded_bytes(&encode_buf[0..b64_bytes_written])?;
42+
sink.write_encoded_bytes(&buf[..len])?;
5743
}
5844

5945
Ok(())
6046
}
6147
}
6248

63-
/// Calculate the longest input that can be encoded for the given output buffer size.
64-
///
65-
/// If the config requires padding, two bytes of buffer space will be set aside so that the last
66-
/// chunk of input can be encoded safely.
67-
///
68-
/// The input length will always be a multiple of 3 so that no encoding state has to be carried over
69-
/// between chunks.
70-
fn max_input_length(encoded_buf_len: usize, padded: bool) -> usize {
71-
let effective_buf_len = if padded {
72-
// make room for padding
73-
encoded_buf_len
74-
.checked_sub(2)
75-
.expect("Don't use a tiny buffer")
76-
} else {
77-
encoded_buf_len
78-
};
79-
80-
// No padding, so just normal base64 expansion.
81-
(effective_buf_len / 4) * 3
82-
}
83-
8449
// A really simple sink that just appends to a string
8550
#[cfg(any(feature = "alloc", feature = "std", test))]
8651
pub(crate) struct StringSink<'a> {
@@ -152,38 +117,13 @@ pub mod tests {
152117
chunked_encode_matches_normal_encode_random(&helper);
153118
}
154119

155-
#[test]
156-
fn max_input_length_no_pad() {
157-
assert_eq!(768, max_input_length(1024, false));
158-
}
159-
160-
#[test]
161-
fn max_input_length_with_pad_decrements_one_triple() {
162-
assert_eq!(765, max_input_length(1024, true));
163-
}
164-
165-
#[test]
166-
fn max_input_length_with_pad_one_byte_short() {
167-
assert_eq!(765, max_input_length(1025, true));
168-
}
169-
170-
#[test]
171-
fn max_input_length_with_pad_fits_exactly() {
172-
assert_eq!(768, max_input_length(1026, true));
173-
}
174-
175-
#[test]
176-
fn max_input_length_cant_use_extra_single_encoded_byte() {
177-
assert_eq!(300, max_input_length(401, false));
178-
}
179-
180120
pub fn chunked_encode_matches_normal_encode_random<S: SinkTestHelper>(sink_test_helper: &S) {
181121
let mut input_buf: Vec<u8> = Vec::new();
182122
let mut output_buf = String::new();
183123
let mut rng = rand::rngs::SmallRng::from_entropy();
184124
let input_len_range = Uniform::new(1, 10_000);
185125

186-
for _ in 0..5_000 {
126+
for _ in 0..20_000 {
187127
input_buf.clear();
188128
output_buf.clear();
189129

0 commit comments

Comments
 (0)