Skip to content

Commit 3155ca7

Browse files
Various cleanups
- Improve engine tests - Improve comments - Remove dead code - Improve error message byte formatting
1 parent 3c62bd4 commit 3155ca7

File tree

9 files changed

+91
-55
lines changed

9 files changed

+91
-55
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,5 +10,5 @@ main.rs
1010
*.iml
1111

1212
# `perf record` files
13-
perf.data*
13+
/*perf.data*
1414
/tmp

Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,3 +31,7 @@ std = []
3131
[profile.bench]
3232
# Useful for better disassembly when using `perf record` and `perf annotate`
3333
debug = true
34+
35+
[profile.test]
36+
# Faster tests save much more than the increase in compilation time
37+
opt-level = 3

RELEASE-NOTES.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@
55
- This opens the door to a portable constant-time implementation ([#153](https://github.com/marshallpierce/rust-base64/pull/153), presumably `ConstantTimePortable`?) for security-sensitive applications that need side-channel resistance, and CPU-specific SIMD implementations for more speed.
66
- Standard base64 per the RFC is available via `DEFAULT_ENGINE`. To use different alphabets or other settings (padding, etc), create your own engine instance.
77
- `CharacterSet` is now `Alphabet` (per the RFC), and allows creating custom alphabets. The corresponding tables that were previously code-generated are now built dynamically.
8-
- Since there are already multiple breaking changes, various functions are renamed to be more consistent and discoverable
9-
- MSRV is now 1.47.0
10-
- DecoderReader now owns its inner reader, and can expose it via `into_inner()`. For symmetry, `EncoderWriter` can do the same with its writer.
8+
- Since there are already multiple breaking changes, various functions are renamed to be more consistent and discoverable.
9+
- MSRV is now 1.47.0 to allow various things to use `const fn`.
10+
- `DecoderReader` now owns its inner reader, and can expose it via `into_inner()`. For symmetry, `EncoderWriter` can do the same with its writer.
1111

1212
# 0.13.0
1313

src/alphabet.rs

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
//! Provides [Alphabet] and constants for alphabets commonly used in the wild.
22
3+
use crate::PAD_BYTE;
34
#[cfg(any(feature = "std", test))]
45
use std::{convert, error, fmt};
56

@@ -17,7 +18,7 @@ const ALPHABET_SIZE: usize = 64;
1718
/// &custom,
1819
/// base64::engine::fast_portable::PAD);
1920
/// ```
20-
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
21+
#[derive(Clone, Debug, Eq, PartialEq)]
2122
pub struct Alphabet {
2223
pub(crate) symbols: [u8; ALPHABET_SIZE],
2324
}
@@ -39,7 +40,7 @@ impl Alphabet {
3940
Alphabet { symbols }
4041
}
4142

42-
/// Create a `CharacterSet` from a string of 64 unique printable ASCII bytes.
43+
/// Create an `Alphabet` from a string of 64 unique printable ASCII bytes.
4344
///
4445
/// The `=` byte is not allowed as it is used for padding.
4546
///
@@ -62,7 +63,7 @@ impl Alphabet {
6263
return Err(ParseAlphabetError::UnprintableByte(byte));
6364
}
6465
// = is assumed to be padding, so cannot be used as a symbol
65-
if b'=' == byte {
66+
if byte == PAD_BYTE {
6667
return Err(ParseAlphabetError::ReservedByte(byte));
6768
}
6869

@@ -121,9 +122,9 @@ impl fmt::Display for ParseAlphabetError {
121122
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
122123
match self {
123124
ParseAlphabetError::InvalidLength => write!(f, "Invalid length - must be 64 bytes"),
124-
ParseAlphabetError::DuplicatedByte(b) => write!(f, "Duplicated byte: {}", b),
125-
ParseAlphabetError::UnprintableByte(b) => write!(f, "Unprintable byte: {}", b),
126-
ParseAlphabetError::ReservedByte(b) => write!(f, "Reserved byte: {}", b),
125+
ParseAlphabetError::DuplicatedByte(b) => write!(f, "Duplicated byte: {:#04x}", b),
126+
ParseAlphabetError::UnprintableByte(b) => write!(f, "Unprintable byte: {:#04x}", b),
127+
ParseAlphabetError::ReservedByte(b) => write!(f, "Reserved byte: {:#04x}", b),
127128
}
128129
}
129130
}

src/decode.rs

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,6 @@ use core::fmt;
99
#[cfg(any(feature = "std", test))]
1010
use std::error;
1111

12-
// TODO how to handle InvalidLastSymbol and InvalidLength behavior across engines?
13-
1412
/// Errors that can occur while decoding.
1513
#[derive(Clone, Debug, PartialEq, Eq)]
1614
pub enum DecodeError {
@@ -58,7 +56,7 @@ impl error::Error for DecodeError {
5856
}
5957
}
6058

61-
///Decode base64 using the [default engine](DEFAULT_ENGINE), alphabet, and config.
59+
///Decode base64 using the [default engine](DEFAULT_ENGINE).
6260
///Returns a `Result` containing a `Vec<u8>`.
6361
///
6462
///# Example
@@ -93,10 +91,10 @@ pub fn decode<T: AsRef<[u8]>>(input: T) -> Result<Vec<u8>, DecodeError> {
9391
///
9492
/// // custom engine setup
9593
/// let bytes_url = base64::decode_engine(
96-
/// "aGVsbG8gaW50ZXJuZXR-Cg==",
94+
/// "aGVsbG8gaW50ZXJuZXR-Cg",
9795
/// &base64::engine::fast_portable::FastPortable::from(
9896
/// &base64::alphabet::URL_SAFE,
99-
/// base64::engine::fast_portable::PAD),
97+
/// base64::engine::fast_portable::NO_PAD),
10098
///
10199
/// ).unwrap();
102100
/// println!("{:?}", bytes_url);

src/encode.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ use crate::PAD_BYTE;
77
#[cfg(any(feature = "alloc", feature = "std", test))]
88
use alloc::{string::String, vec};
99

10-
///Encode arbitrary octets as base64 using the [default engine](DEFAULT_ENGINE), alphabet, and config.
10+
///Encode arbitrary octets as base64 using the [default engine](DEFAULT_ENGINE).
1111
///Returns a `String`.
1212
///
1313
///# Example

src/engine/fast_portable/mod.rs

Lines changed: 3 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -216,23 +216,6 @@ pub(crate) const fn decode_table(alphabet: &Alphabet) -> [u8; 256] {
216216
return decode_table;
217217
}
218218

219-
// fn decode_aligned(symbol: u8, decode_table: &[u8; 256]) -> u8 {
220-
// let mut result: u8 = 0x00;
221-
// // If `symbol` is inside the printable range, one of these two derived indices will be equal to
222-
// // the original index, and the decoded byte will end up in `result`. If `symbol` is not
223-
// // printable, neither will equal the original symbol, and so both decoded bytes will have 0x00
224-
// // as a mask.
225-
// // TODO invalid bytes decoded to 0x00 instead of 0xFF?
226-
// let idx: [u8; 2] = [symbol % 64, symbol % 64 + 64];
227-
// for i in 0..2 {
228-
// let symbol_eq_mod = idx[i] == symbol;
229-
// // if symbol equals its mod flavor, 0xFF, else 0x00
230-
// let mask = ((symbol_eq_mod) as i8 - 1) as u8;
231-
// result = result | (decode_table[idx[i] as usize] & mask);
232-
// }
233-
// result
234-
// }
235-
236219
#[inline]
237220
fn read_u64(s: &[u8]) -> u64 {
238221
u64::from_be_bytes(s[..8].try_into().unwrap())
@@ -315,6 +298,9 @@ impl Config for FastPortableConfig {
315298
}
316299

317300
/// Include padding bytes when encoding.
301+
///
302+
/// This is the standard per the base64 RFC, but consider using [NO_PAD] instead as padding serves
303+
/// little purpose in practice.
318304
pub const PAD: FastPortableConfig = FastPortableConfig::new();
319305

320306
/// Don't add padding when encoding.

src/engine/mod.rs

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,10 @@ mod naive;
1010
#[cfg(test)]
1111
mod tests;
1212

13-
/// An `Engine` provides low-level encoding and decoding operations that all other higher-level parts of the API use.
13+
/// An `Engine` provides low-level encoding and decoding operations that all other higher-level parts of the API use. Users of the library will generally not need to implement this.
1414
///
1515
/// Different implementations offer different characteristics. The library currently ships with
16-
/// a general-purpose [FastPortable] that offers good speed and works on any CPU, with more choices
16+
/// a general-purpose [FastPortable] impl that offers good speed and works on any CPU, with more choices
1717
/// coming later, like a constant-time one when side channel resistance is called for, and vendor-specific vectorized ones for more speed.
1818
///
1919
/// See [DEFAULT_ENGINE] if you just want standard base64. Otherwise, when possible, it's
@@ -40,10 +40,9 @@ pub trait Engine: Send + Sync {
4040
/// Must not write any bytes into the output slice other than the encoded data.
4141
fn encode(&self, input: &[u8], output: &mut [u8]) -> usize;
4242

43-
/// As an optimization, it is sometimes helpful to have a conservative estimate of the decoded
44-
/// size before doing the decoding.
45-
///
46-
/// The result of this must be passed to [Engine::decode()].
43+
/// As an optimization to prevent the decoded length from being calculated twice, it is
44+
/// sometimes helpful to have a conservative estimate of the decoded size before doing the
45+
/// decoding, so this calculation is done separately and passed to [Engine::decode()] as needed.
4746
fn decoded_length_estimate(&self, input_len: usize) -> Self::DecodeEstimate;
4847

4948
/// Decode `input` base64 bytes into the `output` buffer.
@@ -87,14 +86,14 @@ pub trait Config {
8786
/// The decode estimate used by an engine implementation. Users do not need to interact with this;
8887
/// it is only for engine implementors.
8988
///
90-
/// Implementors may want to store relevant calculations when constructing this to avoid having
91-
/// to calculate them again during actual decoding.
89+
/// Implementors may store relevant data here when constructing this to avoid having to calculate
90+
/// them again during actual decoding.
9291
pub trait DecodeEstimate {
9392
/// Returns a conservative (err on the side of too big) estimate of the decoded length to use
9493
/// for pre-allocating buffers, etc.
9594
fn decoded_length_estimate(&self) -> usize;
9695
}
9796

98-
/// An engine that will work on all CPUs using the standard base64 alphabet and config.
97+
/// A [FastPortable] engine using the [crate::alphabet::STANDARD] base64 alphabet and [crate::engine::fast_portable::PAD] config.
9998
pub const DEFAULT_ENGINE: FastPortable =
10099
FastPortable::from(&alphabet::STANDARD, fast_portable::PAD);

src/engine/tests.rs

Lines changed: 61 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,10 @@ use rstest::rstest;
66
use rstest_reuse::{apply, template};
77
use std::iter;
88

9+
use crate::tests::assert_encode_sanity;
910
use crate::{
1011
alphabet::{Alphabet, STANDARD},
11-
encode,
12+
decode_engine, encode,
1213
engine::{fast_portable, naive, Engine},
1314
tests::random_alphabet,
1415
DecodeError, PAD_BYTE,
@@ -122,7 +123,9 @@ fn encode_doesnt_write_extra_bytes<E: EngineWrapper>(engine_wrapper: E) {
122123
let mut encode_buf = Vec::<u8>::new();
123124
let mut encode_buf_backup = Vec::<u8>::new();
124125

125-
let len_range = Uniform::new(1, 1_000);
126+
let input_len_range = Uniform::new(0, 5);
127+
let prefix_len_range = Uniform::new(0, 5);
128+
let suffix_len_range = Uniform::new(0, 5);
126129

127130
for _ in 0..10_000 {
128131
let engine = E::random(&mut rng);
@@ -131,23 +134,37 @@ fn encode_doesnt_write_extra_bytes<E: EngineWrapper>(engine_wrapper: E) {
131134
encode_buf.clear();
132135
encode_buf_backup.clear();
133136

134-
let orig_len = fill_rand(&mut orig_data, &mut rng, &len_range);
135-
let expected_encode_len = engine_encoded_len(orig_len);
136-
encode_buf.resize(expected_encode_len, 0);
137+
let orig_len = fill_rand(&mut orig_data, &mut rng, &input_len_range);
138+
139+
// write a random prefix
140+
let prefix_len = fill_rand(&mut encode_buf, &mut rng, &prefix_len_range);
141+
let expected_encode_len_no_pad = engine_encoded_len(orig_len);
142+
// leave space for encoded data
143+
encode_buf.resize(expected_encode_len_no_pad + prefix_len, 0);
144+
// and a random suffix
145+
let suffix_len = fill_rand(&mut encode_buf, &mut rng, &suffix_len_range);
137146

138-
// oversize encode buffer so we can easily tell if it writes anything more than
139-
// just the encoded data
140-
fill_rand_len(&mut encode_buf, &mut rng, (expected_encode_len + 100) * 2);
141147
encode_buf_backup.extend_from_slice(&encode_buf[..]);
142148

143-
let encoded_len = engine.encode(&orig_data[..], &mut encode_buf[..]);
144-
assert_eq!(expected_encode_len, encoded_len);
149+
let encoded_len_no_pad = engine.encode(&orig_data[..], &mut encode_buf[prefix_len..]);
150+
assert_eq!(expected_encode_len_no_pad, encoded_len_no_pad);
145151

146152
// no writes past what it claimed to write
153+
assert_eq!(&encode_buf_backup[..prefix_len], &encode_buf[..prefix_len]);
147154
assert_eq!(
148-
&encode_buf_backup[encoded_len..],
149-
&encode_buf[encoded_len..]
150-
)
155+
&encode_buf_backup[(prefix_len + encoded_len_no_pad)..],
156+
&encode_buf[(prefix_len + encoded_len_no_pad)..]
157+
);
158+
159+
let encoded_data = &encode_buf[prefix_len..(prefix_len + encoded_len_no_pad)];
160+
assert_encode_sanity(
161+
std::str::from_utf8(encoded_data).unwrap(),
162+
// engines don't pad
163+
false,
164+
orig_len,
165+
);
166+
167+
assert_eq!(orig_data, decode_engine(encoded_data, &engine).unwrap());
151168
}
152169
}
153170

@@ -273,6 +290,37 @@ fn decode_detect_invalid_last_symbol_two_bytes<E: EngineWrapper>(engine_wrapper:
273290
}
274291
}
275292

293+
#[apply(all_engines)]
294+
fn decode_detect_invalid_last_symbol_when_length_is_also_invalid<E: EngineWrapper>(
295+
engine_wrapper: E,
296+
) {
297+
let mut rng = rand::rngs::SmallRng::from_entropy();
298+
299+
// check across enough lengths that it would likely cover any implementation's various internal
300+
// small/large input division
301+
for len in 0_usize..1000 {
302+
if len % 4 != 1 {
303+
continue;
304+
}
305+
306+
let engine = E::random_alphabet(&mut rng, &STANDARD);
307+
308+
let mut input = vec![b'A'; len];
309+
310+
// with a valid last char, it's InvalidLength
311+
assert_eq!(
312+
Err(DecodeError::InvalidLength),
313+
decode_engine(&input, &engine)
314+
);
315+
// after mangling the last char, it's InvalidByte
316+
input[len - 1] = b'*';
317+
assert_eq!(
318+
Err(DecodeError::InvalidByte(len - 1, b'*')),
319+
decode_engine(&input, &engine)
320+
);
321+
}
322+
}
323+
276324
#[apply(all_engines)]
277325
fn decode_detect_invalid_last_symbol_every_possible_two_symbols<E: EngineWrapper>(
278326
engine_wrapper: E,

0 commit comments

Comments
 (0)