Skip to content

Commit 3c62bd4

Browse files
Make constructing an Alphabet from a str const.
Not useful yet since unwrap() and friends aren't const, but a future Rust version will be able to make use of it.
1 parent ce8bb84 commit 3c62bd4

File tree

4 files changed

+162
-78
lines changed

4 files changed

+162
-78
lines changed

.circleci/config.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,8 +67,8 @@ jobs:
6767
command: |
6868
if [[ '<< parameters.rust_img >>' = 'rustlang/rust:nightly' ]]
6969
then
70-
cargo +nightly install cargo-fuzz
71-
cargo fuzz list | xargs -L 1 -I FUZZER cargo fuzz run FUZZER -- -max_total_time=1
70+
cargo install cargo-fuzz
71+
cargo fuzz list | xargs -I FUZZER cargo fuzz run FUZZER -- -max_total_time=1
7272
fi
7373
7474
- save_cache:

fuzz/fuzzers/roundtrip_no_pad.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ extern crate base64;
44
use base64::engine::fast_portable;
55

66
fuzz_target!(|data: &[u8]| {
7-
let config = fast_portable::FastPortableConfig::from(false, false);
7+
let config = fast_portable::FastPortableConfig::new().with_encode_padding(false);
88
let engine = fast_portable::FastPortable::from(&base64::alphabet::STANDARD, config);
99

1010
let encoded = base64::encode_engine(&data, &engine);

fuzz/fuzzers/utils.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,9 @@ pub fn random_engine(data: &[u8]) -> fast_portable::FastPortable {
2323
alphabet::STANDARD
2424
};
2525

26-
let config = fast_portable::FastPortableConfig::from(rng.gen(), rng.gen());
26+
let config = fast_portable::FastPortableConfig::new()
27+
.with_encode_padding(rng.gen())
28+
.with_decode_allow_trailing_bits(rng.gen());
2729

2830
fast_portable::FastPortable::from(&alphabet, config)
2931
}

src/alphabet.rs

Lines changed: 156 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -1,164 +1,246 @@
11
//! Provides [Alphabet] and constants for alphabets commonly used in the wild.
22
3+
#[cfg(any(feature = "std", test))]
4+
use std::{convert, error, fmt};
5+
6+
const ALPHABET_SIZE: usize = 64;
7+
38
/// An alphabet defines the 64 ASCII characters (symbols) used for base64.
49
///
510
/// Common alphabets are provided as constants, and custom alphabets
6-
/// can be made via the [From](#impl-From<T>) implementation.
11+
/// can be made via `from_str` or the `TryFrom<&str>` implementation.
712
///
813
/// ```
9-
/// let custom = base64::alphabet::Alphabet::from("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/");
14+
/// let custom = base64::alphabet::Alphabet::from_str("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/").unwrap();
1015
///
1116
/// let engine = base64::engine::fast_portable::FastPortable::from(
1217
/// &custom,
1318
/// base64::engine::fast_portable::PAD);
1419
/// ```
15-
#[derive(Clone, Copy, Debug)]
20+
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
1621
pub struct Alphabet {
17-
pub(crate) symbols: [u8; 64],
22+
pub(crate) symbols: [u8; ALPHABET_SIZE],
1823
}
1924

2025
impl Alphabet {
2126
/// Performs no checks so that it can be const.
2227
/// Used only for known-valid strings.
23-
const fn from_unchecked(alphabet: &str) -> Alphabet {
24-
let mut symbols = [0_u8; 64];
28+
const fn from_str_unchecked(alphabet: &str) -> Alphabet {
29+
let mut symbols = [0_u8; ALPHABET_SIZE];
2530
let source_bytes = alphabet.as_bytes();
2631

2732
// a way to copy that's allowed in const fn
2833
let mut index = 0;
29-
while index < 64 {
34+
while index < ALPHABET_SIZE {
3035
symbols[index] = source_bytes[index];
3136
index += 1;
3237
}
3338

3439
Alphabet { symbols }
3540
}
36-
}
3741

38-
impl<T: AsRef<str>> From<T> for Alphabet {
39-
/// Create a `CharacterSet` from a string of 64 ASCII bytes. Each byte must be
40-
/// unique, and the `=` byte is not allowed as it is used for padding.
42+
/// Create an `Alphabet` from a string of 64 unique printable ASCII bytes.
4143
///
42-
/// # Errors
44+
/// The `=` byte is not allowed as it is used for padding.
4345
///
44-
/// Panics if the text is an invalid base64 alphabet since the alphabet is
45-
/// likely to be hardcoded, and therefore errors are generally unrecoverable
46-
/// programmer errors.
47-
fn from(string: T) -> Self {
48-
let alphabet = string.as_ref();
49-
assert_eq!(
50-
64,
51-
alphabet.as_bytes().len(),
52-
"Base64 char set length must be 64"
53-
);
46+
/// The `const`-ness of this function isn't useful as of rust 1.54.0 since `const` `unwrap()`,
47+
/// etc, haven't shipped yet, but that's [on the roadmap](https://github.com/rust-lang/rust/issues/85194).
48+
pub const fn from_str(alphabet: &str) -> Result<Self, ParseAlphabetError> {
49+
let bytes = alphabet.as_bytes();
50+
if bytes.len() != ALPHABET_SIZE {
51+
return Err(ParseAlphabetError::InvalidLength);
52+
}
5453

55-
// scope just to ensure not accidentally using the sorted copy
5654
{
57-
// Check uniqueness without allocating since this must be no_std.
58-
// Could pull in heapless and use IndexSet, but this seems simple enough.
59-
let mut bytes = [0_u8; 64];
60-
alphabet
61-
.as_bytes()
62-
.iter()
63-
.enumerate()
64-
.for_each(|(index, &byte)| bytes[index] = byte);
65-
66-
bytes.sort_unstable();
67-
68-
// iterate over the sorted bytes, offset by one
69-
bytes.iter().zip(bytes[1..].iter()).for_each(|(b1, b2)| {
70-
// if any byte is the same as the next byte, there's a duplicate
71-
assert_ne!(b1, b2, "Duplicate bytes");
72-
});
55+
let mut index = 0;
56+
while index < ALPHABET_SIZE {
57+
let byte = bytes[index];
58+
59+
// must be ascii printable. 127 (DEL) is commonly considered printable
60+
// for some reason but clearly unsuitable for base64.
61+
if !(byte >= 32_u8 && byte <= 126_u8) {
62+
return Err(ParseAlphabetError::UnprintableByte(byte));
63+
}
64+
// = is assumed to be padding, so cannot be used as a symbol
65+
if b'=' == byte {
66+
return Err(ParseAlphabetError::ReservedByte(byte));
67+
}
68+
69+
// Check for duplicates while staying within what const allows.
70+
// It's n^2, but only over 64 hot bytes, and only once, so it's likely in the single digit
71+
// microsecond range.
72+
73+
let mut probe_index = 0;
74+
while probe_index < ALPHABET_SIZE {
75+
if probe_index == index {
76+
probe_index += 1;
77+
continue;
78+
}
79+
80+
let probe_byte = bytes[probe_index];
81+
82+
if byte == probe_byte {
83+
return Err(ParseAlphabetError::DuplicatedByte(byte));
84+
}
85+
86+
probe_index += 1;
87+
}
88+
89+
index += 1;
90+
}
7391
}
7492

75-
for &byte in alphabet.as_bytes() {
76-
// must be ascii printable. 127 (DEL) is commonly considered printable
77-
// for some reason but clearly unsuitable for base64.
78-
assert!(byte >= 32_u8 && byte < 127_u8, "Bytes must be printable");
79-
// = is assumed to be padding, so cannot be used as a symbol
80-
assert_ne!(b'=', byte, "Padding byte '=' is reserved");
81-
}
93+
Ok(Self::from_str_unchecked(alphabet))
94+
}
95+
}
96+
97+
#[cfg(any(feature = "std", test))]
98+
impl convert::TryFrom<&str> for Alphabet {
99+
type Error = ParseAlphabetError;
100+
101+
fn try_from(value: &str) -> Result<Self, Self::Error> {
102+
Alphabet::from_str(value)
103+
}
104+
}
105+
106+
/// Possible errors when constructing an [Alphabet] from a `str`.
107+
#[derive(Debug, Eq, PartialEq)]
108+
pub enum ParseAlphabetError {
109+
/// Alphabets must be 64 ASCII bytes
110+
InvalidLength,
111+
/// All bytes must be unique
112+
DuplicatedByte(u8),
113+
/// All bytes must be printable (in the range `[32, 126]`).
114+
UnprintableByte(u8),
115+
/// `=` cannot be used
116+
ReservedByte(u8),
117+
}
82118

83-
Self::from_unchecked(alphabet)
119+
#[cfg(any(feature = "std", test))]
120+
impl fmt::Display for ParseAlphabetError {
121+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
122+
match self {
123+
ParseAlphabetError::InvalidLength => write!(f, "Invalid length - must be 64 bytes"),
124+
ParseAlphabetError::DuplicatedByte(b) => write!(f, "Duplicated byte: {}", b),
125+
ParseAlphabetError::UnprintableByte(b) => write!(f, "Unprintable byte: {}", b),
126+
ParseAlphabetError::ReservedByte(b) => write!(f, "Reserved byte: {}", b),
127+
}
84128
}
85129
}
86130

131+
#[cfg(any(feature = "std", test))]
132+
impl error::Error for ParseAlphabetError {}
133+
87134
/// The standard alphabet (uses `+` and `/`).
88135
///
89136
/// See [RFC 3548](https://tools.ietf.org/html/rfc3548#section-3).
90-
pub const STANDARD: Alphabet =
91-
Alphabet::from_unchecked("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/");
137+
pub const STANDARD: Alphabet = Alphabet::from_str_unchecked(
138+
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
139+
);
92140

93141
/// The URL safe alphabet (uses `-` and `_`).
94142
///
95143
/// See [RFC 3548](https://tools.ietf.org/html/rfc3548#section-4).
96-
pub const URL_SAFE: Alphabet =
97-
Alphabet::from_unchecked("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_");
144+
pub const URL_SAFE: Alphabet = Alphabet::from_str_unchecked(
145+
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_",
146+
);
98147

99148
/// The `crypt(3)` alphabet (uses `.` and `/` as the first two values).
100149
///
101150
/// Not standardized, but folk wisdom on the net asserts that this alphabet is what crypt uses.
102-
pub const CRYPT: Alphabet =
103-
Alphabet::from_unchecked("./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz");
151+
pub const CRYPT: Alphabet = Alphabet::from_str_unchecked(
152+
"./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
153+
);
104154

105155
/// The bcrypt alphabet.
106-
pub const BCRYPT: Alphabet =
107-
Alphabet::from_unchecked("./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789");
156+
pub const BCRYPT: Alphabet = Alphabet::from_str_unchecked(
157+
"./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789",
158+
);
108159

109160
/// The alphabet used in IMAP-modified UTF-7 (uses `+` and `,`).
110161
///
111162
/// See [RFC 3501](https://tools.ietf.org/html/rfc3501#section-5.1.3)
112-
pub const IMAP_MUTF7: Alphabet =
113-
Alphabet::from_unchecked("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,");
163+
pub const IMAP_MUTF7: Alphabet = Alphabet::from_str_unchecked(
164+
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,",
165+
);
114166

115167
/// The alphabet used in BinHex 4.0 files.
116168
///
117169
/// See [BinHex 4.0 Definition](http://files.stairways.com/other/binhex-40-specs-info.txt)
118-
pub const BIN_HEX: Alphabet =
119-
Alphabet::from_unchecked("!\"#$%&'()*+,-0123456789@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdehijklmpqr");
170+
pub const BIN_HEX: Alphabet = Alphabet::from_str_unchecked(
171+
"!\"#$%&'()*+,-0123456789@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdehijklmpqr",
172+
);
120173

121174
#[cfg(test)]
122175
mod tests {
123-
use crate::alphabet::Alphabet;
176+
use crate::alphabet::*;
177+
use std::convert::TryFrom as _;
124178

125-
#[should_panic(expected = "Duplicate bytes")]
126179
#[test]
127180
fn detects_duplicate_start() {
128-
let _ = Alphabet::from("AACDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/");
181+
assert_eq!(
182+
ParseAlphabetError::DuplicatedByte(b'A'),
183+
Alphabet::from_str("AACDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
184+
.unwrap_err()
185+
);
129186
}
130187

131-
#[should_panic(expected = "Duplicate bytes")]
132188
#[test]
133189
fn detects_duplicate_end() {
134-
let _ = Alphabet::from("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789//");
190+
assert_eq!(
191+
ParseAlphabetError::DuplicatedByte(b'/'),
192+
Alphabet::from_str("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789//")
193+
.unwrap_err()
194+
);
135195
}
136196

137-
#[should_panic(expected = "Duplicate bytes")]
138197
#[test]
139198
fn detects_duplicate_middle() {
140-
let _ = Alphabet::from("ABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/");
199+
assert_eq!(
200+
ParseAlphabetError::DuplicatedByte(b'Z'),
201+
Alphabet::from_str("ABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/")
202+
.unwrap_err()
203+
);
141204
}
142205

143-
#[should_panic(expected = "Base64 char set length must be 64")]
144206
#[test]
145207
fn detects_length() {
146-
let _ = Alphabet::from(
147-
"xxxxxxxxxABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/",
208+
assert_eq!(
209+
ParseAlphabetError::InvalidLength,
210+
Alphabet::from_str(
211+
"xxxxxxxxxABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/",
212+
)
213+
.unwrap_err()
148214
);
149215
}
150216

151-
#[should_panic(expected = "Padding byte '=' is reserved")]
152217
#[test]
153218
fn detects_padding() {
154-
let _ = Alphabet::from("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+=");
219+
assert_eq!(
220+
ParseAlphabetError::ReservedByte(b'='),
221+
Alphabet::from_str("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+=")
222+
.unwrap_err()
223+
);
155224
}
156225

157-
#[should_panic(expected = "Bytes must be printable")]
158226
#[test]
159227
fn detects_unprintable() {
160228
// form feed
161-
let _ =
162-
Alphabet::from("\x0cBCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/");
229+
assert_eq!(
230+
ParseAlphabetError::UnprintableByte(0xc),
231+
Alphabet::from_str(
232+
"\x0cBCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
233+
)
234+
.unwrap_err()
235+
);
236+
}
237+
238+
#[test]
239+
fn same_as_unchecked() {
240+
assert_eq!(
241+
STANDARD,
242+
Alphabet::try_from("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
243+
.unwrap()
244+
)
163245
}
164246
}

0 commit comments

Comments
 (0)