|
1 | 1 | //! Provides [Alphabet] and constants for alphabets commonly used in the wild. |
2 | 2 |
|
| 3 | +#[cfg(any(feature = "std", test))] |
| 4 | +use std::{convert, error, fmt}; |
| 5 | + |
| 6 | +const ALPHABET_SIZE: usize = 64; |
| 7 | + |
3 | 8 | /// An alphabet defines the 64 ASCII characters (symbols) used for base64. |
4 | 9 | /// |
5 | 10 | /// Common alphabets are provided as constants, and custom alphabets |
6 | | -/// can be made via the [From](#impl-From<T>) implementation. |
| 11 | +/// can be made via `from_str` or the `TryFrom<&str>` implementation. |
7 | 12 | /// |
8 | 13 | /// ``` |
9 | | -/// let custom = base64::alphabet::Alphabet::from("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"); |
| 14 | +/// let custom = base64::alphabet::Alphabet::from_str("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/").unwrap(); |
10 | 15 | /// |
11 | 16 | /// let engine = base64::engine::fast_portable::FastPortable::from( |
12 | 17 | /// &custom, |
13 | 18 | /// base64::engine::fast_portable::PAD); |
14 | 19 | /// ``` |
15 | | -#[derive(Clone, Copy, Debug)] |
| 20 | +#[derive(Clone, Copy, Debug, Eq, PartialEq)] |
16 | 21 | pub struct Alphabet { |
17 | | - pub(crate) symbols: [u8; 64], |
| 22 | + pub(crate) symbols: [u8; ALPHABET_SIZE], |
18 | 23 | } |
19 | 24 |
|
20 | 25 | impl Alphabet { |
21 | 26 | /// Performs no checks so that it can be const. |
22 | 27 | /// Used only for known-valid strings. |
23 | | - const fn from_unchecked(alphabet: &str) -> Alphabet { |
24 | | - let mut symbols = [0_u8; 64]; |
| 28 | + const fn from_str_unchecked(alphabet: &str) -> Alphabet { |
| 29 | + let mut symbols = [0_u8; ALPHABET_SIZE]; |
25 | 30 | let source_bytes = alphabet.as_bytes(); |
26 | 31 |
|
27 | 32 | // a way to copy that's allowed in const fn |
28 | 33 | let mut index = 0; |
29 | | - while index < 64 { |
| 34 | + while index < ALPHABET_SIZE { |
30 | 35 | symbols[index] = source_bytes[index]; |
31 | 36 | index += 1; |
32 | 37 | } |
33 | 38 |
|
34 | 39 | Alphabet { symbols } |
35 | 40 | } |
36 | | -} |
37 | 41 |
|
38 | | -impl<T: AsRef<str>> From<T> for Alphabet { |
39 | | - /// Create a `CharacterSet` from a string of 64 ASCII bytes. Each byte must be |
40 | | - /// unique, and the `=` byte is not allowed as it is used for padding. |
| 42 | + /// Create an `Alphabet` from a string of 64 unique printable ASCII bytes. |
41 | 43 | /// |
42 | | - /// # Errors |
| 44 | + /// The `=` byte is not allowed as it is used for padding. |
43 | 45 | /// |
44 | | - /// Panics if the text is an invalid base64 alphabet since the alphabet is |
45 | | - /// likely to be hardcoded, and therefore errors are generally unrecoverable |
46 | | - /// programmer errors. |
47 | | - fn from(string: T) -> Self { |
48 | | - let alphabet = string.as_ref(); |
49 | | - assert_eq!( |
50 | | - 64, |
51 | | - alphabet.as_bytes().len(), |
52 | | - "Base64 char set length must be 64" |
53 | | - ); |
| 46 | + /// The `const`-ness of this function isn't useful as of rust 1.54.0 since `const` `unwrap()`, |
| 47 | + /// etc, haven't shipped yet, but that's [on the roadmap](https://github.com/rust-lang/rust/issues/85194). |
| 48 | + pub const fn from_str(alphabet: &str) -> Result<Self, ParseAlphabetError> { |
| 49 | + let bytes = alphabet.as_bytes(); |
| 50 | + if bytes.len() != ALPHABET_SIZE { |
| 51 | + return Err(ParseAlphabetError::InvalidLength); |
| 52 | + } |
54 | 53 |
|
55 | | - // scope just to ensure not accidentally using the sorted copy |
56 | 54 | { |
57 | | - // Check uniqueness without allocating since this must be no_std. |
58 | | - // Could pull in heapless and use IndexSet, but this seems simple enough. |
59 | | - let mut bytes = [0_u8; 64]; |
60 | | - alphabet |
61 | | - .as_bytes() |
62 | | - .iter() |
63 | | - .enumerate() |
64 | | - .for_each(|(index, &byte)| bytes[index] = byte); |
65 | | - |
66 | | - bytes.sort_unstable(); |
67 | | - |
68 | | - // iterate over the sorted bytes, offset by one |
69 | | - bytes.iter().zip(bytes[1..].iter()).for_each(|(b1, b2)| { |
70 | | - // if any byte is the same as the next byte, there's a duplicate |
71 | | - assert_ne!(b1, b2, "Duplicate bytes"); |
72 | | - }); |
| 55 | + let mut index = 0; |
| 56 | + while index < ALPHABET_SIZE { |
| 57 | + let byte = bytes[index]; |
| 58 | + |
| 59 | + // must be ascii printable. 127 (DEL) is commonly considered printable |
| 60 | + // for some reason but clearly unsuitable for base64. |
| 61 | + if !(byte >= 32_u8 && byte <= 126_u8) { |
| 62 | + return Err(ParseAlphabetError::UnprintableByte(byte)); |
| 63 | + } |
| 64 | + // = is assumed to be padding, so cannot be used as a symbol |
| 65 | + if b'=' == byte { |
| 66 | + return Err(ParseAlphabetError::ReservedByte(byte)); |
| 67 | + } |
| 68 | + |
| 69 | + // Check for duplicates while staying within what const allows. |
| 70 | + // It's n^2, but only over 64 hot bytes, and only once, so it's likely in the single digit |
| 71 | + // microsecond range. |
| 72 | + |
| 73 | + let mut probe_index = 0; |
| 74 | + while probe_index < ALPHABET_SIZE { |
| 75 | + if probe_index == index { |
| 76 | + probe_index += 1; |
| 77 | + continue; |
| 78 | + } |
| 79 | + |
| 80 | + let probe_byte = bytes[probe_index]; |
| 81 | + |
| 82 | + if byte == probe_byte { |
| 83 | + return Err(ParseAlphabetError::DuplicatedByte(byte)); |
| 84 | + } |
| 85 | + |
| 86 | + probe_index += 1; |
| 87 | + } |
| 88 | + |
| 89 | + index += 1; |
| 90 | + } |
73 | 91 | } |
74 | 92 |
|
75 | | - for &byte in alphabet.as_bytes() { |
76 | | - // must be ascii printable. 127 (DEL) is commonly considered printable |
77 | | - // for some reason but clearly unsuitable for base64. |
78 | | - assert!(byte >= 32_u8 && byte < 127_u8, "Bytes must be printable"); |
79 | | - // = is assumed to be padding, so cannot be used as a symbol |
80 | | - assert_ne!(b'=', byte, "Padding byte '=' is reserved"); |
81 | | - } |
| 93 | + Ok(Self::from_str_unchecked(alphabet)) |
| 94 | + } |
| 95 | +} |
| 96 | + |
| 97 | +#[cfg(any(feature = "std", test))] |
| 98 | +impl convert::TryFrom<&str> for Alphabet { |
| 99 | + type Error = ParseAlphabetError; |
| 100 | + |
| 101 | + fn try_from(value: &str) -> Result<Self, Self::Error> { |
| 102 | + Alphabet::from_str(value) |
| 103 | + } |
| 104 | +} |
| 105 | + |
| 106 | +/// Possible errors when constructing an [Alphabet] from a `str`. |
| 107 | +#[derive(Debug, Eq, PartialEq)] |
| 108 | +pub enum ParseAlphabetError { |
| 109 | + /// Alphabets must be 64 ASCII bytes |
| 110 | + InvalidLength, |
| 111 | + /// All bytes must be unique |
| 112 | + DuplicatedByte(u8), |
| 113 | + /// All bytes must be printable (in the range `[32, 126]`). |
| 114 | + UnprintableByte(u8), |
| 115 | + /// `=` cannot be used as a symbol because it is reserved for padding |
| 116 | + ReservedByte(u8), |
| 117 | +} |
82 | 118 |
|
83 | | - Self::from_unchecked(alphabet) |
| 119 | +#[cfg(any(feature = "std", test))] |
| 120 | +impl fmt::Display for ParseAlphabetError { |
| 121 | + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| 122 | + match self { |
| 123 | + ParseAlphabetError::InvalidLength => write!(f, "Invalid length - must be 64 bytes"), |
| 124 | + ParseAlphabetError::DuplicatedByte(b) => write!(f, "Duplicated byte: {}", b), |
| 125 | + ParseAlphabetError::UnprintableByte(b) => write!(f, "Unprintable byte: {}", b), |
| 126 | + ParseAlphabetError::ReservedByte(b) => write!(f, "Reserved byte: {}", b), |
| 127 | + } |
84 | 128 | } |
85 | 129 | } |
86 | 130 |
|
| 131 | +#[cfg(any(feature = "std", test))] |
| 132 | +impl error::Error for ParseAlphabetError {} |
| 133 | + |
87 | 134 | /// The standard alphabet (uses `+` and `/`). |
88 | 135 | /// |
89 | 136 | /// See [RFC 3548](https://tools.ietf.org/html/rfc3548#section-3). |
90 | | -pub const STANDARD: Alphabet = |
91 | | - Alphabet::from_unchecked("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"); |
| 137 | +pub const STANDARD: Alphabet = Alphabet::from_str_unchecked( |
| 138 | + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", |
| 139 | +); |
92 | 140 |
|
93 | 141 | /// The URL safe alphabet (uses `-` and `_`). |
94 | 142 | /// |
95 | 143 | /// See [RFC 3548](https://tools.ietf.org/html/rfc3548#section-4). |
96 | | -pub const URL_SAFE: Alphabet = |
97 | | - Alphabet::from_unchecked("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"); |
| 144 | +pub const URL_SAFE: Alphabet = Alphabet::from_str_unchecked( |
| 145 | + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", |
| 146 | +); |
98 | 147 |
|
99 | 148 | /// The `crypt(3)` alphabet (uses `.` and `/` as the first two values). |
100 | 149 | /// |
101 | 150 | /// Not standardized, but folk wisdom on the net asserts that this alphabet is what crypt uses. |
102 | | -pub const CRYPT: Alphabet = |
103 | | - Alphabet::from_unchecked("./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"); |
| 151 | +pub const CRYPT: Alphabet = Alphabet::from_str_unchecked( |
| 152 | + "./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", |
| 153 | +); |
104 | 154 |
|
105 | 155 | /// The bcrypt alphabet. |
106 | | -pub const BCRYPT: Alphabet = |
107 | | - Alphabet::from_unchecked("./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"); |
| 156 | +pub const BCRYPT: Alphabet = Alphabet::from_str_unchecked( |
| 157 | + "./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789", |
| 158 | +); |
108 | 159 |
|
109 | 160 | /// The alphabet used in IMAP-modified UTF-7 (uses `+` and `,`). |
110 | 161 | /// |
111 | 162 | /// See [RFC 3501](https://tools.ietf.org/html/rfc3501#section-5.1.3) |
112 | | -pub const IMAP_MUTF7: Alphabet = |
113 | | - Alphabet::from_unchecked("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,"); |
| 163 | +pub const IMAP_MUTF7: Alphabet = Alphabet::from_str_unchecked( |
| 164 | + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,", |
| 165 | +); |
114 | 166 |
|
115 | 167 | /// The alphabet used in BinHex 4.0 files. |
116 | 168 | /// |
117 | 169 | /// See [BinHex 4.0 Definition](http://files.stairways.com/other/binhex-40-specs-info.txt) |
118 | | -pub const BIN_HEX: Alphabet = |
119 | | - Alphabet::from_unchecked("!\"#$%&'()*+,-0123456789@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdehijklmpqr"); |
| 170 | +pub const BIN_HEX: Alphabet = Alphabet::from_str_unchecked( |
| 171 | + "!\"#$%&'()*+,-0123456789@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdehijklmpqr", |
| 172 | +); |
120 | 173 |
|
121 | 174 | #[cfg(test)] |
122 | 175 | mod tests { |
123 | | - use crate::alphabet::Alphabet; |
| 176 | + use crate::alphabet::*; |
| 177 | + use std::convert::TryFrom as _; |
124 | 178 |
|
125 | | - #[should_panic(expected = "Duplicate bytes")] |
126 | 179 | #[test] |
127 | 180 | fn detects_duplicate_start() { |
128 | | - let _ = Alphabet::from("AACDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"); |
| 181 | + assert_eq!( |
| 182 | + ParseAlphabetError::DuplicatedByte(b'A'), |
| 183 | + Alphabet::from_str("AACDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/") |
| 184 | + .unwrap_err() |
| 185 | + ); |
129 | 186 | } |
130 | 187 |
|
131 | | - #[should_panic(expected = "Duplicate bytes")] |
132 | 188 | #[test] |
133 | 189 | fn detects_duplicate_end() { |
134 | | - let _ = Alphabet::from("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789//"); |
| 190 | + assert_eq!( |
| 191 | + ParseAlphabetError::DuplicatedByte(b'/'), |
| 192 | + Alphabet::from_str("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789//") |
| 193 | + .unwrap_err() |
| 194 | + ); |
135 | 195 | } |
136 | 196 |
|
137 | | - #[should_panic(expected = "Duplicate bytes")] |
138 | 197 | #[test] |
139 | 198 | fn detects_duplicate_middle() { |
140 | | - let _ = Alphabet::from("ABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/"); |
| 199 | + assert_eq!( |
| 200 | + ParseAlphabetError::DuplicatedByte(b'Z'), |
| 201 | + Alphabet::from_str("ABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/") |
| 202 | + .unwrap_err() |
| 203 | + ); |
141 | 204 | } |
142 | 205 |
|
143 | | - #[should_panic(expected = "Base64 char set length must be 64")] |
144 | 206 | #[test] |
145 | 207 | fn detects_length() { |
146 | | - let _ = Alphabet::from( |
147 | | - "xxxxxxxxxABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/", |
| 208 | + assert_eq!( |
| 209 | + ParseAlphabetError::InvalidLength, |
| 210 | + Alphabet::from_str( |
| 211 | + "xxxxxxxxxABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/", |
| 212 | + ) |
| 213 | + .unwrap_err() |
148 | 214 | ); |
149 | 215 | } |
150 | 216 |
|
151 | | - #[should_panic(expected = "Padding byte '=' is reserved")] |
152 | 217 | #[test] |
153 | 218 | fn detects_padding() { |
154 | | - let _ = Alphabet::from("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+="); |
| 219 | + assert_eq!( |
| 220 | + ParseAlphabetError::ReservedByte(b'='), |
| 221 | + Alphabet::from_str("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+=") |
| 222 | + .unwrap_err() |
| 223 | + ); |
155 | 224 | } |
156 | 225 |
|
157 | | - #[should_panic(expected = "Bytes must be printable")] |
158 | 226 | #[test] |
159 | 227 | fn detects_unprintable() { |
160 | 228 | // form feed |
161 | | - let _ = |
162 | | - Alphabet::from("\x0cBCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"); |
| 229 | + assert_eq!( |
| 230 | + ParseAlphabetError::UnprintableByte(0xc), |
| 231 | + Alphabet::from_str( |
| 232 | + "\x0cBCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" |
| 233 | + ) |
| 234 | + .unwrap_err() |
| 235 | + ); |
| 236 | + } |
| 237 | + |
| 238 | + #[test] |
| 239 | + fn same_as_unchecked() { |
| 240 | + assert_eq!( |
| 241 | + STANDARD, |
| 242 | + Alphabet::try_from("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/") |
| 243 | + .unwrap() |
| 244 | + ) |
163 | 245 | } |
164 | 246 | } |
0 commit comments