|
| 1 | +//! Base32 encoding and decoding implementation. |
| 2 | +//! |
| 3 | +//! Base32 is a binary-to-text encoding scheme that represents binary data using 32 ASCII characters |
| 4 | +//! (A-Z and 2-7). It's commonly used when case-insensitive encoding is needed or when avoiding |
| 5 | +//! characters that might be confused (like 0/O or 1/l). |
| 6 | +//! |
| 7 | +//! This implementation follows the standard Base32 alphabet as defined in RFC 4648. |
| 8 | +
|
/// The RFC 4648 Base32 alphabet; the symbol for a 5-bit value `v` is `B32_CHARSET[v]`.
const B32_CHARSET: &[u8; 32] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ234567";

/// Encodes the given bytes into base32.
///
/// The input is processed in groups of five bytes (40 bits). Each group yields eight
/// output symbols taken from the standard alphabet, most significant bits first. A final
/// group shorter than five bytes is zero-extended on the right, and the symbols that
/// would carry no input bits are replaced with `=` so the output length is always a
/// multiple of 8.
///
/// # Arguments
///
/// * `data` - A byte slice to encode
///
/// # Returns
///
/// A `Vec<u8>` containing the base32-encoded data with padding. Empty input produces an
/// empty vector.
///
/// # Examples
///
/// ```
/// use the_algorithms_rust::ciphers::base32_encode;
/// assert_eq!(base32_encode(b"Hello World!"), b"JBSWY3DPEBLW64TMMQQQ====");
/// assert_eq!(base32_encode(b"123456"), b"GEZDGNBVGY======");
/// assert_eq!(base32_encode(b"some long complex string"), b"ONXW2ZJANRXW4ZZAMNXW24DMMV4CA43UOJUW4ZY=");
/// ```
pub fn base32_encode(data: &[u8]) -> Vec<u8> {
    // Every group of up to five input bytes produces exactly eight output bytes,
    // so the final length is known before encoding starts.
    let mut result = Vec::with_capacity(data.len().div_ceil(5) * 8);

    for group in data.chunks(5) {
        // Left-align the group in a 40-bit window; bytes missing from a short final
        // group stay zero, which is the zero-extension the encoding requires.
        let mut window = [0u8; 5];
        window[..group.len()].copy_from_slice(group);
        let bits = window
            .iter()
            .fold(0u64, |acc, &byte| (acc << 8) | u64::from(byte));

        // Symbols that contain at least one real input bit: ceil(8 * len / 5).
        let symbol_count = (group.len() * 8).div_ceil(5);

        result.extend((0..8).map(|position| {
            if position < symbol_count {
                // Symbol 0 lives in bits 39..35, symbol 7 in bits 4..0.
                // The mask keeps the value in 0..32, so the index is always in range.
                let index = (bits >> (35 - 5 * position)) & 0x1F;
                B32_CHARSET[index as usize]
            } else {
                b'='
            }
        }));
    }

    result
}
| 65 | + |
/// Decodes base32-encoded data into bytes.
///
/// Trailing `=` padding is stripped, then each remaining symbol contributes five bits
/// (most significant first) to a bit stream that is emitted one byte at a time.
///
/// Decoding is lenient about framing: padding may be missing entirely, and any bits left
/// over after the last complete byte are discarded. It is strict about symbols: only the
/// uppercase RFC 4648 alphabet (`A`-`Z`, `2`-`7`) is accepted, and a `=` anywhere other
/// than the end is rejected as an invalid character.
///
/// # Arguments
///
/// * `data` - A byte slice containing base32-encoded data
///
/// # Returns
///
/// * `Ok(Vec<u8>)` - Successfully decoded bytes (empty for empty or padding-only input)
/// * `Err(String)` - Error message if the input is invalid
///
/// # Errors
///
/// Returns an error if:
/// - The input is not valid UTF-8
/// - The input contains a character outside the base32 alphabet, including non-ASCII
///   characters
///
/// # Examples
///
/// ```
/// use the_algorithms_rust::ciphers::base32_decode;
/// assert_eq!(base32_decode(b"JBSWY3DPEBLW64TMMQQQ====").unwrap(), b"Hello World!");
/// assert_eq!(base32_decode(b"GEZDGNBVGY======").unwrap(), b"123456");
/// assert_eq!(base32_decode(b"ONXW2ZJANRXW4ZZAMNXW24DMMV4CA43UOJUW4ZY=").unwrap(), b"some long complex string");
/// ```
pub fn base32_decode(data: &[u8]) -> Result<Vec<u8>, String> {
    // The UTF-8 check is kept so callers continue to see the same error for malformed
    // byte sequences, and so the invalid-character message can name a whole character.
    let text =
        std::str::from_utf8(data).map_err(|_| "Invalid UTF-8 in base32 data".to_string())?;
    let symbols = text.trim_end_matches('=');

    // Eight symbols decode to five bytes; a shorter tail yields at most four more.
    let mut result = Vec::with_capacity(symbols.len() / 8 * 5 + 4);
    let mut pending: u32 = 0; // bits read but not yet emitted, right-aligned
    let mut pending_bits: u32 = 0; // how many low bits of `pending` are meaningful

    for ch in symbols.chars() {
        // Match on the `char` itself. Casting to `u8` first would truncate non-ASCII
        // characters onto valid symbols (for example U+0141 would alias to 'A').
        let value = match ch {
            'A'..='Z' => ch as u32 - 'A' as u32,
            '2'..='7' => ch as u32 - '2' as u32 + 26,
            _ => return Err(format!("Invalid base32 character: {ch}")),
        };

        pending = (pending << 5) | value;
        pending_bits += 5;

        if pending_bits >= 8 {
            pending_bits -= 8;
            // After the shift exactly eight significant bits remain, so the cast is lossless.
            result.push((pending >> pending_bits) as u8);
            pending &= (1 << pending_bits) - 1;
        }
    }

    // Fewer than eight bits may remain here; they are dropped rather than emitted.
    Ok(result)
}
| 130 | + |
#[cfg(test)]
mod tests {
    use super::*;

    // Encoding: fixed inputs compared against known base32 output.

    #[test]
    fn test_encode_hello_world() {
        let encoded = base32_encode(b"Hello World!");
        assert_eq!(encoded, b"JBSWY3DPEBLW64TMMQQQ====");
    }

    #[test]
    fn test_encode_numbers() {
        let encoded = base32_encode(b"123456");
        assert_eq!(encoded, b"GEZDGNBVGY======");
    }

    #[test]
    fn test_encode_long_string() {
        let encoded = base32_encode(b"some long complex string");
        assert_eq!(encoded, b"ONXW2ZJANRXW4ZZAMNXW24DMMV4CA43UOJUW4ZY=");
    }

    #[test]
    fn test_encode_empty() {
        let encoded = base32_encode(b"");
        assert_eq!(encoded, b"");
    }

    #[test]
    fn test_encode_single_char() {
        let encoded = base32_encode(b"A");
        assert_eq!(encoded, b"IE======");
    }

    // Decoding: the inverse of the encoding cases above.

    #[test]
    fn test_decode_hello_world() {
        let decoded = base32_decode(b"JBSWY3DPEBLW64TMMQQQ====").unwrap();
        assert_eq!(decoded, b"Hello World!");
    }

    #[test]
    fn test_decode_numbers() {
        let decoded = base32_decode(b"GEZDGNBVGY======").unwrap();
        assert_eq!(decoded, b"123456");
    }

    #[test]
    fn test_decode_long_string() {
        let decoded = base32_decode(b"ONXW2ZJANRXW4ZZAMNXW24DMMV4CA43UOJUW4ZY=").unwrap();
        assert_eq!(decoded, b"some long complex string");
    }

    #[test]
    fn test_decode_empty() {
        let decoded = base32_decode(b"").unwrap();
        assert_eq!(decoded, b"");
    }

    #[test]
    fn test_decode_single_char() {
        let decoded = base32_decode(b"IE======").unwrap();
        assert_eq!(decoded, b"A");
    }

    #[test]
    fn test_decode_without_padding() {
        // Same payload as the "Hello World!" case, with the trailing '=' removed.
        let decoded = base32_decode(b"JBSWY3DPEBLW64TMMQQQ").unwrap();
        assert_eq!(decoded, b"Hello World!");
    }

    #[test]
    fn test_decode_invalid_character() {
        let outcome = base32_decode(b"INVALID!@#$");
        assert!(outcome.is_err());

        let message = outcome.unwrap_err();
        assert!(message.contains("Invalid base32 character"));
    }

    // Round trips: decode(encode(x)) must give back x.

    #[test]
    fn test_roundtrip_hello() {
        let original = b"Hello";
        let decoded = base32_decode(&base32_encode(original)).unwrap();
        assert_eq!(decoded, original);
    }

    #[test]
    fn test_roundtrip_various_strings() {
        // Lengths 1 through 5 cover every possible amount of padding.
        let test_cases: [&[u8]; 8] = [
            b"a",
            b"ab",
            b"abc",
            b"abcd",
            b"abcde",
            b"The quick brown fox jumps over the lazy dog",
            b"1234567890",
            b"!@#$%^&*()",
        ];

        for original in test_cases {
            let decoded = base32_decode(&base32_encode(original)).unwrap();
            assert_eq!(decoded, original, "Failed for: {original:?}");
        }
    }

    #[test]
    fn test_all_charset_characters() {
        // Multiplying by 8 moves `i` into the top five bits of the byte, so the first
        // encoded symbol is alphabet entry `i`; the loop therefore visits every symbol.
        for i in 0..32 {
            let data = vec![i * 8];
            let decoded = base32_decode(&base32_encode(&data)).unwrap();
            assert_eq!(decoded, data);
        }
    }

    #[test]
    fn test_binary_data() {
        // Non-text bytes, including both ends of the u8 range.
        let binary_data = vec![0x00, 0x01, 0x02, 0xFF, 0xFE, 0xFD];
        let decoded = base32_decode(&base32_encode(&binary_data)).unwrap();
        assert_eq!(decoded, binary_data);
    }

    #[test]
    fn test_padding_variations() {
        // Each input length from 1 to 6 bytes paired with its expected padded encoding.
        let test_cases: [(&[u8], &[u8]); 6] = [
            (b"f", b"MY======"),
            (b"fo", b"MZXQ===="),
            (b"foo", b"MZXW6==="),
            (b"foob", b"MZXW6YQ="),
            (b"fooba", b"MZXW6YTB"),
            (b"foobar", b"MZXW6YTBOI======"),
        ];

        for (input, expected) in test_cases {
            let encoded = base32_encode(input);
            assert_eq!(encoded, expected, "Encoding failed for: {input:?}");

            let decoded = base32_decode(&encoded).unwrap();
            assert_eq!(decoded, input, "Roundtrip failed for: {input:?}");
        }
    }
}
0 commit comments