|
| 1 | +/* |
| 2 | + * |
| 3 | + * Copyright (c) 2020-2022 Project CHIP Authors |
| 4 | + * |
| 5 | + * Licensed under the Apache License, Version 2.0 (the "License"); |
| 6 | + * you may not use this file except in compliance with the License. |
| 7 | + * You may obtain a copy of the License at |
| 8 | + * |
| 9 | + * http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | + * |
| 11 | + * Unless required by applicable law or agreed to in writing, software |
| 12 | + * distributed under the License is distributed on an "AS IS" BASIS, |
| 13 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | + * See the License for the specific language governing permissions and |
| 15 | + * limitations under the License. |
| 16 | + */ |
| 17 | + |
| 18 | +//! Base38 encoding and decoding functions. |
| 19 | +
|
| 20 | +use crate::error::Error; |
| 21 | + |
| 22 | +const BASE38_CHARS: [char; 38] = [ |
| 23 | + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', |
| 24 | + 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '-', '.', |
| 25 | +]; |
| 26 | + |
| 27 | +const UNUSED: u8 = 255; |
| 28 | + |
| 29 | +// map of base38 character to numeric value |
| 30 | +// subtract 45 from the character, then index into this array, if possible |
| 31 | +const DECODE_BASE38: [u8; 46] = [ |
| 32 | + 36, // '-', =45 |
| 33 | + 37, // '.', =46 |
| 34 | + UNUSED, // '/', =47 |
| 35 | + 0, // '0', =48 |
| 36 | + 1, // '1', =49 |
| 37 | + 2, // '2', =50 |
| 38 | + 3, // '3', =51 |
| 39 | + 4, // '4', =52 |
| 40 | + 5, // '5', =53 |
| 41 | + 6, // '6', =54 |
| 42 | + 7, // '7', =55 |
| 43 | + 8, // '8', =56 |
| 44 | + 9, // '9', =57 |
| 45 | + UNUSED, // ':', =58 |
| 46 | + UNUSED, // ';', =59 |
| 47 | + UNUSED, // '<', =60 |
| 48 | + UNUSED, // '=', =61 |
| 49 | + UNUSED, // '>', =62 |
| 50 | + UNUSED, // '?', =63 |
| 51 | + UNUSED, // '@', =64 |
| 52 | + 10, // 'A', =65 |
| 53 | + 11, // 'B', =66 |
| 54 | + 12, // 'C', =67 |
| 55 | + 13, // 'D', =68 |
| 56 | + 14, // 'E', =69 |
| 57 | + 15, // 'F', =70 |
| 58 | + 16, // 'G', =71 |
| 59 | + 17, // 'H', =72 |
| 60 | + 18, // 'I', =73 |
| 61 | + 19, // 'J', =74 |
| 62 | + 20, // 'K', =75 |
| 63 | + 21, // 'L', =76 |
| 64 | + 22, // 'M', =77 |
| 65 | + 23, // 'N', =78 |
| 66 | + 24, // 'O', =79 |
| 67 | + 25, // 'P', =80 |
| 68 | + 26, // 'Q', =81 |
| 69 | + 27, // 'R', =82 |
| 70 | + 28, // 'S', =83 |
| 71 | + 29, // 'T', =84 |
| 72 | + 30, // 'U', =85 |
| 73 | + 31, // 'V', =86 |
| 74 | + 32, // 'W', =87 |
| 75 | + 33, // 'X', =88 |
| 76 | + 34, // 'Y', =89 |
| 77 | + 35, // 'Z', =90 |
| 78 | +]; |
| 79 | + |
| 80 | +const BASE38_CHARACTERS_NEEDED_IN_NBYTES_CHUNK: [u8; 3] = [2, 4, 5]; |
| 81 | +const RADIX: u32 = BASE38_CHARS.len() as u32; |
| 82 | + |
| 83 | +/// Encode a byte array into a base38 string. |
| 84 | +/// |
| 85 | +/// # Arguments |
| 86 | +/// * `bytes` - byte array to encode |
| 87 | +/// * `length` - optional length of the byte array to encode. If not specified, the entire byte array is encoded. |
| 88 | +pub fn encode(bytes: &[u8], length: Option<usize>) -> String { |
| 89 | + let mut offset = 0; |
| 90 | + let mut result = String::new(); |
| 91 | + |
| 92 | + // if length is specified, use it, otherwise use the length of the byte array |
| 93 | + // if length is specified but is greater than the length of the byte array, use the length of the byte array |
| 94 | + let b_len = bytes.len(); |
| 95 | + let length = length.map(|l| l.min(b_len)).unwrap_or(b_len); |
| 96 | + |
| 97 | + while offset < length { |
| 98 | + let remaining = length - offset; |
| 99 | + match remaining.cmp(&2) { |
| 100 | + std::cmp::Ordering::Greater => { |
| 101 | + result.push_str(&encode_base38( |
| 102 | + ((bytes[offset + 2] as u32) << 16) |
| 103 | + | ((bytes[offset + 1] as u32) << 8) |
| 104 | + | (bytes[offset] as u32), |
| 105 | + 5, |
| 106 | + )); |
| 107 | + offset += 3; |
| 108 | + } |
| 109 | + std::cmp::Ordering::Equal => { |
| 110 | + result.push_str(&encode_base38( |
| 111 | + ((bytes[offset + 1] as u32) << 8) | (bytes[offset] as u32), |
| 112 | + 4, |
| 113 | + )); |
| 114 | + break; |
| 115 | + } |
| 116 | + std::cmp::Ordering::Less => { |
| 117 | + result.push_str(&encode_base38(bytes[offset] as u32, 2)); |
| 118 | + break; |
| 119 | + } |
| 120 | + } |
| 121 | + } |
| 122 | + |
| 123 | + result |
| 124 | +} |
| 125 | + |
| 126 | +fn encode_base38(mut value: u32, char_count: u8) -> String { |
| 127 | + let mut result = String::new(); |
| 128 | + for _ in 0..char_count { |
| 129 | + let remainder = value % 38; |
| 130 | + result.push(BASE38_CHARS[remainder as usize]); |
| 131 | + value = (value - remainder) / 38; |
| 132 | + } |
| 133 | + result |
| 134 | +} |
| 135 | + |
| 136 | +/// Decode a base38-encoded string into a byte slice |
| 137 | +/// |
| 138 | +/// # Arguments |
| 139 | +/// * `base38_str` - base38-encoded string to decode |
| 140 | +/// |
| 141 | +/// Fails if the string contains invalid characters |
| 142 | +pub fn decode(base38_str: &str) -> Result<Vec<u8>, Error> { |
| 143 | + let mut result = Vec::new(); |
| 144 | + let mut base38_characters_number: usize = base38_str.len(); |
| 145 | + let mut decoded_base38_characters: usize = 0; |
| 146 | + |
| 147 | + while base38_characters_number > 0 { |
| 148 | + let base38_characters_in_chunk: usize; |
| 149 | + let bytes_in_decoded_chunk: usize; |
| 150 | + |
| 151 | + if base38_characters_number >= BASE38_CHARACTERS_NEEDED_IN_NBYTES_CHUNK[2] as usize { |
| 152 | + base38_characters_in_chunk = BASE38_CHARACTERS_NEEDED_IN_NBYTES_CHUNK[2] as usize; |
| 153 | + bytes_in_decoded_chunk = 3; |
| 154 | + } else if base38_characters_number == BASE38_CHARACTERS_NEEDED_IN_NBYTES_CHUNK[1] as usize { |
| 155 | + base38_characters_in_chunk = BASE38_CHARACTERS_NEEDED_IN_NBYTES_CHUNK[1] as usize; |
| 156 | + bytes_in_decoded_chunk = 2; |
| 157 | + } else if base38_characters_number == BASE38_CHARACTERS_NEEDED_IN_NBYTES_CHUNK[0] as usize { |
| 158 | + base38_characters_in_chunk = BASE38_CHARACTERS_NEEDED_IN_NBYTES_CHUNK[0] as usize; |
| 159 | + bytes_in_decoded_chunk = 1; |
| 160 | + } else { |
| 161 | + return Err(Error::InvalidData); |
| 162 | + } |
| 163 | + |
| 164 | + let mut value = 0u32; |
| 165 | + |
| 166 | + for i in (1..=base38_characters_in_chunk).rev() { |
| 167 | + let mut base38_chars = base38_str.chars(); |
| 168 | + let v = decode_char(base38_chars.nth(decoded_base38_characters + i - 1).unwrap())?; |
| 169 | + |
| 170 | + value = value * RADIX + v as u32; |
| 171 | + } |
| 172 | + |
| 173 | + decoded_base38_characters += base38_characters_in_chunk; |
| 174 | + base38_characters_number -= base38_characters_in_chunk; |
| 175 | + |
| 176 | + for _i in 0..bytes_in_decoded_chunk { |
| 177 | + result.push(value as u8); |
| 178 | + value >>= 8; |
| 179 | + } |
| 180 | + |
| 181 | + if value > 0 { |
| 182 | + // encoded value is too big to represent a correct chunk of size 1, 2 or 3 bytes |
| 183 | + return Err(Error::InvalidArgument); |
| 184 | + } |
| 185 | + } |
| 186 | + |
| 187 | + Ok(result) |
| 188 | +} |
| 189 | + |
| 190 | +fn decode_char(c: char) -> Result<u8, Error> { |
| 191 | + let c = c as u8; |
| 192 | + if !(45..=90).contains(&c) { |
| 193 | + return Err(Error::InvalidData); |
| 194 | + } |
| 195 | + |
| 196 | + let c = DECODE_BASE38[c as usize - 45]; |
| 197 | + if c == UNUSED { |
| 198 | + return Err(Error::InvalidData); |
| 199 | + } |
| 200 | + |
| 201 | + Ok(c) |
| 202 | +} |
| 203 | + |
#[cfg(test)]
mod tests {
    use super::*;

    // Reference vector: raw bytes and the base38 text they correspond to.
    const DECODED: [u8; 11] = [
        0x88, 0xff, 0xa7, 0x91, 0x50, 0x40, 0x00, 0x47, 0x51, 0xdd, 0x02,
    ];
    const ENCODED: &str = "-MOA57ZU02IT2L2BJ00";

    #[test]
    fn can_base38_encode() {
        // No explicit length, the exact length, and a length greater than the
        // length of the byte array must all encode the full input.
        let requested_lengths = [None, Some(11), Some(12)];
        for &length in &requested_lengths {
            assert_eq!(encode(&DECODED, length), ENCODED);
        }
    }

    #[test]
    fn can_base38_decode() {
        let bytes = decode(ENCODED).expect("can not decode base38");
        assert_eq!(bytes, DECODED);
    }
}
0 commit comments