Skip to content

Commit b03ec78

Browse files
committed
checksum(validation): Rework base64 decoding
This commit distinguishes Base64 strings that are known to be invalid before decoding (because their length is not a multiple of 4) from Base64 strings that are found to be invalid during decoding (because their padding is invalid).
1 parent cbd4b6e commit b03ec78

File tree

1 file changed

+38
-28
lines changed

1 file changed

+38
-28
lines changed

src/uucore/src/lib/features/checksum/validate.rs

Lines changed: 38 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ use std::fmt::Display;
1111
use std::fs::File;
1212
use std::io::{self, BufReader, Read, Write, stdin};
1313

14-
use data_encoding::BASE64;
1514
use os_display::Quotable;
1615

1716
use crate::checksum::{AlgoKind, ChecksumError, SizedAlgoKind, digest_reader, unescape_filename};
@@ -467,35 +466,45 @@ fn get_filename_for_output(filename: &OsStr, input_is_stdin: bool) -> String {
467466

468467
/// Extract the expected digest from the checksum string
469468
fn get_expected_digest_as_hex_string(
470-
line_info: &LineInfo,
471-
len_hint: Option<usize>,
469+
checksum: &String,
470+
bytelen_hint: Option<usize>,
472471
) -> Option<Cow<'_, str>> {
473-
let ck = &line_info.checksum;
474-
475-
let against_hint = |len| len_hint.is_none_or(|l| l == len);
476-
477-
if ck.len() % 2 != 0 {
472+
if checksum.len() % 2 != 0 {
478473
// If the length of the digest is not a multiple of 2, then it
479474
// must be improperly formatted (1 byte is 2 hex characters)
480475
return None;
481476
}
482477

483-
// If the digest can be decoded as hexadecimal AND its length matches the
484-
// one expected (in case it's given), just go with it.
485-
if ck.as_bytes().iter().all(u8::is_ascii_hexdigit) && against_hint(ck.len()) {
486-
return Some(Cow::Borrowed(ck));
478+
let checks_hint = |len| bytelen_hint.is_none_or(|hint| hint == len);
479+
480+
// If the digest can be decoded as hexadecimal AND its byte length matches
481+
// the one expected (in case it's given), just go with it.
482+
if checksum.as_bytes().iter().all(u8::is_ascii_hexdigit) && checks_hint(checksum.len() / 2) {
483+
return Some(checksum.as_str().into());
487484
}
488485

489-
// If hexadecimal digest fails for any reason, interpret the digest as base 64.
490-
BASE64
491-
.decode(ck.as_bytes()) // Decode the string as encoded base64
492-
.map(hex::encode) // Encode it back as hexadecimal
493-
.map(Cow::<str>::Owned)
494-
.ok()
495-
.and_then(|s| {
496-
// Check the digest length
497-
if against_hint(s.len()) { Some(s) } else { None }
498-
})
486+
// If hexadecimal digest fails for any reason, interpret the digest as base
487+
// 64.
488+
489+
// But first, verify the encoded checksum length, which should be a
490+
// multiple of 4.
491+
if checksum.len() % 4 != 0 {
492+
return None;
493+
}
494+
495+
// Perform the decoding and be FORGIVING about it, to allow for checksums
496+
// with invalid padding to still be decoded. This is enforced by
497+
// `test_untagged_base64_matching_tag` in `test_cksum.rs`
498+
//
499+
// TODO: Ideally, we should not re-encode the result in hexadecimal, to avoid
500+
// unnecessary computation.
501+
502+
match base64_simd::forgiving_decode_to_vec(checksum.as_bytes()) {
503+
Ok(buffer) if checks_hint(buffer.len()) => Some(hex::encode(buffer).into()),
504+
// The resulting length is not as expected
505+
Ok(_) => None,
506+
Err(_) => None,
507+
}
499508
}
500509

501510
/// Returns a reader that reads from the specified file, or from stdin if `filename_to_check` is "-".
@@ -686,12 +695,13 @@ fn process_algo_based_line(
686695
// If the digest bitlen is known, we can check the format of the expected
687696
// checksum with it.
688697
let digest_char_length_hint = match (algo_kind, algo_byte_len) {
689-
(AlgoKind::Blake2b, Some(bytelen)) => Some(bytelen * 2),
698+
(AlgoKind::Blake2b, Some(bytelen)) => Some(bytelen),
690699
_ => None,
691700
};
692701

693-
let expected_checksum = get_expected_digest_as_hex_string(line_info, digest_char_length_hint)
694-
.ok_or(LineCheckError::ImproperlyFormatted)?;
702+
let expected_checksum =
703+
get_expected_digest_as_hex_string(&line_info.checksum, digest_char_length_hint)
704+
.ok_or(LineCheckError::ImproperlyFormatted)?;
695705

696706
let algo = SizedAlgoKind::from_unsized(algo_kind, algo_byte_len)?;
697707

@@ -714,7 +724,7 @@ fn process_non_algo_based_line(
714724
// Remove the leading asterisk if present - only for the first line
715725
filename_to_check = &filename_to_check[1..];
716726
}
717-
let expected_checksum = get_expected_digest_as_hex_string(line_info, None)
727+
let expected_checksum = get_expected_digest_as_hex_string(&line_info.checksum, None)
718728
.ok_or(LineCheckError::ImproperlyFormatted)?;
719729

720730
// When a specific algorithm name is input, use it and use the provided
@@ -1168,7 +1178,7 @@ mod tests {
11681178
let mut cached_line_format = None;
11691179
let line_info = LineInfo::parse(&line, &mut cached_line_format).unwrap();
11701180

1171-
let result = get_expected_digest_as_hex_string(&line_info, None);
1181+
let result = get_expected_digest_as_hex_string(&line_info.checksum, None);
11721182

11731183
assert_eq!(
11741184
result.unwrap(),
@@ -1183,7 +1193,7 @@ mod tests {
11831193
let mut cached_line_format = None;
11841194
let line_info = LineInfo::parse(&line, &mut cached_line_format).unwrap();
11851195

1186-
let result = get_expected_digest_as_hex_string(&line_info, None);
1196+
let result = get_expected_digest_as_hex_string(&line_info.checksum, None);
11871197

11881198
assert!(result.is_none());
11891199
}

0 commit comments

Comments
 (0)