Skip to content

Commit 5461873

Browse files
committed
checksum(validate): Check calculated checksum against raw expected
to avoid decoding base64 and directly re-encoding it in hexadecimal
1 parent 1a43cbd commit 5461873

File tree

1 file changed

+28
-32
lines changed

1 file changed

+28
-32
lines changed

src/uucore/src/lib/features/checksum/validate.rs

Lines changed: 28 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55

66
// spell-checker:ignore rsplit hexdigit bitlen bytelen invalidchecksum inva idchecksum xffname
77

8-
use std::borrow::Cow;
98
use std::ffi::OsStr;
109
use std::fmt::Display;
1110
use std::fs::File;
@@ -16,6 +15,7 @@ use os_display::Quotable;
1615
use crate::checksum::{AlgoKind, ChecksumError, SizedAlgoKind, digest_reader, unescape_filename};
1716
use crate::error::{FromIo, UError, UResult, USimpleError};
1817
use crate::quoting_style::{QuotingStyle, locale_aware_escape_name};
18+
use crate::sum::DigestOutput;
1919
use crate::{
2020
os_str_as_bytes, os_str_from_bytes, read_os_string_lines, show, show_error, show_warning_caps,
2121
util_name,
@@ -483,14 +483,12 @@ fn get_filename_for_output(filename: &OsStr, input_is_stdin: bool) -> String {
483483
.to_string()
484484
}
485485

486-
/// Extract the expected digest from the checksum string
487-
fn get_expected_digest_as_hex_string(
488-
checksum: &String,
489-
bytelen_hint: Option<usize>,
490-
) -> Option<Cow<'_, str>> {
486+
/// Extract the expected digest from the checksum string and decode it
487+
fn get_raw_expected_digest(checksum: &str, bytelen_hint: Option<usize>) -> Option<Vec<u8>> {
488+
// If the length of the digest is not a multiple of 2, then it must be
489+
// improperly formatted (1 byte is 2 hex characters, and base64 strings
490+
// should always have a length that is a multiple of 4).
491491
if checksum.len() % 2 != 0 {
492-
// If the length of the digest is not a multiple of 2, then it
493-
// must be improperly formatted (1 hex digit is 2 characters)
494492
return None;
495493
}
496494

@@ -499,11 +497,11 @@ fn get_expected_digest_as_hex_string(
499497
// If the digest can be decoded as hexadecimal AND its byte length matches
500498
// the one expected (in case it's given), just go with it.
501499
if checksum.as_bytes().iter().all(u8::is_ascii_hexdigit) && checks_hint(checksum.len() / 2) {
502-
return Some(checksum.as_str().into());
500+
return hex::decode(checksum).ok();
503501
}
504502

505-
// If hexadecimal digest fails for any reason, interpret the digest as base
506-
// 64.
503+
// If the checksum cannot be decoded as hexadecimal, interpret it as Base64
504+
// instead.
507505

508506
// But first, verify the encoded checksum length, which should be a
509507
// multiple of 4.
@@ -514,16 +512,10 @@ fn get_expected_digest_as_hex_string(
514512
// Perform the decoding and be FORGIVING about it, to allow for checksums
515513
// with invalid padding to still be decoded. This is enforced by
516514
// `test_untagged_base64_matching_tag` in `test_cksum.rs`
517-
//
518-
// TODO: Ideally, we should not re-encode the result in hexadecimal, to avoid
519-
// un-necessary computation.
520-
521-
match base64_simd::forgiving_decode_to_vec(checksum.as_bytes()) {
522-
Ok(buffer) if checks_hint(buffer.len()) => Some(hex::encode(buffer).into()),
523-
// The resulting length is not as expected
524-
Ok(_) => None,
525-
Err(_) => None,
526-
}
515+
516+
base64_simd::forgiving_decode_to_vec(checksum.as_bytes())
517+
.ok()
518+
.filter(|raw| checks_hint(raw.len()))
527519
}
528520

529521
/// Returns a reader that reads from the specified file, or from stdin if `filename_to_check` is "-".
@@ -663,7 +655,7 @@ fn identify_algo_name_and_length(
663655
/// the expected one.
664656
fn compute_and_check_digest_from_file(
665657
filename: &[u8],
666-
expected_checksum: &str,
658+
expected_checksum: &[u8],
667659
algo: SizedAlgoKind,
668660
opts: ChecksumValidateOptions,
669661
) -> Result<(), LineCheckError> {
@@ -683,7 +675,11 @@ fn compute_and_check_digest_from_file(
683675
digest_reader(&mut digest, &mut file_reader, /* binary */ false).unwrap();
684676

685677
// Do the checksum validation
686-
let checksum_correct = expected_checksum == calculated_checksum.to_hex()?;
678+
let checksum_correct = match calculated_checksum {
679+
DigestOutput::Vec(data) => data == expected_checksum,
680+
DigestOutput::Crc(n) => n.to_be_bytes() == expected_checksum,
681+
DigestOutput::U16(n) => n.to_be_bytes() == expected_checksum,
682+
};
687683
print_file_report(
688684
std::io::stdout(),
689685
filename,
@@ -718,9 +714,8 @@ fn process_algo_based_line(
718714
_ => None,
719715
};
720716

721-
let expected_checksum =
722-
get_expected_digest_as_hex_string(&line_info.checksum, digest_char_length_hint)
723-
.ok_or(LineCheckError::ImproperlyFormatted)?;
717+
let expected_checksum = get_raw_expected_digest(&line_info.checksum, digest_char_length_hint)
718+
.ok_or(LineCheckError::ImproperlyFormatted)?;
724719

725720
let algo = SizedAlgoKind::from_unsized(algo_kind, algo_byte_len)?;
726721

@@ -743,7 +738,7 @@ fn process_non_algo_based_line(
743738
// Remove the leading asterisk if present - only for the first line
744739
filename_to_check = &filename_to_check[1..];
745740
}
746-
let expected_checksum = get_expected_digest_as_hex_string(&line_info.checksum, None)
741+
let expected_checksum = get_raw_expected_digest(&line_info.checksum, None)
747742
.ok_or(LineCheckError::ImproperlyFormatted)?;
748743

749744
// When a specific algorithm name is input, use it and use the provided
@@ -754,11 +749,11 @@ fn process_non_algo_based_line(
754749
// no division is needed here: the expected checksum has already been
755750
// decoded to raw bytes, so its length is directly the Blake2b digest
756751
// length in bytes.
757-
(AlgoKind::Blake2b, Some(expected_checksum.len() / 2))
752+
(AlgoKind::Blake2b, Some(expected_checksum.len()))
758753
}
759754
algo @ (AlgoKind::Sha2 | AlgoKind::Sha3) => {
760755
// multiplication by 8 to get the number of bits (the length is in bytes)
761-
(algo, Some(expected_checksum.len() * 4))
756+
(algo, Some(expected_checksum.len() * 8))
762757
}
763758
_ => (cli_algo_kind, cli_algo_length),
764759
};
@@ -1197,11 +1192,12 @@ mod tests {
11971192
let mut cached_line_format = None;
11981193
let line_info = LineInfo::parse(&line, &mut cached_line_format).unwrap();
11991194

1200-
let result = get_expected_digest_as_hex_string(&line_info.checksum, None);
1195+
let result = get_raw_expected_digest(&line_info.checksum, None);
12011196

12021197
assert_eq!(
12031198
result.unwrap(),
1204-
"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
1199+
hex::decode(b"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855")
1200+
.unwrap()
12051201
);
12061202
}
12071203

@@ -1212,7 +1208,7 @@ mod tests {
12121208
let mut cached_line_format = None;
12131209
let line_info = LineInfo::parse(&line, &mut cached_line_format).unwrap();
12141210

1215-
let result = get_expected_digest_as_hex_string(&line_info.checksum, None);
1211+
let result = get_raw_expected_digest(&line_info.checksum, None);
12161212

12171213
assert!(result.is_none());
12181214
}

0 commit comments

Comments
 (0)