Skip to content

Commit f4e5dc2

Browse files
committed
checksum: use the blake2b length as a hint to check the correctness of the expected digest
1 parent 10a9b0b commit f4e5dc2

File tree

1 file changed

+64
-31
lines changed

1 file changed

+64
-31
lines changed

src/uucore/src/lib/features/checksum.rs

Lines changed: 64 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
//
33
// For the full copyright and license information, please view the LICENSE
44
// file that was distributed with this source code.
5-
// spell-checker:ignore anotherfile invalidchecksum regexes JWZG FFFD xffname prefixfilename
5+
// spell-checker:ignore anotherfile invalidchecksum regexes JWZG FFFD xffname prefixfilename bytelen bitlen hexdigit
66

77
use data_encoding::BASE64;
88
use lazy_static::lazy_static;
@@ -515,22 +515,43 @@ fn get_filename_for_output(filename: &OsStr, input_is_stdin: bool) -> String {
515515
}
516516

517517
/// Extract the expected digest from the checksum string
518-
fn get_expected_digest_as_hex_string(line_info: &LineInfo) -> Option<Cow<str>> {
518+
fn get_expected_digest_as_hex_string(
519+
line_info: &LineInfo,
520+
len_hint: Option<usize>,
521+
) -> Option<Cow<str>> {
519522
let ck = &line_info.checksum;
520523

521-
if line_info.regex_str() == ALGO_BASED_REGEX_BASE64 {
522-
BASE64
523-
.decode(ck.as_bytes())
524-
.map(hex::encode)
525-
.map(Cow::Owned)
526-
.ok()
527-
} else if ck.len() % 2 == 0 {
528-
Some(Cow::Borrowed(ck))
529-
} else {
524+
// TODO MSRV 1.82, replace `is_some_and` with `is_none_or`
525+
// to improve readability. This closure returns true if a length hint is provided
526+
// and the argument isn't the same as the hint.
527+
let against_hint = |len| len_hint.is_some_and(|l| l != len);
528+
529+
if ck.len() % 2 != 0 {
530530
// If the length of the digest is not a multiple of 2, then it
531531
// must be improperly formatted (1 byte is 2 hex characters)
532-
None
532+
return None;
533533
}
534+
535+
// If the digest can be decoded as hexadecimal AND its length matches the
536+
// one expected (in case it's given), just go with it.
537+
if ck.as_bytes().iter().all(u8::is_ascii_hexdigit) && !against_hint(ck.len()) {
538+
return Some(Cow::Borrowed(ck));
539+
}
540+
541+
// If hexadecimal digest fails for any reason, interpret the digest as base 64.
542+
BASE64
543+
.decode(ck.as_bytes()) // Decode the string as encoded base64
544+
.map(hex::encode) // Encode it back as hexadecimal
545+
.map(Cow::<str>::Owned)
546+
.ok()
547+
.and_then(|s| {
548+
// Check the digest length
549+
if !against_hint(s.len()) {
550+
Some(s)
551+
} else {
552+
None
553+
}
554+
})
534555
}
535556

536557
/// Returns a reader that reads from the specified file, or from stdin if `filename_to_check` is "-".
@@ -604,12 +625,11 @@ fn get_input_file(filename: &OsStr) -> UResult<Box<dyn Read>> {
604625
}
605626
}
606627

607-
/// Extracts the algorithm name and length from the regex captures if the algo-based format is matched.
628+
/// Gets the algorithm name and length from the `LineInfo` if the algo-based format is matched.
608629
fn identify_algo_name_and_length(
609630
line_info: &LineInfo,
610631
algo_name_input: Option<&str>,
611632
) -> Option<(String, Option<usize>)> {
612-
// When the algo-based format is matched, extract details from regex captures
613633
let algorithm = line_info
614634
.algo_name
615635
.clone()
@@ -628,15 +648,20 @@ fn identify_algo_name_and_length(
628648
return None;
629649
}
630650

631-
let bits = line_info.algo_bitlen.map_or(Some(None), |bits| {
632-
if bits % 8 == 0 {
633-
Some(Some(bits / 8))
634-
} else {
635-
None // Return None to signal a divisibility issue
651+
let bytes = if let Some(bitlen) = line_info.algo_bit_len {
652+
if bitlen % 8 != 0 {
653+
// The given length is wrong
654+
return None;
636655
}
637-
})?;
656+
Some(bitlen / 8)
657+
} else if algorithm == ALGORITHM_OPTIONS_BLAKE2B {
658+
// Default length with BLAKE2b (64 bytes).
659+
Some(64)
660+
} else {
661+
None
662+
};
638663

639-
Some((algorithm, bits))
664+
Some((algorithm, bytes))
640665
}
641666

642667
/// Given a filename and an algorithm, compute the digest and compare it with
@@ -684,13 +709,21 @@ fn process_algo_based_line(
684709
opts: ChecksumOptions,
685710
) -> Result<(), LineCheckError> {
686711
let filename_to_check = line_info.filename.as_slice();
687-
let expected_checksum =
688-
get_expected_digest_as_hex_string(line_info).ok_or(LineCheckError::ImproperlyFormatted)?;
689712

690-
let (algo_name, algo_bitlen) = identify_algo_name_and_length(line_info, cli_algo_name)
713+
let (algo_name, algo_byte_len) = identify_algo_name_and_length(line_info, cli_algo_name)
691714
.ok_or(LineCheckError::ImproperlyFormatted)?;
692715

693-
let algo = detect_algo(&algo_name, algo_bitlen)?;
716+
// If the digest bitlen is known, we can check the format of the expected
717+
// checksum with it.
718+
let digest_char_length_hint = match (algo_name.as_str(), algo_byte_len) {
719+
(ALGORITHM_OPTIONS_BLAKE2B, Some(bytelen)) => Some(bytelen * 2),
720+
_ => None,
721+
};
722+
723+
let expected_checksum = get_expected_digest_as_hex_string(line_info, digest_char_length_hint)
724+
.ok_or(LineCheckError::ImproperlyFormatted)?;
725+
726+
let algo = detect_algo(&algo_name, algo_byte_len)?;
694727

695728
compute_and_check_digest_from_file(filename_to_check, &expected_checksum, algo, opts)
696729
}
@@ -709,12 +742,12 @@ fn process_non_algo_based_line(
709742
// Remove the leading asterisk if present - only for the first line
710743
filename_to_check = &filename_to_check[1..];
711744
}
712-
let expected_checksum =
713-
get_expected_digest_as_hex_string(line_info).ok_or(LineCheckError::ImproperlyFormatted)?;
745+
let expected_checksum = get_expected_digest_as_hex_string(line_info, None)
746+
.ok_or(LineCheckError::ImproperlyFormatted)?;
714747

715748
// When a specific algorithm name is input, use it and use the provided bits
716749
// except when dealing with blake2b, where we will detect the length
717-
let (algo_name, algo_bitlen) = if cli_algo_name == ALGORITHM_OPTIONS_BLAKE2B {
750+
let (algo_name, algo_byte_len) = if cli_algo_name == ALGORITHM_OPTIONS_BLAKE2B {
718751
// division by 2 converts the length of the Blake2b checksum from hexadecimal
719752
// characters to bytes, as each byte is represented by two hexadecimal characters.
720753
let length = Some(expected_checksum.len() / 2);
@@ -723,7 +756,7 @@ fn process_non_algo_based_line(
723756
(cli_algo_name.to_lowercase(), cli_algo_length)
724757
};
725758

726-
let algo = detect_algo(&algo_name, algo_bitlen)?;
759+
let algo = detect_algo(&algo_name, algo_byte_len)?;
727760

728761
compute_and_check_digest_from_file(filename_to_check, &expected_checksum, algo, opts)
729762
}
@@ -1312,7 +1345,7 @@ mod tests {
13121345
let mut cached_regex = None;
13131346
let line_info = LineInfo::parse(&line, &mut cached_regex).unwrap();
13141347

1315-
let result = get_expected_digest_as_hex_string(&line_info);
1348+
let result = get_expected_digest_as_hex_string(&line_info, None);
13161349

13171350
assert_eq!(
13181351
result.unwrap(),
@@ -1327,7 +1360,7 @@ mod tests {
13271360
let mut cached_regex = None;
13281361
let line_info = LineInfo::parse(&line, &mut cached_regex).unwrap();
13291362

1330-
let result = get_expected_digest_as_hex_string(&line_info);
1363+
let result = get_expected_digest_as_hex_string(&line_info, None);
13311364

13321365
assert!(result.is_none());
13331366
}

0 commit comments

Comments
 (0)