Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 39 additions & 29 deletions src/uucore/src/lib/features/checksum/validate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,14 @@
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.

// spell-checker:ignore rsplit hexdigit bitlen bytelen invalidchecksum inva idchecksum xffname
// spell-checker:ignore rsplit hexdigit bitlen invalidchecksum inva idchecksum xffname

use std::borrow::Cow;
use std::ffi::OsStr;
use std::fmt::Display;
use std::fs::File;
use std::io::{self, BufReader, Read, Write, stdin};

use data_encoding::BASE64;
use os_display::Quotable;

use crate::checksum::{AlgoKind, ChecksumError, SizedAlgoKind, digest_reader, unescape_filename};
Expand Down Expand Up @@ -467,35 +466,45 @@ fn get_filename_for_output(filename: &OsStr, input_is_stdin: bool) -> String {

/// Extract the expected digest from the checksum string
fn get_expected_digest_as_hex_string(
line_info: &LineInfo,
len_hint: Option<usize>,
checksum: &String,
byte_len_hint: Option<usize>,
) -> Option<Cow<'_, str>> {
let ck = &line_info.checksum;

let against_hint = |len| len_hint.is_none_or(|l| l == len);

if ck.len() % 2 != 0 {
if checksum.len() % 2 != 0 {
// If the length of the digest is not a multiple of 2, then it
// must be improperly formatted (1 hex digit is 2 characters)
return None;
}

// If the digest can be decoded as hexadecimal AND its length matches the
// one expected (in case it's given), just go with it.
if ck.as_bytes().iter().all(u8::is_ascii_hexdigit) && against_hint(ck.len()) {
return Some(Cow::Borrowed(ck));
let checks_hint = |len| byte_len_hint.is_none_or(|hint| hint == len);

// If the digest can be decoded as hexadecimal AND its byte length matches
// the one expected (in case it's given), just go with it.
if checksum.as_bytes().iter().all(u8::is_ascii_hexdigit) && checks_hint(checksum.len() / 2) {
return Some(checksum.as_str().into());
}

// If hexadecimal digest fails for any reason, interpret the digest as base 64.
BASE64
.decode(ck.as_bytes()) // Decode the string as encoded base64
.map(hex::encode) // Encode it back as hexadecimal
.map(Cow::<str>::Owned)
.ok()
.and_then(|s| {
// Check the digest length
if against_hint(s.len()) { Some(s) } else { None }
})
// If hexadecimal digest fails for any reason, interpret the digest as base
// 64.

// But first, verify the encoded checksum length, which should be a
// multiple of 4.
if checksum.len() % 4 != 0 {
return None;
}

// Perform the decoding and be FORGIVING about it, to allow for checksums
// with invalid padding to still be decoded. This is enforced by
// `test_untagged_base64_matching_tag` in `test_cksum.rs`
//
// TODO: Ideally, we should not re-encode the result in hexadecimal, to avoid
// un-necessary computation.

match base64_simd::forgiving_decode_to_vec(checksum.as_bytes()) {
Ok(buffer) if checks_hint(buffer.len()) => Some(hex::encode(buffer).into()),
// The resulting length is not as expected
Ok(_) => None,
Err(_) => None,
}
Comment on lines +502 to +507
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A functional approach might be cleaner, though I'm aware it's subjective. Feel free to ignore this comment ;-)

    base64_simd::forgiving_decode_to_vec(checksum.as_bytes())
        .ok()
        .and_then(|buffer| {
            if checks_hint(buffer.len()) {
                Some(hex::encode(buffer).into())
            } else {
                None
            }
        })

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I agree with you. However, I'm changing this in the following MR #9532, so I will keep it like so on this MR and avoid a funny rebase :)

}

/// Returns a reader that reads from the specified file, or from stdin if `filename_to_check` is "-".
Expand Down Expand Up @@ -691,12 +700,13 @@ fn process_algo_based_line(
// If the digest bitlen is known, we can check the format of the expected
// checksum with it.
let digest_char_length_hint = match (algo_kind, algo_byte_len) {
(AlgoKind::Blake2b, Some(bytelen)) => Some(bytelen * 2),
(AlgoKind::Blake2b, Some(byte_len)) => Some(byte_len),
_ => None,
};

let expected_checksum = get_expected_digest_as_hex_string(line_info, digest_char_length_hint)
.ok_or(LineCheckError::ImproperlyFormatted)?;
let expected_checksum =
get_expected_digest_as_hex_string(&line_info.checksum, digest_char_length_hint)
.ok_or(LineCheckError::ImproperlyFormatted)?;

let algo = SizedAlgoKind::from_unsized(algo_kind, algo_byte_len)?;

Expand All @@ -719,7 +729,7 @@ fn process_non_algo_based_line(
// Remove the leading asterisk if present - only for the first line
filename_to_check = &filename_to_check[1..];
}
let expected_checksum = get_expected_digest_as_hex_string(line_info, None)
let expected_checksum = get_expected_digest_as_hex_string(&line_info.checksum, None)
.ok_or(LineCheckError::ImproperlyFormatted)?;

// When a specific algorithm name is input, use it and use the provided
Expand Down Expand Up @@ -1173,7 +1183,7 @@ mod tests {
let mut cached_line_format = None;
let line_info = LineInfo::parse(&line, &mut cached_line_format).unwrap();

let result = get_expected_digest_as_hex_string(&line_info, None);
let result = get_expected_digest_as_hex_string(&line_info.checksum, None);

assert_eq!(
result.unwrap(),
Expand All @@ -1188,7 +1198,7 @@ mod tests {
let mut cached_line_format = None;
let line_info = LineInfo::parse(&line, &mut cached_line_format).unwrap();

let result = get_expected_digest_as_hex_string(&line_info, None);
let result = get_expected_digest_as_hex_string(&line_info.checksum, None);

assert!(result.is_none());
}
Expand Down
83 changes: 65 additions & 18 deletions tests/by-util/test_cksum.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2458,6 +2458,71 @@ mod gnu_cksum_c {
scene
}

fn make_scene_with_comment() -> TestScenario {
let scene = make_scene();

scene
.fixtures
.append("CHECKSUMS", "# Very important comment\n");

scene
}

fn make_scene_with_invalid_line() -> TestScenario {
let scene = make_scene_with_comment();

scene.fixtures.append("CHECKSUMS", "invalid_line\n");

scene
}

#[test]
fn test_tagged_invalid_length() {
let (at, mut ucmd) = at_and_ucmd!();

at.write(
"sha2-bad-length.sum",
"SHA2-128 (/dev/null) = 38b060a751ac96384cd9327eb1b1e36a",
);

ucmd.arg("--check")
.arg("sha2-bad-length.sum")
.fails()
.stderr_contains("sha2-bad-length.sum: no properly formatted checksum lines found");
}

#[test]
#[cfg_attr(not(unix), ignore = "/dev/null is only available on UNIX")]
fn test_untagged_base64_matching_tag() {
let (at, mut ucmd) = at_and_ucmd!();

at.write("tag-prefix.sum", "SHA1+++++++++++++++++++++++= /dev/null");

ucmd.arg("--check")
.arg("-a")
.arg("sha1")
.arg("tag-prefix.sum")
.fails()
.stderr_contains("WARNING: 1 computed checksum did NOT match");
}

#[test]
#[cfg_attr(windows, ignore = "Awkward filename is not supported on windows")]
fn test_awkward_filename() {
let ts = TestScenario::new(util_name!());
let at = &ts.fixtures;

let awkward_file = "abc (f) = abc";

at.touch(awkward_file);

let result = ts.ucmd().arg("-a").arg("sha1").arg(awkward_file).succeeds();

at.write_bytes("tag-awkward.sum", result.stdout());

ts.ucmd().arg("-c").arg("tag-awkward.sum").succeeds();
}

#[test]
#[ignore = "todo"]
fn test_signed_checksums() {
Expand Down Expand Up @@ -2509,16 +2574,6 @@ mod gnu_cksum_c {
.no_output();
}

fn make_scene_with_comment() -> TestScenario {
let scene = make_scene();

scene
.fixtures
.append("CHECKSUMS", "# Very important comment\n");

scene
}

#[test]
fn test_status_with_comment() {
let scene = make_scene_with_comment();
Expand All @@ -2532,14 +2587,6 @@ mod gnu_cksum_c {
.no_output();
}

fn make_scene_with_invalid_line() -> TestScenario {
let scene = make_scene_with_comment();

scene.fixtures.append("CHECKSUMS", "invalid_line\n");

scene
}

#[test]
fn test_check_strict() {
let scene = make_scene_with_invalid_line();
Expand Down
Loading