Skip to content

Commit cae71a8

Browse files
authored
Merge pull request #6868 from RenjiSann/checksum-fix-decoding
checksum: Further rework
2 parents c315cd9 + cfc66f9 commit cae71a8

File tree

2 files changed

+139
-152
lines changed

2 files changed

+139
-152
lines changed

src/uucore/src/lib/features/checksum.rs

Lines changed: 84 additions & 107 deletions
Original file line numberDiff line numberDiff line change
@@ -68,11 +68,27 @@ pub struct HashAlgorithm {
6868
pub bits: usize,
6969
}
7070

71+
/// This structure holds the count of checksum test lines' outcomes.
7172
#[derive(Default)]
7273
struct ChecksumResult {
73-
pub bad_format: i32,
74-
pub failed_cksum: i32,
75-
pub failed_open_file: i32,
74+
/// Number of lines in the file where the computed checksum MATCHES
75+
/// the expectation.
76+
pub correct: u32,
77+
/// Number of lines in the file where the computed checksum DIFFERS
78+
/// from the expectation.
79+
pub failed_cksum: u32,
80+
pub failed_open_file: u32,
81+
/// Number of improperly formatted lines.
82+
pub bad_format: u32,
83+
/// Total number of non-empty, non-comment lines.
84+
pub total: u32,
85+
}
86+
87+
impl ChecksumResult {
88+
#[inline]
89+
fn total_properly_formatted(&self) -> u32 {
90+
self.total - self.bad_format
91+
}
7692
}
7793

7894
/// Represents a reason for which the processing of a checksum line
@@ -107,14 +123,16 @@ impl From<ChecksumError> for LineCheckError {
107123
}
108124

109125
/// Represents an error that was encountered when processing a checksum file.
110-
#[allow(clippy::enum_variant_names)]
111126
enum FileCheckError {
112127
/// a generic UError was encountered in sub-functions
113128
UError(Box<dyn UError>),
114-
/// the error does not stop the processing of next files
115-
NonCriticalError,
116-
/// the error must stop the run of the program
117-
CriticalError,
129+
/// the checksum file is improperly formatted.
130+
ImproperlyFormatted,
131+
/// reading of the checksum file failed
132+
CantOpenChecksumFile,
133+
/// Algorithm detection was unsuccessful.
134+
/// Either none is provided, or there is a conflict.
135+
AlgoDetectionError,
118136
}
119137

120138
impl From<Box<dyn UError>> for FileCheckError {
@@ -172,8 +190,6 @@ pub enum ChecksumError {
172190
CombineMultipleAlgorithms,
173191
#[error("Needs an algorithm to hash with.\nUse --help for more information.")]
174192
NeedAlgorithmToHash,
175-
#[error("{filename}: no properly formatted checksum lines found")]
176-
NoProperlyFormattedChecksumLinesFound { filename: String },
177193
}
178194

179195
impl UError for ChecksumError {
@@ -239,6 +255,12 @@ fn cksum_output(res: &ChecksumResult, status: bool) {
239255
}
240256
}
241257

258+
/// Print a "no properly formatted lines" message in stderr
259+
#[inline]
260+
fn log_no_properly_formatted(filename: String) {
261+
show_error!("{filename}: no properly formatted checksum lines found");
262+
}
263+
242264
/// Represents the different outcomes that can happen to a file
243265
/// that is being checked.
244266
#[derive(Debug, Clone, Copy)]
@@ -439,43 +461,19 @@ fn determine_regex(lines: &[OsString]) -> Option<(Regex, bool)> {
439461
None
440462
}
441463

442-
// Converts bytes to a hexadecimal string
443-
fn bytes_to_hex(bytes: &[u8]) -> String {
444-
use std::fmt::Write;
445-
bytes
446-
.iter()
447-
.fold(String::with_capacity(bytes.len() * 2), |mut hex, byte| {
448-
write!(hex, "{byte:02x}").unwrap();
449-
hex
450-
})
451-
}
464+
/// Extract the expected digest from the checksum string
465+
fn get_expected_digest_as_hex_string(caps: &Captures, chosen_regex: &Regex) -> Option<String> {
466+
// Unwraps are safe, ensured by regex.
467+
let ck = caps.name("checksum").unwrap().as_bytes();
452468

453-
fn get_expected_checksum(
454-
filename: &[u8],
455-
caps: &Captures,
456-
chosen_regex: &Regex,
457-
) -> UResult<String> {
458469
if chosen_regex.as_str() == ALGO_BASED_REGEX_BASE64 {
459-
// Unwrap is safe, ensured by regex
460-
let ck = caps.name("checksum").unwrap().as_bytes();
461-
match BASE64.decode(ck) {
462-
Ok(decoded_bytes) => {
463-
match std::str::from_utf8(&decoded_bytes) {
464-
Ok(decoded_str) => Ok(decoded_str.to_string()),
465-
Err(_) => Ok(bytes_to_hex(&decoded_bytes)), // Handle as raw bytes if not valid UTF-8
466-
}
467-
}
468-
Err(_) => Err(Box::new(
469-
ChecksumError::NoProperlyFormattedChecksumLinesFound {
470-
filename: String::from_utf8_lossy(filename).to_string(),
471-
},
472-
)),
473-
}
470+
BASE64.decode(ck).map(hex::encode).ok()
471+
} else if ck.len() % 2 == 0 {
472+
Some(str::from_utf8(ck).unwrap().to_string())
474473
} else {
475-
// Unwraps are safe, ensured by regex.
476-
Ok(str::from_utf8(caps.name("checksum").unwrap().as_bytes())
477-
.unwrap()
478-
.to_string())
474+
// If the length of the digest is not a multiple of 2, then it
475+
// must be improperly formatted (1 hex digit is 2 characters)
476+
None
479477
}
480478
}
481479

@@ -554,8 +552,6 @@ fn get_input_file(filename: &OsStr) -> UResult<Box<dyn Read>> {
554552
fn identify_algo_name_and_length(
555553
caps: &Captures,
556554
algo_name_input: Option<&str>,
557-
res: &mut ChecksumResult,
558-
properly_formatted: &mut bool,
559555
) -> Option<(String, Option<usize>)> {
560556
// When the algo-based format is matched, extract details from regex captures
561557
let algorithm = caps
@@ -569,14 +565,11 @@ fn identify_algo_name_and_length(
569565
// (for example SHA1 (f) = d...)
570566
// Also handle the case cksum -s sm3 but the file contains other formats
571567
if algo_name_input.is_some() && algo_name_input != Some(&algorithm) {
572-
res.bad_format += 1;
573-
*properly_formatted = false;
574568
return None;
575569
}
576570

577571
if !SUPPORTED_ALGORITHMS.contains(&algorithm.as_str()) {
578572
// Not supported algo, leave early
579-
*properly_formatted = false;
580573
return None;
581574
}
582575

@@ -588,7 +581,6 @@ fn identify_algo_name_and_length(
588581
if bits_value % 8 == 0 {
589582
Some(Some(bits_value / 8))
590583
} else {
591-
*properly_formatted = false;
592584
None // Return None to signal a divisibility issue
593585
}
594586
})?;
@@ -609,16 +601,12 @@ fn process_checksum_line(
609601
i: usize,
610602
chosen_regex: &Regex,
611603
is_algo_based_format: bool,
612-
res: &mut ChecksumResult,
613604
cli_algo_name: Option<&str>,
614605
cli_algo_length: Option<usize>,
615-
properly_formatted: &mut bool,
616606
opts: ChecksumOptions,
617607
) -> Result<(), LineCheckError> {
618608
let line_bytes = os_str_as_bytes(line)?;
619609
if let Some(caps) = chosen_regex.captures(line_bytes) {
620-
*properly_formatted = true;
621-
622610
let mut filename_to_check = caps.name("filename").unwrap().as_bytes();
623611

624612
if filename_to_check.starts_with(b"*")
@@ -629,12 +617,13 @@ fn process_checksum_line(
629617
filename_to_check = &filename_to_check[1..];
630618
}
631619

632-
let expected_checksum = get_expected_checksum(filename_to_check, &caps, chosen_regex)?;
620+
let expected_checksum = get_expected_digest_as_hex_string(&caps, chosen_regex)
621+
.ok_or(LineCheckError::ImproperlyFormatted)?;
633622

634623
// If the algo_name is provided, we use it, otherwise we try to detect it
635624
let (algo_name, length) = if is_algo_based_format {
636-
identify_algo_name_and_length(&caps, cli_algo_name, res, properly_formatted)
637-
.unwrap_or((String::new(), None))
625+
identify_algo_name_and_length(&caps, cli_algo_name)
626+
.ok_or(LineCheckError::ImproperlyFormatted)?
638627
} else if let Some(a) = cli_algo_name {
639628
// When a specific algorithm name is input, use it and use the provided bits
640629
// except when dealing with blake2b, where we will detect the length
@@ -648,16 +637,9 @@ fn process_checksum_line(
648637
}
649638
} else {
650639
// Default case if no algorithm is specified and non-algo based format is matched
651-
(String::new(), None)
640+
return Err(LineCheckError::ImproperlyFormatted);
652641
};
653642

654-
if algo_name.is_empty() {
655-
// we haven't been able to detect the algo name. No point to continue
656-
*properly_formatted = false;
657-
658-
// TODO: return error?
659-
return Err(LineCheckError::ImproperlyFormatted);
660-
}
661643
let mut algo = detect_algo(&algo_name, length)?;
662644

663645
let (filename_to_check_unescaped, prefix) = unescape_filename(filename_to_check);
@@ -709,7 +691,6 @@ fn process_checksum_line(
709691
);
710692
}
711693

712-
res.bad_format += 1;
713694
Err(LineCheckError::ImproperlyFormatted)
714695
}
715696
}
@@ -720,9 +701,8 @@ fn process_checksum_file(
720701
cli_algo_length: Option<usize>,
721702
opts: ChecksumOptions,
722703
) -> Result<(), FileCheckError> {
723-
let mut correct_format = 0;
724-
let mut properly_formatted = false;
725704
let mut res = ChecksumResult::default();
705+
726706
let input_is_stdin = filename_input == OsStr::new("-");
727707

728708
let file: Box<dyn Read> = if input_is_stdin {
@@ -735,7 +715,7 @@ fn process_checksum_file(
735715
// Could not read the file, show the error and continue to the next file
736716
show_error!("{e}");
737717
set_exit_code(1);
738-
return Err(FileCheckError::NonCriticalError);
718+
return Err(FileCheckError::CantOpenChecksumFile);
739719
}
740720
}
741721
};
@@ -744,60 +724,57 @@ fn process_checksum_file(
744724
let lines = read_os_string_lines(reader).collect::<Vec<_>>();
745725

746726
let Some((chosen_regex, is_algo_based_format)) = determine_regex(&lines) else {
747-
let e = ChecksumError::NoProperlyFormattedChecksumLinesFound {
748-
filename: get_filename_for_output(filename_input, input_is_stdin),
749-
};
750-
show_error!("{e}");
727+
log_no_properly_formatted(get_filename_for_output(filename_input, input_is_stdin));
751728
set_exit_code(1);
752-
return Err(FileCheckError::NonCriticalError);
729+
return Err(FileCheckError::AlgoDetectionError);
753730
};
754731

755732
for (i, line) in lines.iter().enumerate() {
756-
match process_checksum_line(
733+
let line_result = process_checksum_line(
757734
filename_input,
758735
line,
759736
i,
760737
&chosen_regex,
761738
is_algo_based_format,
762-
&mut res,
763739
cli_algo_name,
764740
cli_algo_length,
765-
&mut properly_formatted,
766741
opts,
767-
) {
768-
Ok(()) => correct_format += 1,
769-
Err(LineCheckError::DigestMismatch) => res.failed_cksum += 1,
770-
Err(LineCheckError::UError(e)) => return Err(e.into()),
771-
Err(LineCheckError::Skipped) => continue,
772-
Err(LineCheckError::ImproperlyFormatted) => (),
773-
Err(LineCheckError::CantOpenFile | LineCheckError::FileIsDirectory) => {
774-
res.failed_open_file += 1
775-
}
776-
Err(LineCheckError::FileNotFound) => {
777-
if !opts.ignore_missing {
778-
res.failed_open_file += 1
779-
}
780-
}
742+
);
743+
744+
// Match a first time to elude critical UErrors, and increment the total
745+
// in all cases except on skipped.
746+
use LineCheckError::*;
747+
match line_result {
748+
Err(UError(e)) => return Err(e.into()),
749+
Err(Skipped) => (),
750+
_ => res.total += 1,
751+
}
752+
753+
// Match a second time to update the right field of `res`.
754+
match line_result {
755+
Ok(()) => res.correct += 1,
756+
Err(DigestMismatch) => res.failed_cksum += 1,
757+
Err(ImproperlyFormatted) => res.bad_format += 1,
758+
Err(CantOpenFile | FileIsDirectory) => res.failed_open_file += 1,
759+
Err(FileNotFound) if !opts.ignore_missing => res.failed_open_file += 1,
760+
_ => continue,
781761
};
782762
}
783763

784764
// not a single line correctly formatted found
785765
// return an error
786-
if !properly_formatted {
766+
if res.total_properly_formatted() == 0 {
787767
if !opts.status {
788-
return Err(ChecksumError::NoProperlyFormattedChecksumLinesFound {
789-
filename: get_filename_for_output(filename_input, input_is_stdin),
790-
}
791-
.into());
768+
log_no_properly_formatted(get_filename_for_output(filename_input, input_is_stdin));
792769
}
793770
set_exit_code(1);
794-
return Err(FileCheckError::CriticalError);
771+
return Err(FileCheckError::ImproperlyFormatted);
795772
}
796773

797774
// if any incorrectly formatted line, show it
798775
cksum_output(&res, opts.status);
799776

800-
if opts.ignore_missing && correct_format == 0 {
777+
if opts.ignore_missing && res.correct == 0 {
801778
// we have only bad format
802779
// and we had ignore-missing
803780
eprintln!(
@@ -839,8 +816,8 @@ where
839816
use FileCheckError::*;
840817
match process_checksum_file(filename_input, algo_name_input, length_input, opts) {
841818
Err(UError(e)) => return Err(e),
842-
Err(CriticalError) => break,
843-
Err(NonCriticalError) | Ok(_) => continue,
819+
Err(ImproperlyFormatted) => break,
820+
Err(CantOpenChecksumFile | AlgoDetectionError) | Ok(_) => continue,
844821
}
845822
}
846823

@@ -1079,7 +1056,7 @@ mod tests {
10791056
];
10801057

10811058
for (input, expected) in test_cases {
1082-
let captures = algo_based_regex.captures(*input);
1059+
let captures = algo_based_regex.captures(input);
10831060
match expected {
10841061
Some((algo, bits, filename, checksum)) => {
10851062
assert!(captures.is_some());
@@ -1229,7 +1206,7 @@ mod tests {
12291206

12301207
// Test leading space before checksum line
12311208
let lines_algo_based_leading_space =
1232-
vec![" MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e"]
1209+
[" MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e"]
12331210
.iter()
12341211
.map(|s| OsString::from(s.to_string()))
12351212
.collect::<Vec<_>>();
@@ -1239,7 +1216,7 @@ mod tests {
12391216

12401217
// Test trailing space after checksum line (should fail)
12411218
let lines_algo_based_leading_space =
1242-
vec!["MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e "]
1219+
["MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e "]
12431220
.iter()
12441221
.map(|s| OsString::from(s.to_string()))
12451222
.collect::<Vec<_>>();
@@ -1248,13 +1225,13 @@ mod tests {
12481225
}
12491226

12501227
#[test]
1251-
fn test_get_expected_checksum() {
1228+
fn test_get_expected_digest() {
12521229
let re = Regex::new(ALGO_BASED_REGEX_BASE64).unwrap();
12531230
let caps = re
12541231
.captures(b"SHA256 (empty) = 47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=")
12551232
.unwrap();
12561233

1257-
let result = get_expected_checksum(b"filename", &caps, &re);
1234+
let result = get_expected_digest_as_hex_string(&caps, &re);
12581235

12591236
assert_eq!(
12601237
result.unwrap(),
@@ -1269,9 +1246,9 @@ mod tests {
12691246
.captures(b"SHA256 (empty) = 47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU")
12701247
.unwrap();
12711248

1272-
let result = get_expected_checksum(b"filename", &caps, &re);
1249+
let result = get_expected_digest_as_hex_string(&caps, &re);
12731250

1274-
assert!(result.is_err());
1251+
assert!(result.is_none());
12751252
}
12761253

12771254
#[test]

0 commit comments

Comments
 (0)