Skip to content

Commit 567bbc5

Browse files
committed
checksum: remove ALGO_BASED_REGEX (non base64) as it's not useful anymore and introduce LineFormat enum
1 parent f4e5dc2 commit 567bbc5

File tree

1 file changed

+48
-37
lines changed

1 file changed

+48
-37
lines changed

src/uucore/src/lib/features/checksum.rs

Lines changed: 48 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -421,8 +421,7 @@ pub fn detect_algo(algo: &str, length: Option<usize>) -> UResult<HashAlgorithm>
421421
// algo must be uppercase or b (for blake2b)
422422
// 2. <checksum> [* ]<filename>
423423
// 3. <checksum> [*]<filename> (only one space)
424-
const ALGO_BASED_REGEX: &str = r"^\s*\\?(?P<algo>(?:[A-Z0-9]+|BLAKE2b))(?:-(?P<bits>\d+))?\s?\((?P<filename>(?-u:.*))\)\s*=\s*(?P<checksum>[a-fA-F0-9]+)$";
425-
const ALGO_BASED_REGEX_BASE64: &str = r"^\s*\\?(?P<algo>(?:[A-Z0-9]+|BLAKE2b))(?:-(?P<bits>\d+))?\s?\((?P<filename>(?-u:.*))\)\s*=\s*(?P<checksum>[A-Za-z0-9+/]+={0,2})$";
424+
const ALGO_BASED_REGEX: &str = r"^\s*\\?(?P<algo>(?:[A-Z0-9]+|BLAKE2b))(?:-(?P<bits>\d+))?\s?\((?P<filename>(?-u:.*))\)\s*=\s*(?P<checksum>[A-Za-z0-9+/]+={0,2})$";
426425

427426
const DOUBLE_SPACE_REGEX: &str = r"^(?P<checksum>[a-fA-F0-9]+)\s{2}(?P<filename>(?-u:.*))$";
428427

@@ -433,7 +432,23 @@ lazy_static! {
433432
static ref R_ALGO_BASED: Regex = Regex::new(ALGO_BASED_REGEX).unwrap();
434433
static ref R_DOUBLE_SPACE: Regex = Regex::new(DOUBLE_SPACE_REGEX).unwrap();
435434
static ref R_SINGLE_SPACE: Regex = Regex::new(SINGLE_SPACE_REGEX).unwrap();
436-
static ref R_ALGO_BASED_BASE_64: Regex = Regex::new(ALGO_BASED_REGEX_BASE64).unwrap();
435+
}
436+
437+
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
438+
enum LineFormat {
439+
AlgoBased,
440+
SingleSpace,
441+
DoubleSpace,
442+
}
443+
444+
impl LineFormat {
445+
fn to_regex(self) -> &'static Regex {
446+
match self {
447+
LineFormat::AlgoBased => &R_ALGO_BASED,
448+
LineFormat::SingleSpace => &R_SINGLE_SPACE,
449+
LineFormat::DoubleSpace => &R_DOUBLE_SPACE,
450+
}
451+
}
437452
}
438453

439454
/// Hold the data extracted from a checksum line.
@@ -443,34 +458,41 @@ struct LineInfo {
443458
checksum: String,
444459
filename: Vec<u8>,
445460

446-
regex: &'static Regex,
461+
format: LineFormat,
447462
}
448463

449464
impl LineInfo {
450-
fn parse(s: impl AsRef<OsStr>, cached_regex: &mut Option<&'static Regex>) -> Option<Self> {
451-
let regexes: &[(&'static Regex, bool)] = &[
452-
(&R_ALGO_BASED, true),
453-
(&R_DOUBLE_SPACE, false),
454-
(&R_SINGLE_SPACE, false),
455-
(&R_ALGO_BASED_BASE_64, true),
465+
/// Returns a `LineInfo` parsed from a checksum line.
466+
/// The function will run 3 regexes against the line and select the first one that matches
467+
/// to populate the fields of the struct.
468+
/// However, there is a catch regarding the handling of `cached_regex`.
469+
/// In case of non-algo-based regex, if `cached_regex` is Some, it must take the priority
470+
/// over the detected regex. Otherwise, we must set it to the detected regex.
471+
/// This specific behavior is emphasized by the test
472+
/// `test_hashsum::test_check_md5sum_only_one_space`.
473+
fn parse(s: impl AsRef<OsStr>, cached_regex: &mut Option<LineFormat>) -> Option<Self> {
474+
let regexes: &[(&'static Regex, LineFormat)] = &[
475+
(&R_ALGO_BASED, LineFormat::AlgoBased),
476+
(&R_DOUBLE_SPACE, LineFormat::DoubleSpace),
477+
(&R_SINGLE_SPACE, LineFormat::SingleSpace),
456478
];
457479

458480
let line_bytes = os_str_as_bytes(s.as_ref()).expect("UTF-8 decoding failed");
459481

460-
for (regex, algo_based) in regexes {
482+
for (regex, format) in regexes {
461483
if !regex.is_match(line_bytes) {
462484
continue;
463485
}
464486

465487
let mut r = *regex;
466-
if !algo_based {
488+
if *format != LineFormat::AlgoBased {
467489
// The cached regex ensures that when processing non-algo based regexes,
468-
// its cannot be changed (can't have single and double space regexes
490+
// it cannot be changed (can't have single and double space regexes
469491
// used in the same file).
470492
if cached_regex.is_some() {
471-
r = cached_regex.unwrap();
493+
r = cached_regex.unwrap().to_regex();
472494
} else {
473-
*cached_regex = Some(r);
495+
*cached_regex = Some(*format);
474496
}
475497
}
476498

@@ -485,23 +507,13 @@ impl LineInfo {
485507
.map(|m| match_to_string(m).parse::<usize>().unwrap()),
486508
checksum: caps.name("checksum").map(match_to_string).unwrap(),
487509
filename: caps.name("filename").map(|m| m.as_bytes().into()).unwrap(),
488-
regex: r,
510+
format: *format,
489511
});
490512
}
491513
}
492514

493515
None
494516
}
495-
496-
#[inline]
497-
fn is_algo_based(&self) -> bool {
498-
self.algo_name.is_some()
499-
}
500-
501-
#[inline]
502-
fn regex_str(&self) -> &str {
503-
self.regex.as_str()
504-
}
505517
}
506518

507519
fn get_filename_for_output(filename: &OsStr, input_is_stdin: bool) -> String {
@@ -730,14 +742,16 @@ fn process_algo_based_line(
730742

731743
/// Check a digest checksum with non-algo based pre-treatment.
732744
fn process_non_algo_based_line(
733-
i: usize,
745+
line_number: usize,
734746
line_info: &LineInfo,
735747
cli_algo_name: &str,
736748
cli_algo_length: Option<usize>,
737749
opts: ChecksumOptions,
738750
) -> Result<(), LineCheckError> {
739751
let mut filename_to_check = line_info.filename.as_slice();
740-
if filename_to_check.starts_with(b"*") && i == 0 && line_info.regex_str() == SINGLE_SPACE_REGEX
752+
if filename_to_check.starts_with(b"*")
753+
&& line_number == 0
754+
&& line_info.format == LineFormat::SingleSpace
741755
{
742756
// Remove the leading asterisk if present - only for the first line
743757
filename_to_check = &filename_to_check[1..];
@@ -774,7 +788,7 @@ fn process_checksum_line(
774788
cli_algo_name: Option<&str>,
775789
cli_algo_length: Option<usize>,
776790
opts: ChecksumOptions,
777-
cached_regex: &mut Option<&'static Regex>,
791+
cached_regex: &mut Option<LineFormat>,
778792
) -> Result<(), LineCheckError> {
779793
let line_bytes = os_str_as_bytes(line)?;
780794

@@ -786,7 +800,7 @@ fn process_checksum_line(
786800
// Use `LineInfo` to extract the data of a line.
787801
// Then, depending on its format, apply a different pre-treatment.
788802
if let Some(line_info) = LineInfo::parse(line, cached_regex) {
789-
if line_info.is_algo_based() {
803+
if line_info.format == LineFormat::AlgoBased {
790804
process_algo_based_line(&line_info, cli_algo_name, opts)
791805
} else if let Some(cli_algo) = cli_algo_name {
792806
// If we match a non-algo based regex, we expect a cli argument
@@ -1284,36 +1298,33 @@ mod tests {
12841298
let line_algo_based =
12851299
OsString::from("MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e");
12861300
let line_info = LineInfo::parse(&line_algo_based, &mut cached_regex).unwrap();
1287-
assert!(line_info.is_algo_based());
12881301
assert_eq!(line_info.algo_name.as_deref(), Some("MD5"));
12891302
assert!(line_info.algo_bit_len.is_none());
12901303
assert_eq!(line_info.filename, b"example.txt");
12911304
assert_eq!(line_info.checksum, "d41d8cd98f00b204e9800998ecf8427e");
1292-
assert_eq!(line_info.regex_str(), ALGO_BASED_REGEX);
1305+
assert_eq!(line_info.format, LineFormat::AlgoBased);
12931306
assert!(cached_regex.is_none());
12941307

12951308
// Test double-space regex
12961309
let line_double_space = OsString::from("d41d8cd98f00b204e9800998ecf8427e example.txt");
12971310
let line_info = LineInfo::parse(&line_double_space, &mut cached_regex).unwrap();
1298-
assert!(!line_info.is_algo_based());
12991311
assert!(line_info.algo_name.is_none());
13001312
assert!(line_info.algo_bit_len.is_none());
13011313
assert_eq!(line_info.filename, b"example.txt");
13021314
assert_eq!(line_info.checksum, "d41d8cd98f00b204e9800998ecf8427e");
1303-
assert_eq!(line_info.regex_str(), DOUBLE_SPACE_REGEX);
1315+
assert_eq!(line_info.format, LineFormat::DoubleSpace);
13041316
assert!(cached_regex.is_some());
13051317

13061318
cached_regex = None;
13071319

13081320
// Test single-space regex
13091321
let line_single_space = OsString::from("d41d8cd98f00b204e9800998ecf8427e example.txt");
13101322
let line_info = LineInfo::parse(&line_single_space, &mut cached_regex).unwrap();
1311-
assert!(!line_info.is_algo_based());
13121323
assert!(line_info.algo_name.is_none());
13131324
assert!(line_info.algo_bit_len.is_none());
13141325
assert_eq!(line_info.filename, b"example.txt");
13151326
assert_eq!(line_info.checksum, "d41d8cd98f00b204e9800998ecf8427e");
1316-
assert_eq!(line_info.regex_str(), SINGLE_SPACE_REGEX);
1327+
assert_eq!(line_info.format, LineFormat::SingleSpace);
13171328
assert!(cached_regex.is_some());
13181329

13191330
cached_regex = None;
@@ -1328,7 +1339,7 @@ mod tests {
13281339
OsString::from(" MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e");
13291340
let res = LineInfo::parse(&line_algo_based_leading_space, &mut cached_regex);
13301341
assert!(res.is_some());
1331-
assert_eq!(res.unwrap().regex_str(), ALGO_BASED_REGEX);
1342+
assert_eq!(line_info.format, LineFormat::AlgoBased);
13321343
assert!(cached_regex.is_none());
13331344

13341345
// Test trailing space after checksum line (should fail)

0 commit comments

Comments
 (0)