@@ -421,8 +421,7 @@ pub fn detect_algo(algo: &str, length: Option<usize>) -> UResult<HashAlgorithm>
421421// algo must be uppercase or b (for blake2b)
422422// 2. <checksum> [* ]<filename>
423423// 3. <checksum> [*]<filename> (only one space)
424- const ALGO_BASED_REGEX : & str = r"^\s*\\?(?P<algo>(?:[A-Z0-9]+|BLAKE2b))(?:-(?P<bits>\d+))?\s?\((?P<filename>(?-u:.*))\)\s*=\s*(?P<checksum>[a-fA-F0-9]+)$" ;
425- const ALGO_BASED_REGEX_BASE64 : & str = r"^\s*\\?(?P<algo>(?:[A-Z0-9]+|BLAKE2b))(?:-(?P<bits>\d+))?\s?\((?P<filename>(?-u:.*))\)\s*=\s*(?P<checksum>[A-Za-z0-9+/]+={0,2})$" ;
424+ const ALGO_BASED_REGEX : & str = r"^\s*\\?(?P<algo>(?:[A-Z0-9]+|BLAKE2b))(?:-(?P<bits>\d+))?\s?\((?P<filename>(?-u:.*))\)\s*=\s*(?P<checksum>[A-Za-z0-9+/]+={0,2})$" ;
426425
427426const DOUBLE_SPACE_REGEX : & str = r"^(?P<checksum>[a-fA-F0-9]+)\s{2}(?P<filename>(?-u:.*))$" ;
428427
@@ -433,7 +432,23 @@ lazy_static! {
433432 static ref R_ALGO_BASED : Regex = Regex :: new( ALGO_BASED_REGEX ) . unwrap( ) ;
434433 static ref R_DOUBLE_SPACE : Regex = Regex :: new( DOUBLE_SPACE_REGEX ) . unwrap( ) ;
435434 static ref R_SINGLE_SPACE : Regex = Regex :: new( SINGLE_SPACE_REGEX ) . unwrap( ) ;
436- static ref R_ALGO_BASED_BASE_64 : Regex = Regex :: new( ALGO_BASED_REGEX_BASE64 ) . unwrap( ) ;
435+ }
436+
437+ #[ derive( Debug , PartialEq , Eq , Clone , Copy ) ]
438+ enum LineFormat {
439+ AlgoBased ,
440+ SingleSpace ,
441+ DoubleSpace ,
442+ }
443+
444+ impl LineFormat {
445+ fn to_regex ( self ) -> & ' static Regex {
446+ match self {
447+ LineFormat :: AlgoBased => & R_ALGO_BASED ,
448+ LineFormat :: SingleSpace => & R_SINGLE_SPACE ,
449+ LineFormat :: DoubleSpace => & R_DOUBLE_SPACE ,
450+ }
451+ }
437452}
438453
439454/// Hold the data extracted from a checksum line.
@@ -443,34 +458,41 @@ struct LineInfo {
443458 checksum : String ,
444459 filename : Vec < u8 > ,
445460
446- regex : & ' static Regex ,
461+ format : LineFormat ,
447462}
448463
449464impl LineInfo {
450- fn parse ( s : impl AsRef < OsStr > , cached_regex : & mut Option < & ' static Regex > ) -> Option < Self > {
451- let regexes: & [ ( & ' static Regex , bool ) ] = & [
452- ( & R_ALGO_BASED , true ) ,
453- ( & R_DOUBLE_SPACE , false ) ,
454- ( & R_SINGLE_SPACE , false ) ,
455- ( & R_ALGO_BASED_BASE_64 , true ) ,
465+ /// Returns a `LineInfo` parsed from a checksum line.
466+ /// The function will run 3 regexes against the line and select the first one that matches
467+ /// to populate the fields of the struct.
468+ /// However, there is a catch regarding the handling of `cached_regex`.
469+ /// For a non-algo-based regex, if `cached_regex` is `Some`, it must take priority
470+ /// over the detected regex. Otherwise, we must set it to the detected regex.
471+ /// This specific behavior is emphasized by the test
472+ /// `test_hashsum::test_check_md5sum_only_one_space`.
473+ fn parse ( s : impl AsRef < OsStr > , cached_regex : & mut Option < LineFormat > ) -> Option < Self > {
474+ let regexes: & [ ( & ' static Regex , LineFormat ) ] = & [
475+ ( & R_ALGO_BASED , LineFormat :: AlgoBased ) ,
476+ ( & R_DOUBLE_SPACE , LineFormat :: DoubleSpace ) ,
477+ ( & R_SINGLE_SPACE , LineFormat :: SingleSpace ) ,
456478 ] ;
457479
458480 let line_bytes = os_str_as_bytes ( s. as_ref ( ) ) . expect ( "UTF-8 decoding failed" ) ;
459481
460- for ( regex, algo_based ) in regexes {
482+ for ( regex, format ) in regexes {
461483 if !regex. is_match ( line_bytes) {
462484 continue ;
463485 }
464486
465487 let mut r = * regex;
466- if !algo_based {
488+ if * format != LineFormat :: AlgoBased {
467489 // The cached regex ensures that when processing non-algo based regexes,
468- // its cannot be changed (can't have single and double space regexes
490+ // it cannot be changed (can't have single and double space regexes
469491 // used in the same file).
470492 if cached_regex. is_some ( ) {
471- r = cached_regex. unwrap ( ) ;
493+ r = cached_regex. unwrap ( ) . to_regex ( ) ;
472494 } else {
473- * cached_regex = Some ( r ) ;
495+ * cached_regex = Some ( * format ) ;
474496 }
475497 }
476498
@@ -485,23 +507,13 @@ impl LineInfo {
485507 . map ( |m| match_to_string ( m) . parse :: < usize > ( ) . unwrap ( ) ) ,
486508 checksum : caps. name ( "checksum" ) . map ( match_to_string) . unwrap ( ) ,
487509 filename : caps. name ( "filename" ) . map ( |m| m. as_bytes ( ) . into ( ) ) . unwrap ( ) ,
488- regex : r ,
510+ format : * format ,
489511 } ) ;
490512 }
491513 }
492514
493515 None
494516 }
495-
496- #[ inline]
497- fn is_algo_based ( & self ) -> bool {
498- self . algo_name . is_some ( )
499- }
500-
501- #[ inline]
502- fn regex_str ( & self ) -> & str {
503- self . regex . as_str ( )
504- }
505517}
506518
507519fn get_filename_for_output ( filename : & OsStr , input_is_stdin : bool ) -> String {
@@ -730,14 +742,16 @@ fn process_algo_based_line(
730742
731743/// Check a digest checksum with non-algo based pre-treatment.
732744fn process_non_algo_based_line (
733- i : usize ,
745+ line_number : usize ,
734746 line_info : & LineInfo ,
735747 cli_algo_name : & str ,
736748 cli_algo_length : Option < usize > ,
737749 opts : ChecksumOptions ,
738750) -> Result < ( ) , LineCheckError > {
739751 let mut filename_to_check = line_info. filename . as_slice ( ) ;
740- if filename_to_check. starts_with ( b"*" ) && i == 0 && line_info. regex_str ( ) == SINGLE_SPACE_REGEX
752+ if filename_to_check. starts_with ( b"*" )
753+ && line_number == 0
754+ && line_info. format == LineFormat :: SingleSpace
741755 {
742756 // Remove the leading asterisk if present - only for the first line
743757 filename_to_check = & filename_to_check[ 1 ..] ;
@@ -774,7 +788,7 @@ fn process_checksum_line(
774788 cli_algo_name : Option < & str > ,
775789 cli_algo_length : Option < usize > ,
776790 opts : ChecksumOptions ,
777- cached_regex : & mut Option < & ' static Regex > ,
791+ cached_regex : & mut Option < LineFormat > ,
778792) -> Result < ( ) , LineCheckError > {
779793 let line_bytes = os_str_as_bytes ( line) ?;
780794
@@ -786,7 +800,7 @@ fn process_checksum_line(
786800 // Use `LineInfo` to extract the data of a line.
787801 // Then, depending on its format, apply a different pre-treatment.
788802 if let Some ( line_info) = LineInfo :: parse ( line, cached_regex) {
789- if line_info. is_algo_based ( ) {
803+ if line_info. format == LineFormat :: AlgoBased {
790804 process_algo_based_line ( & line_info, cli_algo_name, opts)
791805 } else if let Some ( cli_algo) = cli_algo_name {
792806 // If we match a non-algo based regex, we expect a cli argument
@@ -1284,36 +1298,33 @@ mod tests {
12841298 let line_algo_based =
12851299 OsString :: from ( "MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e" ) ;
12861300 let line_info = LineInfo :: parse ( & line_algo_based, & mut cached_regex) . unwrap ( ) ;
1287- assert ! ( line_info. is_algo_based( ) ) ;
12881301 assert_eq ! ( line_info. algo_name. as_deref( ) , Some ( "MD5" ) ) ;
12891302 assert ! ( line_info. algo_bit_len. is_none( ) ) ;
12901303 assert_eq ! ( line_info. filename, b"example.txt" ) ;
12911304 assert_eq ! ( line_info. checksum, "d41d8cd98f00b204e9800998ecf8427e" ) ;
1292- assert_eq ! ( line_info. regex_str ( ) , ALGO_BASED_REGEX ) ;
1305+ assert_eq ! ( line_info. format , LineFormat :: AlgoBased ) ;
12931306 assert ! ( cached_regex. is_none( ) ) ;
12941307
12951308 // Test double-space regex
12961309 let line_double_space = OsString :: from ( "d41d8cd98f00b204e9800998ecf8427e example.txt" ) ;
12971310 let line_info = LineInfo :: parse ( & line_double_space, & mut cached_regex) . unwrap ( ) ;
1298- assert ! ( !line_info. is_algo_based( ) ) ;
12991311 assert ! ( line_info. algo_name. is_none( ) ) ;
13001312 assert ! ( line_info. algo_bit_len. is_none( ) ) ;
13011313 assert_eq ! ( line_info. filename, b"example.txt" ) ;
13021314 assert_eq ! ( line_info. checksum, "d41d8cd98f00b204e9800998ecf8427e" ) ;
1303- assert_eq ! ( line_info. regex_str ( ) , DOUBLE_SPACE_REGEX ) ;
1315+ assert_eq ! ( line_info. format , LineFormat :: DoubleSpace ) ;
13041316 assert ! ( cached_regex. is_some( ) ) ;
13051317
13061318 cached_regex = None ;
13071319
13081320 // Test single-space regex
13091321 let line_single_space = OsString :: from ( "d41d8cd98f00b204e9800998ecf8427e example.txt" ) ;
13101322 let line_info = LineInfo :: parse ( & line_single_space, & mut cached_regex) . unwrap ( ) ;
1311- assert ! ( !line_info. is_algo_based( ) ) ;
13121323 assert ! ( line_info. algo_name. is_none( ) ) ;
13131324 assert ! ( line_info. algo_bit_len. is_none( ) ) ;
13141325 assert_eq ! ( line_info. filename, b"example.txt" ) ;
13151326 assert_eq ! ( line_info. checksum, "d41d8cd98f00b204e9800998ecf8427e" ) ;
1316- assert_eq ! ( line_info. regex_str ( ) , SINGLE_SPACE_REGEX ) ;
1327+ assert_eq ! ( line_info. format , LineFormat :: SingleSpace ) ;
13171328 assert ! ( cached_regex. is_some( ) ) ;
13181329
13191330 cached_regex = None ;
@@ -1328,7 +1339,7 @@ mod tests {
13281339 OsString :: from ( " MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e" ) ;
13291340 let res = LineInfo :: parse ( & line_algo_based_leading_space, & mut cached_regex) ;
13301341 assert ! ( res. is_some( ) ) ;
1331- assert_eq ! ( res . unwrap ( ) . regex_str ( ) , ALGO_BASED_REGEX ) ;
1342+ assert_eq ! ( line_info . format , LineFormat :: AlgoBased ) ;
13321343 assert ! ( cached_regex. is_none( ) ) ;
13331344
13341345 // Test trailing space after checksum line (should fail)
0 commit comments