66// spell-checker:ignore (ToDO) ints paren prec multibytes
77
88use num_bigint:: { BigInt , ParseBigIntError } ;
9- use num_traits:: ToPrimitive ;
9+ use num_traits:: { ToPrimitive , Zero } ;
1010use onig:: { Regex , RegexOptions , Syntax } ;
1111
1212use crate :: { ExprError , ExprResult } ;
@@ -139,7 +139,9 @@ impl StringOp {
139139 Self :: Match => {
140140 let left = left. eval ( ) ?. eval_as_string ( ) ;
141141 let right = right. eval ( ) ?. eval_as_string ( ) ;
142- let re_string = format ! ( "^{right}" ) ;
142+ check_posix_regex_errors ( & right) ?;
143+ let prefix = if right. starts_with ( '*' ) { r"^\" } else { "^" } ;
144+ let re_string = format ! ( "{prefix}{right}" ) ;
143145 let re = Regex :: with_options (
144146 & re_string,
145147 RegexOptions :: REGEX_OPTION_NONE ,
@@ -148,7 +150,7 @@ impl StringOp {
148150 . map_err ( |_| ExprError :: InvalidRegexExpression ) ?;
149151 Ok ( if re. captures_len ( ) > 0 {
150152 re. captures ( & left)
151- . map ( |captures| captures. at ( 1 ) . unwrap ( ) )
153+ . and_then ( |captures| captures. at ( 1 ) )
152154 . unwrap_or ( "" )
153155 . to_string ( )
154156 } else {
@@ -173,6 +175,99 @@ impl StringOp {
173175 }
174176}
175177
178+ /// Check for errors in a supplied regular expression
179+ ///
180+ /// GNU coreutils shows messages for invalid regular expressions
181+ /// differently from the oniguruma library used by the regex crate.
182+ /// This method attempts to do these checks manually in one pass
183+ /// through the regular expression.
184+ ///
185+ /// This method is not comprehensively checking all cases in which
186+ /// a regular expression could be invalid; any cases not caught will
187+ /// result in a [ExprError::InvalidRegexExpression] when passing the
188+ /// regular expression through the Oniguruma bindings. This method is
189+ /// intended to just identify a few situations for which GNU coreutils
190+ /// has specific error messages.
191+ fn check_posix_regex_errors ( pattern : & str ) -> ExprResult < ( ) > {
192+ let mut escaped_parens: u64 = 0 ;
193+ let mut escaped_braces: u64 = 0 ;
194+ let mut escaped = false ;
195+
196+ let mut repeating_pattern_text = String :: new ( ) ;
197+ let mut invalid_content_error = false ;
198+
199+ for c in pattern. chars ( ) {
200+ match ( escaped, c) {
201+ ( true , ')' ) => {
202+ escaped_parens = escaped_parens
203+ . checked_sub ( 1 )
204+ . ok_or ( ExprError :: UnmatchedClosingParenthesis ) ?;
205+ }
206+ ( true , '(' ) => {
207+ escaped_parens += 1 ;
208+ }
209+ ( true , '}' ) => {
210+ escaped_braces = escaped_braces
211+ . checked_sub ( 1 )
212+ . ok_or ( ExprError :: UnmatchedClosingBrace ) ?;
213+ let mut repetition =
214+ repeating_pattern_text[ ..repeating_pattern_text. len ( ) - 1 ] . splitn ( 2 , ',' ) ;
215+ match (
216+ repetition
217+ . next ( )
218+ . expect ( "splitn always returns at least one string" ) ,
219+ repetition. next ( ) ,
220+ ) {
221+ ( "" , None ) => {
222+ // Empty repeating pattern
223+ invalid_content_error = true ;
224+ }
225+ ( x, None ) | ( x, Some ( "" ) ) => {
226+ if x. parse :: < i16 > ( ) . is_err ( ) {
227+ invalid_content_error = true ;
228+ }
229+ }
230+ ( "" , Some ( x) ) => {
231+ if x. parse :: < i16 > ( ) . is_err ( ) {
232+ invalid_content_error = true ;
233+ }
234+ }
235+ ( f, Some ( l) ) => {
236+ if let ( Ok ( f) , Ok ( l) ) = ( f. parse :: < i16 > ( ) , l. parse :: < i16 > ( ) ) {
237+ invalid_content_error = invalid_content_error || f > l;
238+ } else {
239+ invalid_content_error = true ;
240+ }
241+ }
242+ }
243+ repeating_pattern_text. clear ( ) ;
244+ }
245+ ( true , '{' ) => {
246+ escaped_braces += 1 ;
247+ }
248+ _ => {
249+ if escaped_braces > 0 && repeating_pattern_text. len ( ) <= 13 {
250+ repeating_pattern_text. push ( c) ;
251+ }
252+ if escaped_braces > 0 && !( c. is_ascii_digit ( ) || c == '\\' || c == ',' ) {
253+ invalid_content_error = true ;
254+ }
255+ }
256+ }
257+ escaped = !escaped && c == '\\' ;
258+ }
259+ match (
260+ escaped_parens. is_zero ( ) ,
261+ escaped_braces. is_zero ( ) ,
262+ invalid_content_error,
263+ ) {
264+ ( true , true , false ) => Ok ( ( ) ) ,
265+ ( _, false , _) => Err ( ExprError :: UnmatchedOpeningBrace ) ,
266+ ( false , _, _) => Err ( ExprError :: UnmatchedOpeningParenthesis ) ,
267+ ( true , true , true ) => Err ( ExprError :: InvalidContent ( r"\{\}" . to_string ( ) ) ) ,
268+ }
269+ }
270+
176271/// Precedence for infix binary operators
177272const PRECEDENCE : & [ & [ ( & str , BinOp ) ] ] = & [
178273 & [ ( "|" , BinOp :: String ( StringOp :: Or ) ) ] ,
@@ -442,13 +537,21 @@ impl<'a> Parser<'a> {
442537 } ,
443538 "(" => {
444539 let s = self . parse_expression ( ) ?;
445- let close_paren = self . next ( ) ? ;
446- if close_paren != ")" {
540+ match self . next ( ) {
541+ Ok ( ")" ) => { }
447542 // Since we have parsed at least a '(', there will be a token
448543 // at `self.index - 1`. So this indexing won't panic.
449- return Err ( ExprError :: ExpectedClosingBraceAfter (
450- self . input [ self . index - 1 ] . into ( ) ,
451- ) ) ;
544+ Ok ( _) => {
545+ return Err ( ExprError :: ExpectedClosingBraceInsteadOf (
546+ self . input [ self . index - 1 ] . into ( ) ,
547+ ) ) ;
548+ }
549+ Err ( ExprError :: MissingArgument ( _) ) => {
550+ return Err ( ExprError :: ExpectedClosingBraceAfter (
551+ self . input [ self . index - 1 ] . into ( ) ,
552+ ) ) ;
553+ }
554+ Err ( e) => return Err ( e) ,
452555 }
453556 s
454557 }
@@ -484,7 +587,10 @@ pub fn is_truthy(s: &NumOrStr) -> bool {
484587
485588#[ cfg( test) ]
486589mod test {
487- use super :: { AstNode , BinOp , NumericOp , RelationOp , StringOp } ;
590+ use crate :: ExprError ;
591+ use crate :: ExprError :: InvalidContent ;
592+
593+ use super :: { check_posix_regex_errors, AstNode , BinOp , NumericOp , RelationOp , StringOp } ;
488594
489595 impl From < & str > for AstNode {
490596 fn from ( value : & str ) -> Self {
@@ -587,4 +693,123 @@ mod test {
587693 ) ) ,
588694 ) ;
589695 }
696+
697+ #[ test]
698+ fn missing_closing_parenthesis ( ) {
699+ assert_eq ! (
700+ AstNode :: parse( & [ "(" , "42" ] ) ,
701+ Err ( ExprError :: ExpectedClosingBraceAfter ( "42" . to_string( ) ) )
702+ ) ;
703+ assert_eq ! (
704+ AstNode :: parse( & [ "(" , "42" , "a" ] ) ,
705+ Err ( ExprError :: ExpectedClosingBraceInsteadOf ( "a" . to_string( ) ) )
706+ ) ;
707+ }
708+
709+ #[ test]
710+ fn empty_substitution ( ) {
711+ // causes a panic in 0.0.25
712+ let result = AstNode :: parse ( & [ "a" , ":" , r"\(b\)*" ] )
713+ . unwrap ( )
714+ . eval ( )
715+ . unwrap ( ) ;
716+ assert_eq ! ( result. eval_as_string( ) , "" ) ;
717+ }
718+
719+ #[ test]
720+ fn starting_stars_become_escaped ( ) {
721+ let result = AstNode :: parse ( & [ "cats" , ":" , r"*cats" ] )
722+ . unwrap ( )
723+ . eval ( )
724+ . unwrap ( ) ;
725+ assert_eq ! ( result. eval_as_string( ) , "0" ) ;
726+
727+ let result = AstNode :: parse ( & [ "*cats" , ":" , r"*cats" ] )
728+ . unwrap ( )
729+ . eval ( )
730+ . unwrap ( ) ;
731+ assert_eq ! ( result. eval_as_string( ) , "5" ) ;
732+ }
733+
734+ #[ test]
735+ fn only_match_in_beginning ( ) {
736+ let result = AstNode :: parse ( & [ "budget" , ":" , r"get" ] )
737+ . unwrap ( )
738+ . eval ( )
739+ . unwrap ( ) ;
740+ assert_eq ! ( result. eval_as_string( ) , "0" ) ;
741+ }
742+
743+ #[ test]
744+ fn check_regex_valid ( ) {
745+ assert ! ( check_posix_regex_errors( r"(a+b) \(a* b\)" ) . is_ok( ) ) ;
746+ }
747+
748+ #[ test]
749+ fn check_regex_simple_repeating_pattern ( ) {
750+ assert ! ( check_posix_regex_errors( r"\(a+b\)\{4\}" ) . is_ok( ) ) ;
751+ }
752+
753+ #[ test]
754+ fn check_regex_missing_closing ( ) {
755+ assert_eq ! (
756+ check_posix_regex_errors( r"\(abc" ) ,
757+ Err ( ExprError :: UnmatchedOpeningParenthesis )
758+ ) ;
759+
760+ assert_eq ! (
761+ check_posix_regex_errors( r"\{1,2" ) ,
762+ Err ( ExprError :: UnmatchedOpeningBrace )
763+ ) ;
764+ }
765+
766+ #[ test]
767+ fn check_regex_missing_opening ( ) {
768+ assert_eq ! (
769+ check_posix_regex_errors( r"abc\)" ) ,
770+ Err ( ExprError :: UnmatchedClosingParenthesis )
771+ ) ;
772+
773+ assert_eq ! (
774+ check_posix_regex_errors( r"abc\}" ) ,
775+ Err ( ExprError :: UnmatchedClosingBrace )
776+ ) ;
777+ }
778+
779+ #[ test]
780+ fn check_regex_empty_repeating_pattern ( ) {
781+ assert_eq ! (
782+ check_posix_regex_errors( "ab\\ {\\ }" ) ,
783+ Err ( InvalidContent ( r"\{\}" . to_string( ) ) )
784+ )
785+ }
786+
787+ #[ test]
788+ fn check_regex_intervals_two_numbers ( ) {
789+ assert_eq ! (
790+ // out of order
791+ check_posix_regex_errors( "ab\\ {1,0\\ }" ) ,
792+ Err ( InvalidContent ( r"\{\}" . to_string( ) ) )
793+ ) ;
794+ assert_eq ! (
795+ check_posix_regex_errors( "ab\\ {1,a\\ }" ) ,
796+ Err ( InvalidContent ( r"\{\}" . to_string( ) ) )
797+ ) ;
798+ assert_eq ! (
799+ check_posix_regex_errors( "ab\\ {a,3\\ }" ) ,
800+ Err ( InvalidContent ( r"\{\}" . to_string( ) ) )
801+ ) ;
802+ assert_eq ! (
803+ check_posix_regex_errors( "ab\\ {a,b\\ }" ) ,
804+ Err ( InvalidContent ( r"\{\}" . to_string( ) ) )
805+ ) ;
806+ assert_eq ! (
807+ check_posix_regex_errors( "ab\\ {a,\\ }" ) ,
808+ Err ( InvalidContent ( r"\{\}" . to_string( ) ) )
809+ ) ;
810+ assert_eq ! (
811+ check_posix_regex_errors( "ab\\ {,b\\ }" ) ,
812+ Err ( InvalidContent ( r"\{\}" . to_string( ) ) )
813+ ) ;
814+ }
590815}
0 commit comments