Skip to content

Commit 34421f5

Browse files
authored
Merge pull request #6134 from sargas/expr-error-messages
expr: Fix assorted test errors in tests/expr/expr.pl
2 parents b6f6d93 + 1e0f697 commit 34421f5

File tree

2 files changed

+259
-10
lines changed

2 files changed

+259
-10
lines changed

src/uu/expr/src/expr.rs

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,12 @@ pub enum ExprError {
3434
DivisionByZero,
3535
InvalidRegexExpression,
3636
ExpectedClosingBraceAfter(String),
37+
ExpectedClosingBraceInsteadOf(String),
38+
UnmatchedOpeningParenthesis,
39+
UnmatchedClosingParenthesis,
40+
UnmatchedOpeningBrace,
41+
UnmatchedClosingBrace,
42+
InvalidContent(String),
3743
}
3844

3945
impl Display for ExprError {
@@ -50,7 +56,25 @@ impl Display for ExprError {
5056
Self::DivisionByZero => write!(f, "division by zero"),
5157
Self::InvalidRegexExpression => write!(f, "Invalid regex expression"),
5258
Self::ExpectedClosingBraceAfter(s) => {
53-
write!(f, "expected ')' after {}", s.quote())
59+
write!(f, "syntax error: expecting ')' after {}", s.quote())
60+
}
61+
Self::ExpectedClosingBraceInsteadOf(s) => {
62+
write!(f, "syntax error: expecting ')' instead of {}", s.quote())
63+
}
64+
Self::UnmatchedOpeningParenthesis => {
65+
write!(f, "Unmatched ( or \\(")
66+
}
67+
Self::UnmatchedClosingParenthesis => {
68+
write!(f, "Unmatched ) or \\)")
69+
}
70+
Self::UnmatchedOpeningBrace => {
71+
write!(f, "Unmatched \\{{")
72+
}
73+
Self::UnmatchedClosingBrace => {
74+
write!(f, "Unmatched ) or \\}}")
75+
}
76+
Self::InvalidContent(s) => {
77+
write!(f, "Invalid content of {}", s)
5478
}
5579
}
5680
}

src/uu/expr/src/syntax_tree.rs

Lines changed: 234 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
// spell-checker:ignore (ToDO) ints paren prec multibytes
77

88
use num_bigint::{BigInt, ParseBigIntError};
9-
use num_traits::ToPrimitive;
9+
use num_traits::{ToPrimitive, Zero};
1010
use onig::{Regex, RegexOptions, Syntax};
1111

1212
use crate::{ExprError, ExprResult};
@@ -139,7 +139,9 @@ impl StringOp {
139139
Self::Match => {
140140
let left = left.eval()?.eval_as_string();
141141
let right = right.eval()?.eval_as_string();
142-
let re_string = format!("^{right}");
142+
check_posix_regex_errors(&right)?;
143+
let prefix = if right.starts_with('*') { r"^\" } else { "^" };
144+
let re_string = format!("{prefix}{right}");
143145
let re = Regex::with_options(
144146
&re_string,
145147
RegexOptions::REGEX_OPTION_NONE,
@@ -148,7 +150,7 @@ impl StringOp {
148150
.map_err(|_| ExprError::InvalidRegexExpression)?;
149151
Ok(if re.captures_len() > 0 {
150152
re.captures(&left)
151-
.map(|captures| captures.at(1).unwrap())
153+
.and_then(|captures| captures.at(1))
152154
.unwrap_or("")
153155
.to_string()
154156
} else {
@@ -173,6 +175,99 @@ impl StringOp {
173175
}
174176
}
175177

178+
/// Check for errors in a supplied regular expression
179+
///
180+
/// GNU coreutils shows messages for invalid regular expressions
181+
/// differently from the oniguruma library used by the regex crate.
182+
/// This method attempts to do these checks manually in one pass
183+
/// through the regular expression.
184+
///
185+
/// This method is not comprehensively checking all cases in which
186+
/// a regular expression could be invalid; any cases not caught will
187+
/// result in a [ExprError::InvalidRegexExpression] when passing the
188+
/// regular expression through the Oniguruma bindings. This method is
189+
/// intended to just identify a few situations for which GNU coreutils
190+
/// has specific error messages.
191+
fn check_posix_regex_errors(pattern: &str) -> ExprResult<()> {
192+
let mut escaped_parens: u64 = 0;
193+
let mut escaped_braces: u64 = 0;
194+
let mut escaped = false;
195+
196+
let mut repeating_pattern_text = String::new();
197+
let mut invalid_content_error = false;
198+
199+
for c in pattern.chars() {
200+
match (escaped, c) {
201+
(true, ')') => {
202+
escaped_parens = escaped_parens
203+
.checked_sub(1)
204+
.ok_or(ExprError::UnmatchedClosingParenthesis)?;
205+
}
206+
(true, '(') => {
207+
escaped_parens += 1;
208+
}
209+
(true, '}') => {
210+
escaped_braces = escaped_braces
211+
.checked_sub(1)
212+
.ok_or(ExprError::UnmatchedClosingBrace)?;
213+
let mut repetition =
214+
repeating_pattern_text[..repeating_pattern_text.len() - 1].splitn(2, ',');
215+
match (
216+
repetition
217+
.next()
218+
.expect("splitn always returns at least one string"),
219+
repetition.next(),
220+
) {
221+
("", None) => {
222+
// Empty repeating pattern
223+
invalid_content_error = true;
224+
}
225+
(x, None) | (x, Some("")) => {
226+
if x.parse::<i16>().is_err() {
227+
invalid_content_error = true;
228+
}
229+
}
230+
("", Some(x)) => {
231+
if x.parse::<i16>().is_err() {
232+
invalid_content_error = true;
233+
}
234+
}
235+
(f, Some(l)) => {
236+
if let (Ok(f), Ok(l)) = (f.parse::<i16>(), l.parse::<i16>()) {
237+
invalid_content_error = invalid_content_error || f > l;
238+
} else {
239+
invalid_content_error = true;
240+
}
241+
}
242+
}
243+
repeating_pattern_text.clear();
244+
}
245+
(true, '{') => {
246+
escaped_braces += 1;
247+
}
248+
_ => {
249+
if escaped_braces > 0 && repeating_pattern_text.len() <= 13 {
250+
repeating_pattern_text.push(c);
251+
}
252+
if escaped_braces > 0 && !(c.is_ascii_digit() || c == '\\' || c == ',') {
253+
invalid_content_error = true;
254+
}
255+
}
256+
}
257+
escaped = !escaped && c == '\\';
258+
}
259+
match (
260+
escaped_parens.is_zero(),
261+
escaped_braces.is_zero(),
262+
invalid_content_error,
263+
) {
264+
(true, true, false) => Ok(()),
265+
(_, false, _) => Err(ExprError::UnmatchedOpeningBrace),
266+
(false, _, _) => Err(ExprError::UnmatchedOpeningParenthesis),
267+
(true, true, true) => Err(ExprError::InvalidContent(r"\{\}".to_string())),
268+
}
269+
}
270+
176271
/// Precedence for infix binary operators
177272
const PRECEDENCE: &[&[(&str, BinOp)]] = &[
178273
&[("|", BinOp::String(StringOp::Or))],
@@ -442,13 +537,21 @@ impl<'a> Parser<'a> {
442537
},
443538
"(" => {
444539
let s = self.parse_expression()?;
445-
let close_paren = self.next()?;
446-
if close_paren != ")" {
540+
match self.next() {
541+
Ok(")") => {}
447542
// Since we have parsed at least a '(', there will be a token
448543
// at `self.index - 1`. So this indexing won't panic.
449-
return Err(ExprError::ExpectedClosingBraceAfter(
450-
self.input[self.index - 1].into(),
451-
));
544+
Ok(_) => {
545+
return Err(ExprError::ExpectedClosingBraceInsteadOf(
546+
self.input[self.index - 1].into(),
547+
));
548+
}
549+
Err(ExprError::MissingArgument(_)) => {
550+
return Err(ExprError::ExpectedClosingBraceAfter(
551+
self.input[self.index - 1].into(),
552+
));
553+
}
554+
Err(e) => return Err(e),
452555
}
453556
s
454557
}
@@ -484,7 +587,10 @@ pub fn is_truthy(s: &NumOrStr) -> bool {
484587

485588
#[cfg(test)]
486589
mod test {
487-
use super::{AstNode, BinOp, NumericOp, RelationOp, StringOp};
590+
use crate::ExprError;
591+
use crate::ExprError::InvalidContent;
592+
593+
use super::{check_posix_regex_errors, AstNode, BinOp, NumericOp, RelationOp, StringOp};
488594

489595
impl From<&str> for AstNode {
490596
fn from(value: &str) -> Self {
@@ -587,4 +693,123 @@ mod test {
587693
)),
588694
);
589695
}
696+
697+
#[test]
698+
fn missing_closing_parenthesis() {
699+
assert_eq!(
700+
AstNode::parse(&["(", "42"]),
701+
Err(ExprError::ExpectedClosingBraceAfter("42".to_string()))
702+
);
703+
assert_eq!(
704+
AstNode::parse(&["(", "42", "a"]),
705+
Err(ExprError::ExpectedClosingBraceInsteadOf("a".to_string()))
706+
);
707+
}
708+
709+
#[test]
710+
fn empty_substitution() {
711+
// causes a panic in 0.0.25
712+
let result = AstNode::parse(&["a", ":", r"\(b\)*"])
713+
.unwrap()
714+
.eval()
715+
.unwrap();
716+
assert_eq!(result.eval_as_string(), "");
717+
}
718+
719+
#[test]
720+
fn starting_stars_become_escaped() {
721+
let result = AstNode::parse(&["cats", ":", r"*cats"])
722+
.unwrap()
723+
.eval()
724+
.unwrap();
725+
assert_eq!(result.eval_as_string(), "0");
726+
727+
let result = AstNode::parse(&["*cats", ":", r"*cats"])
728+
.unwrap()
729+
.eval()
730+
.unwrap();
731+
assert_eq!(result.eval_as_string(), "5");
732+
}
733+
734+
#[test]
735+
fn only_match_in_beginning() {
736+
let result = AstNode::parse(&["budget", ":", r"get"])
737+
.unwrap()
738+
.eval()
739+
.unwrap();
740+
assert_eq!(result.eval_as_string(), "0");
741+
}
742+
743+
#[test]
744+
fn check_regex_valid() {
745+
assert!(check_posix_regex_errors(r"(a+b) \(a* b\)").is_ok());
746+
}
747+
748+
#[test]
749+
fn check_regex_simple_repeating_pattern() {
750+
assert!(check_posix_regex_errors(r"\(a+b\)\{4\}").is_ok());
751+
}
752+
753+
#[test]
754+
fn check_regex_missing_closing() {
755+
assert_eq!(
756+
check_posix_regex_errors(r"\(abc"),
757+
Err(ExprError::UnmatchedOpeningParenthesis)
758+
);
759+
760+
assert_eq!(
761+
check_posix_regex_errors(r"\{1,2"),
762+
Err(ExprError::UnmatchedOpeningBrace)
763+
);
764+
}
765+
766+
#[test]
767+
fn check_regex_missing_opening() {
768+
assert_eq!(
769+
check_posix_regex_errors(r"abc\)"),
770+
Err(ExprError::UnmatchedClosingParenthesis)
771+
);
772+
773+
assert_eq!(
774+
check_posix_regex_errors(r"abc\}"),
775+
Err(ExprError::UnmatchedClosingBrace)
776+
);
777+
}
778+
779+
#[test]
780+
fn check_regex_empty_repeating_pattern() {
781+
assert_eq!(
782+
check_posix_regex_errors("ab\\{\\}"),
783+
Err(InvalidContent(r"\{\}".to_string()))
784+
)
785+
}
786+
787+
#[test]
788+
fn check_regex_intervals_two_numbers() {
789+
assert_eq!(
790+
// out of order
791+
check_posix_regex_errors("ab\\{1,0\\}"),
792+
Err(InvalidContent(r"\{\}".to_string()))
793+
);
794+
assert_eq!(
795+
check_posix_regex_errors("ab\\{1,a\\}"),
796+
Err(InvalidContent(r"\{\}".to_string()))
797+
);
798+
assert_eq!(
799+
check_posix_regex_errors("ab\\{a,3\\}"),
800+
Err(InvalidContent(r"\{\}".to_string()))
801+
);
802+
assert_eq!(
803+
check_posix_regex_errors("ab\\{a,b\\}"),
804+
Err(InvalidContent(r"\{\}".to_string()))
805+
);
806+
assert_eq!(
807+
check_posix_regex_errors("ab\\{a,\\}"),
808+
Err(InvalidContent(r"\{\}".to_string()))
809+
);
810+
assert_eq!(
811+
check_posix_regex_errors("ab\\{,b\\}"),
812+
Err(InvalidContent(r"\{\}".to_string()))
813+
);
814+
}
590815
}

0 commit comments

Comments
 (0)