Skip to content

Commit 3f9514d

Browse files
authored
Merge pull request #7974 from frendsick/fix/expr-regex-special-cases
expr: Handle more special cases for regex pattern
2 parents 18b963e + ab5cf74 commit 3f9514d

File tree

3 files changed

+97
-51
lines changed

3 files changed

+97
-51
lines changed

src/uu/expr/src/expr.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ pub enum ExprError {
5050
UnmatchedClosingBrace,
5151
#[error("Invalid content of \\{{\\}}")]
5252
InvalidBracketContent,
53+
#[error("Trailing backslash")]
54+
TrailingBackslash,
5355
}
5456

5557
impl UError for ExprError {

src/uu/expr/src/syntax_tree.rs

Lines changed: 22 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,8 @@ impl StringOp {
161161
match first {
162162
Some('^') => {} // Start of string anchor is already added
163163
Some('*') => re_string.push_str(r"\*"),
164+
Some('$') if !is_end_of_expression(&pattern_chars) => re_string.push_str(r"\$"),
165+
Some('\\') if right.len() == 1 => return Err(ExprError::TrailingBackslash),
164166
Some(char) => re_string.push(char),
165167
None => return Ok(0.into()),
166168
};
@@ -169,6 +171,8 @@ impl StringOp {
169171
let mut prev = first.unwrap_or_default();
170172
let mut prev_is_escaped = false;
171173
while let Some(curr) = pattern_chars.next() {
174+
let curr_is_escaped = prev == '\\' && !prev_is_escaped;
175+
172176
match curr {
173177
'^' => match (prev, prev_is_escaped) {
174178
// Start of a capturing group
@@ -181,25 +185,11 @@ impl StringOp {
181185
| ('\\', false) => re_string.push(curr),
182186
_ => re_string.push_str(r"\^"),
183187
},
184-
'$' => {
185-
if let Some('\\') = pattern_chars.peek() {
186-
// The next character was checked to be a backslash
187-
let backslash = pattern_chars.next().unwrap_or_default();
188-
match pattern_chars.peek() {
189-
// End of a capturing group
190-
Some(')') => re_string.push('$'),
191-
// End of an alternative pattern
192-
Some('|') => re_string.push('$'),
193-
_ => re_string.push_str(r"\$"),
194-
}
195-
re_string.push(backslash);
196-
} else if (prev_is_escaped || prev != '\\')
197-
&& pattern_chars.peek().is_some()
198-
{
199-
re_string.push_str(r"\$");
200-
} else {
201-
re_string.push('$');
202-
}
188+
'$' if !curr_is_escaped && !is_end_of_expression(&pattern_chars) => {
189+
re_string.push_str(r"\$");
190+
}
191+
'\\' if !curr_is_escaped && pattern_chars.peek().is_none() => {
192+
return Err(ExprError::TrailingBackslash);
203193
}
204194
_ => re_string.push(curr),
205195
}
@@ -241,6 +231,19 @@ impl StringOp {
241231
}
242232
}
243233

234+
/// Check if regex pattern character iterator is at the end of a regex expression or subexpression
///
/// Returns `true` when no characters remain in `pattern_chars`, or when the next
/// two characters are a backslash followed by `)` or `|`. Returns `false` in
/// every other case, including a backslash that is the last remaining character.
///
/// The lookahead is done on a clone, so the iterator referenced by
/// `pattern_chars` is not advanced by this call.
235+
fn is_end_of_expression<I>(pattern_chars: &I) -> bool
236+
where
237+
I: Iterator<Item = char> + Clone,
238+
{
239+
let mut pattern_chars_clone = pattern_chars.clone(); // Inspect a copy; the caller keeps its position
240+
match pattern_chars_clone.next() {
241+
Some('\\') => matches!(pattern_chars_clone.next(), Some(')' | '|')), // Only `\)` or `\|` count as an end
242+
None => true, // No characters left
243+
_ => false, // Any other character means the expression continues
244+
}
245+
}
246+
244247
/// Check for errors in a supplied regular expression
245248
///
246249
/// GNU coreutils shows messages for invalid regular expressions

tests/by-util/test_expr.rs

Lines changed: 73 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -273,35 +273,44 @@ fn test_length_mb() {
273273
}
274274

275275
#[test]
276-
fn test_regex() {
277-
new_ucmd!()
278-
.args(&["a^b", ":", "a^b"])
279-
.succeeds()
280-
.stdout_only("3\n");
276+
fn test_regex_empty() {
277+
new_ucmd!().args(&["", ":", ""]).fails().stdout_only("0\n");
281278
new_ucmd!()
282-
.args(&["a^b", ":", "a\\^b"])
283-
.succeeds()
284-
.stdout_only("3\n");
279+
.args(&["abc", ":", ""])
280+
.fails()
281+
.stdout_only("0\n");
282+
}
283+
284+
#[test]
285+
fn test_regex_trailing_backslash() {
285286
new_ucmd!()
286-
.args(&["b", ":", "a\\|^b"])
287+
.args(&["\\", ":", "\\\\"])
287288
.succeeds()
288289
.stdout_only("1\n");
289290
new_ucmd!()
290-
.args(&["ab", ":", "\\(^a\\)b"])
291-
.succeeds()
292-
.stdout_only("a\n");
291+
.args(&["\\", ":", "\\"])
292+
.fails()
293+
.stderr_only("expr: Trailing backslash\n");
293294
new_ucmd!()
294-
.args(&["a$b", ":", "a\\$b"])
295+
.args(&["abc\\", ":", "abc\\\\"])
295296
.succeeds()
296-
.stdout_only("3\n");
297+
.stdout_only("4\n");
297298
new_ucmd!()
298-
.args(&["a", ":", "a$\\|b"])
299+
.args(&["abc\\", ":", "abc\\"])
300+
.fails()
301+
.stderr_only("expr: Trailing backslash\n");
302+
}
303+
304+
#[test]
305+
fn test_regex_caret() {
306+
new_ucmd!()
307+
.args(&["a^b", ":", "a^b"])
299308
.succeeds()
300-
.stdout_only("1\n");
309+
.stdout_only("3\n");
301310
new_ucmd!()
302-
.args(&["ab", ":", "a\\(b$\\)"])
311+
.args(&["a^b", ":", "a\\^b"])
303312
.succeeds()
304-
.stdout_only("b\n");
313+
.stdout_only("3\n");
305314
new_ucmd!()
306315
.args(&["abc", ":", "^abc"])
307316
.succeeds()
@@ -311,13 +320,17 @@ fn test_regex() {
311320
.succeeds()
312321
.stdout_only("4\n");
313322
new_ucmd!()
314-
.args(&["b^$ic", ":", "b^\\$ic"])
323+
.args(&["b", ":", "a\\|^b"])
315324
.succeeds()
316-
.stdout_only("5\n");
325+
.stdout_only("1\n");
317326
new_ucmd!()
318-
.args(&["a$c", ":", "a$\\c"])
327+
.args(&["ab", ":", "\\(^a\\)b"])
319328
.succeeds()
320-
.stdout_only("3\n");
329+
.stdout_only("a\n");
330+
new_ucmd!()
331+
.args(&["^abc", ":", "^abc"])
332+
.fails()
333+
.stdout_only("0\n");
321334
new_ucmd!()
322335
.args(&["^^^^^^^^^", ":", "^^^"])
323336
.succeeds()
@@ -338,29 +351,57 @@ fn test_regex() {
338351
.args(&["\\a", ":", "\\\\[^^]"])
339352
.succeeds()
340353
.stdout_only("2\n");
354+
// Patterns are implicitly anchored at the start, so "bc" is treated as "^bc"
355+
new_ucmd!()
356+
.args(&["abc", ":", "bc"])
357+
.fails()
358+
.stdout_only("0\n");
341359
new_ucmd!()
342360
.args(&["^a", ":", "^^[^^]"])
343361
.succeeds()
344362
.stdout_only("2\n");
345363
new_ucmd!()
346-
.args(&["-5", ":", "-\\{0,1\\}[0-9]*$"])
364+
.args(&["abc", ":", "ab[^c]"])
365+
.fails()
366+
.stdout_only("0\n");
367+
}
368+
369+
#[test]
370+
fn test_regex_dollar() {
371+
new_ucmd!()
372+
.args(&["a$b", ":", "a\\$b"])
373+
.succeeds()
374+
.stdout_only("3\n");
375+
new_ucmd!()
376+
.args(&["a", ":", "a$\\|b"])
377+
.succeeds()
378+
.stdout_only("1\n");
379+
new_ucmd!()
380+
.args(&["ab", ":", "a\\(b$\\)"])
381+
.succeeds()
382+
.stdout_only("b\n");
383+
new_ucmd!()
384+
.args(&["a$c", ":", "a$\\c"])
385+
.succeeds()
386+
.stdout_only("3\n");
387+
new_ucmd!()
388+
.args(&["$a", ":", "$a"])
347389
.succeeds()
348390
.stdout_only("2\n");
349-
new_ucmd!().args(&["", ":", ""]).fails().stdout_only("0\n");
350391
new_ucmd!()
351-
.args(&["abc", ":", ""])
352-
.fails()
353-
.stdout_only("0\n");
392+
.args(&["a", ":", "a$\\|b"])
393+
.succeeds()
394+
.stdout_only("1\n");
354395
new_ucmd!()
355-
.args(&["abc", ":", "bc"])
356-
.fails()
357-
.stdout_only("0\n");
396+
.args(&["-5", ":", "-\\{0,1\\}[0-9]*$"])
397+
.succeeds()
398+
.stdout_only("2\n");
358399
new_ucmd!()
359-
.args(&["^abc", ":", "^abc"])
400+
.args(&["$", ":", "$"])
360401
.fails()
361402
.stdout_only("0\n");
362403
new_ucmd!()
363-
.args(&["abc", ":", "ab[^c]"])
404+
.args(&["a$", ":", "a$\\|b"])
364405
.fails()
365406
.stdout_only("0\n");
366407
}

0 commit comments

Comments
 (0)