Skip to content

Commit e801364

Browse files
tr: Add ambiguous octal escape warning, issue #6821
1 parent 4bc9e7b commit e801364

File tree

2 files changed

+42
-8
lines changed

2 files changed

+42
-8
lines changed

src/uu/tr/src/operation.rs

Lines changed: 41 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,15 @@ use nom::{
1616
IResult,
1717
};
1818
use std::{
19+
char,
1920
collections::{HashMap, HashSet},
2021
error::Error,
2122
fmt::{Debug, Display},
2223
io::{BufRead, Write},
2324
ops::Not,
2425
};
2526
use uucore::error::UError;
27+
use uucore::show_warning;
2628

2729
#[derive(Debug, Clone)]
2830
pub enum BadSequence {
@@ -293,7 +295,9 @@ impl Sequence {
293295
Self::parse_class,
294296
Self::parse_char_equal,
295297
// NOTE: This must be the last one
296-
map(Self::parse_backslash_or_char, |s| Ok(Self::Char(s))),
298+
map(Self::parse_backslash_or_char_with_warning, |s| {
299+
Ok(Self::Char(s))
300+
}),
297301
)))(input)
298302
.map(|(_, r)| r)
299303
.unwrap()
@@ -302,10 +306,16 @@ impl Sequence {
302306
}
303307

304308
fn parse_octal(input: &[u8]) -> IResult<&[u8], u8> {
309+
// For `parse_char_range`, `parse_char_star`, `parse_char_repeat`, `parse_char_equal`.
310+
// Because in these patterns, there are no ambiguous cases.
311+
preceded(tag("\\"), Self::parse_octal_up_to_three_digits)(input)
312+
}
313+
314+
fn parse_octal_with_warning(input: &[u8]) -> IResult<&[u8], u8> {
305315
preceded(
306316
tag("\\"),
307317
alt((
308-
Self::parse_octal_up_to_three_digits,
318+
Self::parse_octal_up_to_three_digits_with_warning,
309319
// Fallback for if the three digit octal escape is greater than \377 (0xFF), and therefore can't be
310320
// parsed as a byte
311321
// See test `test_multibyte_octal_sequence`
@@ -319,13 +329,31 @@ impl Sequence {
319329
recognize(many_m_n(1, 3, one_of("01234567"))),
320330
|out: &[u8]| {
321331
let str_to_parse = std::str::from_utf8(out).unwrap();
332+
match u8::from_str_radix(str_to_parse, 8) {
333+
Ok(ue) => Some(ue),
334+
Err(_pa) => None,
335+
}
336+
},
337+
)(input)
338+
}
322339

340+
fn parse_octal_up_to_three_digits_with_warning(input: &[u8]) -> IResult<&[u8], u8> {
341+
map_opt(
342+
recognize(many_m_n(1, 3, one_of("01234567"))),
343+
|out: &[u8]| {
344+
let str_to_parse = std::str::from_utf8(out).unwrap();
323345
match u8::from_str_radix(str_to_parse, 8) {
324346
Ok(ue) => Some(ue),
325347
Err(_pa) => {
326-
// TODO
327-
// A warning needs to be printed here
328-
// See https://github.com/uutils/coreutils/issues/6821
348+
let origin_octal: &str = std::str::from_utf8(input).unwrap();
349+
let actual_octal_tail: &str = std::str::from_utf8(&input[0..2]).unwrap();
350+
let outstand_char: char = char::from_u32(input[2] as u32).unwrap();
351+
show_warning!(
352+
"the ambiguous octal escape \\{} is being\n interpreted as the 2-byte sequence \\0{}, {}",
353+
origin_octal,
354+
actual_octal_tail,
355+
outstand_char
356+
);
329357
None
330358
}
331359
}
@@ -360,6 +388,14 @@ impl Sequence {
360388
alt((Self::parse_octal, Self::parse_backslash, Self::single_char))(input)
361389
}
362390

391+
fn parse_backslash_or_char_with_warning(input: &[u8]) -> IResult<&[u8], u8> {
392+
alt((
393+
Self::parse_octal_with_warning,
394+
Self::parse_backslash,
395+
Self::single_char,
396+
))(input)
397+
}
398+
363399
fn single_char(input: &[u8]) -> IResult<&[u8], u8> {
364400
take(1usize)(input).map(|(l, a)| (l, a[0]))
365401
}

tests/by-util/test_tr.rs

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1494,9 +1494,7 @@ fn test_multibyte_octal_sequence() {
14941494
.args(&["-d", r"\501"])
14951495
.pipe_in("(1Ł)")
14961496
.succeeds()
1497-
// TODO
1498-
// A warning needs to be printed here
1499-
// See https://github.com/uutils/coreutils/issues/6821
1497+
.stderr_is("tr: warning: the ambiguous octal escape \\501 is being\n interpreted as the 2-byte sequence \\050, 1\n")
15001498
.stdout_is("Ł)");
15011499
}
15021500

0 commit comments

Comments
 (0)