From 6823259bba0e65c775b54a4a0cd6f95ce7f3fbf1 Mon Sep 17 00:00:00 2001 From: mukunda katta Date: Fri, 29 May 2026 01:30:46 -0700 Subject: [PATCH 1/2] fix: accept a\text on the same line in POSIX mode (#144) GNU sed accepts text on the same line as the a/i/c backslash form, e.g. `a\bar`, even under --posix. compile_text_command_posix previously ate trailing whitespace after the backslash and rejected any remaining characters with 'extra characters after \'. Now the remainder of the line after `\` is the first appended line, with leading whitespace preserved (matching GNU), and a trailing `\` continues onto the next line. `a\` with nothing after it still reports an incomplete command. Verified against GNU sed 4.10 for a/i/c, leading-space preservation, continuation, and the incomplete case. Two prior unit tests asserted the old (non-GNU) rejection and now assert the GNU-correct text; adds a same-line continuation unit test and a dual-mode integration fixture. Closes #144 --- src/sed/compiler.rs | 90 ++++++++++++++----- tests/by-util/test_sed.rs | 4 + .../fixtures/sed/output/text_append_same_line | 15 ++++ 3 files changed, 85 insertions(+), 24 deletions(-) create mode 100644 tests/fixtures/sed/output/text_append_same_line diff --git a/src/sed/compiler.rs b/src/sed/compiler.rs index bcfff315..ba90824b 100644 --- a/src/sed/compiler.rs +++ b/src/sed/compiler.rs @@ -1221,28 +1221,41 @@ fn compile_text_command_posix( } line.advance(); // Skip \. - line.eat_spaces(); // Skip any whitespace at the end of \. - if !line.eol() { - return compilation_error( - lines, - line, - format!( - "extra characters after \\ at the end of `{}' command", - cmd.code - ), - ); - } let mut text = String::new(); - while let Some(line) = lines.next_line()? { - if line.ends_with('\\') { - // Line ends with \ to escape \n; remove the trailing \. - text.push_str(&line[..line.len() - 1]); + + // GNU also accepts text on the same line as `a\`, e.g. `a\bar`. The + // remainder of the current line is the first appended line, with leading + // whitespace preserved. A trailing `\` escapes the newline so the text + // continues on the following line. + let mut needs_more = true; + if !line.eol() { + let mut first = String::new(); + while !line.eol() { + first.push(line.current()); + line.advance(); + } + if let Some(stripped) = first.strip_suffix('\\') { + text.push_str(stripped); text.push('\n'); } else { - text.push_str(&line); + text.push_str(&first); text.push('\n'); - break; + needs_more = false; + } + } + + if needs_more { + while let Some(line) = lines.next_line()? { + if line.ends_with('\\') { + // Line ends with \ to escape \n; remove the trailing \. + text.push_str(&line[..line.len() - 1]); + text.push('\n'); + } else { + text.push_str(&line); + text.push('\n'); + break; + } } } @@ -2734,7 +2747,10 @@ mod tests { #[test] fn test_compile_text_command_posix_spaces_single_line() { - let mut chars = make_char_provider("a \\ "); + // `a\ ` (backslash followed by a single space): GNU treats the space + // as same-line text with leading whitespace preserved, so the text is + // a single space, not the next script line. + let mut chars = make_char_provider("a\\ "); let mut lines = make_line_provider(&["line1", "line2"]); let mut cmd = Command::default(); let mut context = ProcessingContext { @@ -2745,7 +2761,7 @@ mod tests { compile_text_command(&mut lines, &mut chars, &mut cmd, &mut context).unwrap(); match &cmd.data { CommandData::Text(text) => { - assert_eq!(text.to_string(), "line1\n"); + assert_eq!(text.to_string(), " \n"); } _ => panic!("Expected CommandData::Text"), } @@ -2893,7 +2909,10 @@ mod tests { } #[test] - fn test_compile_text_command_posix_with_trailing_chars() { + fn test_compile_text_command_posix_same_line_text() { + // `a \ foo`: the outer handler eats the space after `a`, then `\` + // introduces same-line text. GNU keeps the text after `\` verbatim + // (here a leading space then "foo"), rather than rejecting it. let mut chars = make_char_provider("a \\ foo"); let mut lines = make_line_provider(&["line1", "line2"]); let mut cmd = Command::default(); @@ -2902,10 +2921,33 @@ mod tests { ..Default::default() }; - let result = compile_text_command(&mut lines, &mut chars, &mut cmd, &mut context); - assert!(result.is_err()); - let err = result.unwrap_err().to_string(); - assert!(err.contains("extra characters after \\")); + compile_text_command(&mut lines, &mut chars, &mut cmd, &mut context).unwrap(); + match &cmd.data { + CommandData::Text(text) => { + assert_eq!(text.to_string(), " foo\n"); + } + _ => panic!("Expected CommandData::Text"), + } + } + + #[test] + fn test_compile_text_command_posix_same_line_continuation() { + // Same-line text ending in `\` continues onto the next script line. + let mut chars = make_char_provider("a\\bar\\"); + let mut lines = make_line_provider(&["baz", "next"]); + let mut cmd = Command::default(); + let mut context = ProcessingContext { + posix: true, + ..Default::default() + }; + + compile_text_command(&mut lines, &mut chars, &mut cmd, &mut context).unwrap(); + match &cmd.data { + CommandData::Text(text) => { + assert_eq!(text.to_string(), "bar\nbaz\n"); + } + _ => panic!("Expected CommandData::Text"), + } } // read_file_path diff --git a/tests/by-util/test_sed.rs b/tests/by-util/test_sed.rs index b31fed76..24854b02 100644 --- a/tests/by-util/test_sed.rs +++ b/tests/by-util/test_sed.rs @@ -631,6 +631,10 @@ extra ] ); +// Text supplied on the same line as `a\` (GNU accepts this in POSIX mode +// too; see issue #144). Runs under both --posix and GNU parsing. +check_output_posix!(text_append_same_line, ["-e", r"4a\extra", LINES1]); + check_output_posix!( text_insert_quit, [ diff --git a/tests/fixtures/sed/output/text_append_same_line b/tests/fixtures/sed/output/text_append_same_line new file mode 100644 index 00000000..932349a2 --- /dev/null +++ b/tests/fixtures/sed/output/text_append_same_line @@ -0,0 +1,15 @@ +l1_1 +l1_2 +l1_3 +l1_4 +extra +l1_5 +l1_6 +l1_7 +l1_8 +l1_9 +l1_10 +l1_11 +l1_12 +l1_13 +l1_14 From 71ef679d4946ce196a8688a06edb5bfd1c2f966f Mon Sep 17 00:00:00 2001 From: MukundaKatta Date: Sat, 13 Jun 2026 12:03:56 -0700 Subject: [PATCH 2/2] fix(posix): reject same-line append text under --posix Per @dspinellis review on #451: POSIX (and Seventh Edition Unix sed) do not allow text after a\/c\/i\ on the same line. Restore the strict POSIX path (error on trailing characters) and keep same-line text as a GNU extension in compile_text_command_gnu. Updated the POSIX tests to assert rejection. --- src/sed/compiler.rs | 89 ++++++++++++++++++--------------------------- 1 file changed, 35 insertions(+), 54 deletions(-) diff --git a/src/sed/compiler.rs b/src/sed/compiler.rs index ba90824b..722ecc6e 100644 --- a/src/sed/compiler.rs +++ b/src/sed/compiler.rs @@ -1221,41 +1221,31 @@ fn compile_text_command_posix( } line.advance(); // Skip \. + line.eat_spaces(); // Skip any whitespace at the end of \. + if !line.eol() { + // POSIX (and Seventh Edition Unix sed) do not allow text after the + // `a\`/`c\`/`i\` command on the same line. Same-line text is a GNU + // extension, handled by compile_text_command_gnu in non-POSIX mode. + return compilation_error( + lines, + line, + format!( + "extra characters after \\ at the end of `{}' command", + cmd.code + ), + ); + } let mut text = String::new(); - - // GNU also accepts text on the same line as `a\`, e.g. `a\bar`. The - // remainder of the current line is the first appended line, with leading - // whitespace preserved. A trailing `\` escapes the newline so the text - // continues on the following line. - let mut needs_more = true; - if !line.eol() { - let mut first = String::new(); - while !line.eol() { - first.push(line.current()); - line.advance(); - } - if let Some(stripped) = first.strip_suffix('\\') { - text.push_str(stripped); + while let Some(line) = lines.next_line()? { + if line.ends_with('\\') { + // Line ends with \ to escape \n; remove the trailing \. + text.push_str(&line[..line.len() - 1]); text.push('\n'); } else { - text.push_str(&first); + text.push_str(&line); text.push('\n'); - needs_more = false; - } - } - - if needs_more { - while let Some(line) = lines.next_line()? { - if line.ends_with('\\') { - // Line ends with \ to escape \n; remove the trailing \. - text.push_str(&line[..line.len() - 1]); - text.push('\n'); - } else { - text.push_str(&line); - text.push('\n'); - break; - } + break; } } @@ -2747,10 +2737,9 @@ mod tests { #[test] fn test_compile_text_command_posix_spaces_single_line() { - // `a\ ` (backslash followed by a single space): GNU treats the space - // as same-line text with leading whitespace preserved, so the text is - // a single space, not the next script line. - let mut chars = make_char_provider("a\\ "); + // Under --posix, `a \ ` skips the whitespace after `\`; the text comes + // from the following script line(s), not the same line. + let mut chars = make_char_provider("a \\ "); let mut lines = make_line_provider(&["line1", "line2"]); let mut cmd = Command::default(); let mut context = ProcessingContext { @@ -2761,7 +2750,7 @@ mod tests { compile_text_command(&mut lines, &mut chars, &mut cmd, &mut context).unwrap(); match &cmd.data { CommandData::Text(text) => { - assert_eq!(text.to_string(), " \n"); + assert_eq!(text.to_string(), "line1\n"); } _ => panic!("Expected CommandData::Text"), } @@ -2910,10 +2899,9 @@ mod tests { #[test] fn test_compile_text_command_posix_same_line_text() { - // `a \ foo`: the outer handler eats the space after `a`, then `\` - // introduces same-line text. GNU keeps the text after `\` verbatim - // (here a leading space then "foo"), rather than rejecting it. - let mut chars = make_char_provider("a \\ foo"); + // Under --posix, text after `a\` on the same line is rejected (it is a + // GNU extension). Cf. dspinellis review on uutils/sed#451. + let mut chars = make_char_provider("a\\bar"); let mut lines = make_line_provider(&["line1", "line2"]); let mut cmd = Command::default(); let mut context = ProcessingContext { @@ -2921,18 +2909,16 @@ mod tests { ..Default::default() }; - compile_text_command(&mut lines, &mut chars, &mut cmd, &mut context).unwrap(); - match &cmd.data { - CommandData::Text(text) => { - assert_eq!(text.to_string(), " foo\n"); - } - _ => panic!("Expected CommandData::Text"), - } + let result = compile_text_command(&mut lines, &mut chars, &mut cmd, &mut context); + assert!(result.is_err()); + let err = result.unwrap_err().to_string(); + assert!(err.contains("extra characters after")); } #[test] fn test_compile_text_command_posix_same_line_continuation() { - // Same-line text ending in `\` continues onto the next script line. + // Same-line text, even with a trailing `\` continuation, is rejected + // under --posix; only `a\` followed by a newline is valid. let mut chars = make_char_provider("a\\bar\\"); let mut lines = make_line_provider(&["baz", "next"]); let mut cmd = Command::default(); @@ -2941,13 +2927,8 @@ mod tests { ..Default::default() }; - compile_text_command(&mut lines, &mut chars, &mut cmd, &mut context).unwrap(); - match &cmd.data { - CommandData::Text(text) => { - assert_eq!(text.to_string(), "bar\nbaz\n"); - } - _ => panic!("Expected CommandData::Text"), - } + let result = compile_text_command(&mut lines, &mut chars, &mut cmd, &mut context); + assert!(result.is_err()); } // read_file_path