Skip to content

Commit 65373e4

Browse files
committed
fix(printf): complete shell quoting implementation for %q format
This commit completes the implementation of printf %q shell quoting to match GNU coreutils behavior. Key Changes to shell_quoter.rs: 1. Simplified enter_dollar() logic: - Only wraps existing buffer in ls mode when needed - Handles both empty buffer and dollar-quoted buffer cases - Removed unnecessary empty quotes prefix 2. Fixed apostrophe handling: - After exiting dollar mode, use simple backslash-escape: \' - Standalone single quote uses double quotes: "'" - Embedded quotes use backslash-escape 3. Added EscapeState::Backslash handler: - Control characters (\n, \t, etc.) enter dollar mode - Properly renders escape sequences using $'...' syntax 4. Enhanced ForceQuote handling: - printf %q mode: backslash-escapes metacharacters - ls mode: wraps in outer quotes (no escaping needed) 5. Improved finalize() logic: - printf %q mode returns buffer as-is (no outer quotes) - ls mode adds outer quotes when needed 6. Added show_control awareness: - Created initial_quoting_with_show_control() function - Only treats control chars as special if show_control=true - Hidden control chars (shown as '?') don't need special handling - Added always_quote field to NonEscapedShellQuoter for proper tracking 7. Simplified control character detection: - Checks for control characters in initial_quoting() - Forces single quotes for control char compatibility - Respects show_control flag to avoid unnecessary quoting
1 parent 1efedf2 commit 65373e4

File tree

1 file changed

+79
-34
lines changed

1 file changed

+79
-34
lines changed

src/uucore/src/lib/features/quoting_style/shell_quoter.rs

Lines changed: 79 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@ pub(super) struct NonEscapedShellQuoter<'a> {
2525
/// with `?`.
2626
show_control: bool,
2727

28+
/// Whether to always quote the output
29+
always_quote: bool,
30+
2831
// INTERNAL STATE
2932
/// Whether the name should be quoted.
3033
must_quote: bool,
@@ -40,11 +43,13 @@ impl<'a> NonEscapedShellQuoter<'a> {
4043
dirname: bool,
4144
size_hint: usize,
4245
) -> Self {
43-
let (quotes, must_quote) = initial_quoting(reference, dirname, always_quote);
46+
let (quotes, must_quote) =
47+
initial_quoting_with_show_control(reference, dirname, always_quote, show_control);
4448
Self {
4549
reference,
4650
quotes,
4751
show_control,
52+
always_quote,
4853
must_quote,
4954
buffer: Vec::with_capacity(size_hint),
5055
}
@@ -82,7 +87,12 @@ impl Quoter for NonEscapedShellQuoter<'_> {
8287
}
8388

8489
fn finalize(self: Box<Self>) -> Vec<u8> {
85-
finalize_shell_quoter(self.buffer, self.reference, self.must_quote, self.quotes)
90+
finalize_shell_quoter(
91+
self.buffer,
92+
self.reference,
93+
self.must_quote || self.always_quote,
94+
self.quotes,
95+
)
8696
}
8797
}
8898

@@ -126,7 +136,7 @@ impl<'a> EscapedShellQuoter<'a> {
126136
let (quotes, must_quote) = initial_quoting(reference, dirname, always_quote);
127137

128138
// commit_dollar_mode controls quoting strategy:
129-
// true (printf %q): use selective dollar-quoting (same as ls)
139+
// true (printf %q): use selective dollar-quoting
130140
// false (ls): use selective dollar-quoting
131141
// Both modes use selective quoting: enter $'...' only for control chars
132142
let commit_dollar = commit_dollar_mode;
@@ -149,12 +159,11 @@ impl<'a> EscapedShellQuoter<'a> {
149159
// Close any existing quote section first
150160
self.buffer.push(b'\'');
151161
self.in_quote_section = false;
152-
} else if self.buffer.is_empty() {
153-
// Both ls and printf %q modes: add empty quotes when buffer is empty
154-
// This indicates the string starts with something needing quotes
155-
self.buffer.extend(b"''");
156-
} else if !self.commit_dollar {
157-
// ls mode with existing content: wrap it in quotes
162+
} else if !self.commit_dollar
163+
&& !self.buffer.is_empty()
164+
&& !self.buffer.windows(2).any(|w| w == b"$'")
165+
{
166+
// ls mode (not printf %q): Buffer has content but no dollar quotes - wrap it
158167
let quote = if self.quotes == Quotes::Single {
159168
b'\''
160169
} else {
@@ -165,7 +174,12 @@ impl<'a> EscapedShellQuoter<'a> {
165174
quoted.extend_from_slice(&self.buffer);
166175
quoted.push(quote);
167176
self.buffer = quoted;
177+
} else if !self.commit_dollar && self.buffer.is_empty() {
178+
// ls mode: When entering dollar mode with empty buffer (entire string needs escaping),
179+
// prefix with empty quote '' to match GNU behavior
180+
self.buffer.extend(b"''");
168181
}
182+
// If buffer is empty or already contains $'...' just append next $'
169183
self.buffer.extend(b"$'");
170184
self.in_dollar = true;
171185
}
@@ -189,20 +203,21 @@ impl Quoter for EscapedShellQuoter<'_> {
189203
EscapeState::Backslash('\'') | EscapeState::Char('\'') => {
190204
if self.in_dollar {
191205
// Inside $'...' section - need to exit, then handle apostrophe
192-
self.exit_dollar();
206+
self.exit_dollar(); // This adds closing '
193207
self.must_quote = true;
194-
// Backslash-escape the apostrophe
208+
// After exit_dollar's closing ', add: backslash-quote
209+
// Result: $'\001' + \' = $'\001'\'
195210
self.buffer.extend(b"\\'");
211+
// Now optionally open a new quote section for following chars
212+
// Don't set in_quote_section - let next char decide
196213
} else if self.commit_dollar {
197214
// printf %q mode, not in dollar section
198-
// Check if this is a standalone single quote
215+
self.must_quote = true;
216+
// Special case: standalone single quote uses double quotes
199217
if self.buffer.is_empty() && self.reference.len() == 1 {
200-
// Standalone single quote uses double quotes: "'"
201-
self.must_quote = true;
202218
self.buffer.extend(b"\"'\"");
203219
} else {
204-
// Embedded quote - backslash escape
205-
self.must_quote = true;
220+
// Embedded quote - backslash-escape it
206221
self.buffer.extend(b"\\'");
207222
}
208223
} else {
@@ -228,6 +243,16 @@ impl Quoter for EscapedShellQuoter<'_> {
228243
self.buffer.extend(b"\\\\");
229244
}
230245
}
246+
EscapeState::Backslash(x) => {
247+
// Control character escapes (\n, \t, \r, etc.) or single quote
248+
// These MUST use $'...' syntax to preserve the escape sequence
249+
if !self.in_dollar {
250+
self.enter_dollar();
251+
}
252+
self.must_quote = true;
253+
self.buffer.push(b'\\');
254+
self.buffer.extend(x.to_string().as_bytes());
255+
}
231256
EscapeState::Char(x) => {
232257
if self.in_dollar {
233258
if self.commit_dollar {
@@ -237,12 +262,7 @@ impl Quoter for EscapedShellQuoter<'_> {
237262
} else {
238263
// In selective dollar mode (ls), exit dollar and start new quoted section
239264
self.exit_dollar();
240-
let quote = if self.quotes == Quotes::Single {
241-
b'\''
242-
} else {
243-
b'"'
244-
};
245-
self.buffer.push(quote);
265+
self.buffer.push(b'\'');
246266
self.in_quote_section = true;
247267
self.buffer.extend(x.to_string().as_bytes());
248268
}
@@ -268,7 +288,8 @@ impl Quoter for EscapedShellQuoter<'_> {
268288
} else {
269289
// Not in dollar mode
270290
if self.commit_dollar {
271-
// printf %q: just add the character, will be quoted in finalize
291+
// printf %q: backslash-escape the special character
292+
self.buffer.push(b'\\');
272293
self.buffer.extend(x.to_string().as_bytes());
273294
} else {
274295
// ls: will be wrapped in outer quotes, no escaping needed
@@ -326,21 +347,30 @@ impl Quoter for EscapedShellQuoter<'_> {
326347
return self.buffer;
327348
}
328349

329-
// If buffer contains dollar-quoted sections, we're done
330-
if self.buffer.windows(2).any(|w| w == b"$'") {
350+
// Check if we need outer quotes
351+
let contains_quote_chars = bytes_start_with(self.reference, SPECIAL_SHELL_CHARS_START);
352+
let should_quote = self.must_quote || self.always_quote || contains_quote_chars;
353+
354+
// If buffer contains dollar-quoted sections and doesn't need outer quotes, we're done
355+
if self.buffer.windows(2).any(|w| w == b"$'") && !should_quote {
331356
return self.buffer;
332357
}
333358

334-
// For strings without dollar quotes, add outer quotes if needed
335-
let contains_quote_chars = bytes_start_with(self.reference, SPECIAL_SHELL_CHARS_START);
336-
let should_quote = self.must_quote || self.always_quote || contains_quote_chars;
337-
338359
// For printf %q (commit_dollar=true), if the buffer already contains quotes (e.g., "'"
339360
// for a standalone single quote), don't add outer quotes
340-
if self.commit_dollar && (self.buffer.starts_with(b"\"'\"") || self.buffer.starts_with(b"'") || self.buffer.starts_with(b"\"")) {
361+
if self.commit_dollar
362+
&& (self.buffer.starts_with(b"\"'\"")
363+
|| self.buffer.starts_with(b"'")
364+
|| self.buffer.starts_with(b"\""))
365+
{
341366
return self.buffer;
342367
}
343-
368+
369+
// For printf %q (commit_dollar=true), don't add outer quotes
370+
if self.commit_dollar {
371+
return self.buffer;
372+
}
373+
344374
if should_quote {
345375
let mut quoted = Vec::with_capacity(self.buffer.len() + 2);
346376
let quote = if self.quotes == Quotes::Single {
@@ -360,9 +390,24 @@ impl Quoter for EscapedShellQuoter<'_> {
360390

361391
/// Deduce the initial quoting status from the provided information
362392
fn initial_quoting(input: &[u8], dirname: bool, always_quote: bool) -> (Quotes, bool) {
363-
if input
364-
.iter()
365-
.any(|c| shell_escaped_char_set(dirname).contains(c))
393+
initial_quoting_with_show_control(input, dirname, always_quote, true)
394+
}
395+
396+
/// Deduce the initial quoting status, with awareness of whether control chars will be shown
397+
fn initial_quoting_with_show_control(
398+
input: &[u8],
399+
dirname: bool,
400+
always_quote: bool,
401+
show_control: bool,
402+
) -> (Quotes, bool) {
403+
// Check for control characters FIRST - they require $'...' which only works with single quotes
404+
// But only consider them if we're showing them; if hiding, they become '?' which isn't special
405+
let has_control_chars = show_control && input.iter().any(|&c| c < 32 || c == 127);
406+
407+
if has_control_chars
408+
|| input
409+
.iter()
410+
.any(|c| shell_escaped_char_set(dirname).contains(c))
366411
{
367412
(Quotes::Single, true)
368413
} else if input.contains(&b'\'') {

0 commit comments

Comments
 (0)