Skip to content

Commit 51161b0

Browse files
committed
fix(printf): implement bash-compatible %q shell-escape format
1 parent bd58575 commit 51161b0

File tree

6 files changed

+470
-146
lines changed

6 files changed

+470
-146
lines changed

.vscode/cspell.dictionaries/jargon.wordlist.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ makedev
9090
mebi
9191
mebibytes
9292
mergeable
93+
metacharacters
9394
microbenchmark
9495
microbenchmarks
9596
microbenchmarking

src/uucore/src/lib/features/format/spec.rs

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,9 @@ use super::{
1515
};
1616
use crate::{
1717
format::FormatArguments,
18+
i18n::UEncoding,
1819
os_str_as_bytes,
19-
quoting_style::{QuotingStyle, locale_aware_escape_name},
20+
quoting_style::{QuotingStyle, escape_name},
2021
};
2122
use std::{io::Write, num::NonZero, ops::ControlFlow};
2223

@@ -403,10 +404,14 @@ impl Spec {
403404
writer.write_all(&parsed).map_err(FormatError::IoError)
404405
}
405406
Self::QuotedString { position } => {
406-
let s = locale_aware_escape_name(
407-
args.next_string(*position),
408-
QuotingStyle::SHELL_ESCAPE,
409-
);
407+
// printf %q uses committed dollar mode: when control characters are present, the entire string is wrapped in $'...'.
408+
let printf_style = QuotingStyle::Shell {
409+
escape: true,
410+
always_quote: false,
411+
show_control: false,
412+
commit_dollar_mode: true, // printf %q style
413+
};
414+
let s = escape_name(args.next_string(*position), printf_style, UEncoding::Utf8);
410415
let bytes = os_str_as_bytes(&s)?;
411416
writer.write_all(bytes).map_err(FormatError::IoError)
412417
}
@@ -595,10 +600,13 @@ fn eat_number(rest: &mut &[u8], index: &mut usize) -> Option<usize> {
595600
match rest[*index..].iter().position(|b| !b.is_ascii_digit()) {
596601
None | Some(0) => None,
597602
Some(i) => {
598-
// Handle large numbers that would cause overflow
599-
let num_str = std::str::from_utf8(&rest[*index..(*index + i)]).unwrap();
603+
// If the digit run overflows `usize`, return `None` and leave `index` unadvanced
604+
let parsed = std::str::from_utf8(&rest[*index..(*index + i)])
605+
.unwrap()
606+
.parse()
607+
.ok()?;
600608
*index += i;
601-
Some(num_str.parse().unwrap_or(usize::MAX))
609+
Some(parsed)
602610
}
603611
}
604612
}

src/uucore/src/lib/features/quoting_style/escaped_char.rs

Lines changed: 33 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -26,19 +26,19 @@ pub enum EscapeState {
2626
}
2727

2828
/// Bytes we need to present as escaped octal, in the form of `\nnn` per byte.
29-
/// Only supports characters up to 2 bytes long in UTF-8.
29+
/// Supports characters up to 4 bytes long in UTF-8.
3030
pub struct EscapeOctal {
31-
c: [u8; 2],
31+
bytes: [u8; 4],
32+
num_bytes: usize,
33+
byte_idx: usize,
34+
digit_idx: u8,
3235
state: EscapeOctalState,
33-
idx: u8,
3436
}
3537

3638
enum EscapeOctalState {
3739
Done,
38-
FirstBackslash,
39-
FirstValue,
40-
LastBackslash,
41-
LastValue,
40+
Backslash,
41+
Value,
4242
}
4343

4444
fn byte_to_octal_digit(byte: u8, idx: u8) -> u8 {
@@ -51,30 +51,23 @@ impl Iterator for EscapeOctal {
5151
fn next(&mut self) -> Option<char> {
5252
match self.state {
5353
EscapeOctalState::Done => None,
54-
EscapeOctalState::FirstBackslash => {
55-
self.state = EscapeOctalState::FirstValue;
54+
EscapeOctalState::Backslash => {
55+
self.state = EscapeOctalState::Value;
5656
Some('\\')
5757
}
58-
EscapeOctalState::LastBackslash => {
59-
self.state = EscapeOctalState::LastValue;
60-
Some('\\')
61-
}
62-
EscapeOctalState::FirstValue => {
63-
let octal_digit = byte_to_octal_digit(self.c[0], self.idx);
64-
if self.idx == 0 {
65-
self.state = EscapeOctalState::LastBackslash;
66-
self.idx = 2;
67-
} else {
68-
self.idx -= 1;
69-
}
70-
Some(from_digit(octal_digit.into(), 8).unwrap())
71-
}
72-
EscapeOctalState::LastValue => {
73-
let octal_digit = byte_to_octal_digit(self.c[1], self.idx);
74-
if self.idx == 0 {
75-
self.state = EscapeOctalState::Done;
58+
EscapeOctalState::Value => {
59+
let octal_digit = byte_to_octal_digit(self.bytes[self.byte_idx], self.digit_idx);
60+
if self.digit_idx == 0 {
61+
// Move to next byte
62+
self.byte_idx += 1;
63+
if self.byte_idx >= self.num_bytes {
64+
self.state = EscapeOctalState::Done;
65+
} else {
66+
self.state = EscapeOctalState::Backslash;
67+
self.digit_idx = 2;
68+
}
7669
} else {
77-
self.idx -= 1;
70+
self.digit_idx -= 1;
7871
}
7972
Some(from_digit(octal_digit.into(), 8).unwrap())
8073
}
@@ -88,20 +81,24 @@ impl EscapeOctal {
8881
return Self::from_byte(c as u8);
8982
}
9083

91-
let mut buf = [0; 2];
92-
let _s = c.encode_utf8(&mut buf);
84+
let mut bytes = [0; 4];
85+
let len = c.encode_utf8(&mut bytes).len();
9386
Self {
94-
c: buf,
95-
idx: 2,
96-
state: EscapeOctalState::FirstBackslash,
87+
bytes,
88+
num_bytes: len,
89+
byte_idx: 0,
90+
digit_idx: 2,
91+
state: EscapeOctalState::Backslash,
9792
}
9893
}
9994

10095
fn from_byte(b: u8) -> Self {
10196
Self {
102-
c: [0, b],
103-
idx: 2,
104-
state: EscapeOctalState::LastBackslash,
97+
bytes: [b, 0, 0, 0],
98+
num_bytes: 1,
99+
byte_idx: 0,
100+
digit_idx: 2,
101+
state: EscapeOctalState::Backslash,
105102
}
106103
}
107104
}

src/uucore/src/lib/features/quoting_style/mod.rs

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,11 @@ pub enum QuotingStyle {
3535

3636
/// Whether to show control and non-unicode characters, or replace them with `?`.
3737
show_control: bool,
38+
39+
/// Whether to commit to dollar quoting for the entire string (printf %q style).
40+
/// true: committed mode - wrap entire string in $'...' when control chars present
41+
/// false: selective mode (ls style) - only wrap individual control chars in $'...'
42+
commit_dollar_mode: bool,
3843
},
3944

4045
/// Escape the name as a C string.
@@ -58,24 +63,28 @@ impl QuotingStyle {
5863
escape: false,
5964
always_quote: false,
6065
show_control: false,
66+
commit_dollar_mode: false, // ls style - selective dollar mode
6167
};
6268

6369
pub const SHELL_ESCAPE: Self = Self::Shell {
6470
escape: true,
6571
always_quote: false,
6672
show_control: false,
73+
commit_dollar_mode: false, // ls style - selective dollar mode
6774
};
6875

6976
pub const SHELL_QUOTE: Self = Self::Shell {
7077
escape: false,
7178
always_quote: true,
7279
show_control: false,
80+
commit_dollar_mode: false, // ls style - selective dollar mode
7381
};
7482

7583
pub const SHELL_ESCAPE_QUOTE: Self = Self::Shell {
7684
escape: true,
7785
always_quote: true,
7886
show_control: false,
87+
commit_dollar_mode: false, // ls style - selective dollar mode
7988
};
8089

8190
pub const C_NO_QUOTES: Self = Self::C {
@@ -94,11 +103,13 @@ impl QuotingStyle {
94103
Shell {
95104
escape,
96105
always_quote,
106+
commit_dollar_mode,
97107
..
98108
} => Shell {
99109
escape,
100110
always_quote,
101111
show_control,
112+
commit_dollar_mode,
102113
},
103114
Literal { .. } => Literal { show_control },
104115
C { .. } => self,
@@ -161,17 +172,20 @@ fn escape_name_inner(
161172
QuotingStyle::Shell {
162173
escape: true,
163174
always_quote,
175+
commit_dollar_mode,
164176
..
165177
} => Box::new(EscapedShellQuoter::new(
166178
name,
167179
always_quote,
168180
dirname,
181+
commit_dollar_mode,
169182
name.len(),
170183
)),
171184
QuotingStyle::Shell {
172185
escape: false,
173186
always_quote,
174187
show_control,
188+
..
175189
} => Box::new(NonEscapedShellQuoter::new(
176190
name,
177191
show_control,
@@ -235,6 +249,7 @@ impl fmt::Display for QuotingStyle {
235249
escape,
236250
always_quote,
237251
show_control,
252+
..
238253
} => {
239254
let mut style = "shell".to_string();
240255
if escape {
@@ -761,7 +776,9 @@ mod tests {
761776
],
762777
);
763778

764-
// mixed with valid characters
779+
// mixed with valid characters (invalid byte 0xA7 followed by underscore)
780+
// The correct output for shell-escape should be: ''$'\247''_'
781+
// (empty string, ANSI-C quote the invalid byte, then quote the underscore)
765782
check_names_raw_both(
766783
&[continuation, ascii],
767784
&[

0 commit comments

Comments
 (0)