Skip to content

Commit 6d9ab8f

Browse files
andrewliebenowjtracey
authored andcommitted
Restructure non-UTF-8 input error handling
1 parent 9071432 commit 6d9ab8f

File tree

6 files changed

+142
-107
lines changed

6 files changed

+142
-107
lines changed

src/uu/echo/src/echo.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ use std::ffi::OsString;
1010
use std::io::{self, StdoutLock, Write};
1111
use uucore::error::UResult;
1212
use uucore::format::{EscapedChar, FormatChar, OctalParsing, parse_escape_only};
13-
use uucore::{format_usage, help_about, help_section, help_usage};
13+
use uucore::{format_usage, help_about, help_section, help_usage, os_str_as_bytes_verbose};
1414

1515
const ABOUT: &str = help_about!("echo.md");
1616
const USAGE: &str = help_usage!("echo.md");
@@ -137,8 +137,9 @@ fn execute(
137137
escaped: bool,
138138
) -> UResult<()> {
139139
for (i, input) in arguments_after_options.into_iter().enumerate() {
140-
let bytes = uucore::format::try_get_bytes_from_os_str(&input)?;
140+
let bytes = os_str_as_bytes_verbose(&input)?;
141141

142+
// Don't print a space before the first argument
142143
if i > 0 {
143144
stdout_lock.write_all(b" ")?;
144145
}

src/uu/printf/src/printf.rs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,11 @@ use std::io::stdout;
88
use std::ops::ControlFlow;
99
use uucore::error::{UResult, UUsageError};
1010
use uucore::format::{
11-
FormatArgument, FormatArguments, FormatItem, parse_spec_and_escape, try_get_bytes_from_os_str,
11+
FormatArgument, FormatArguments, FormatError, FormatItem, parse_spec_and_escape,
12+
};
13+
use uucore::{
14+
format_usage, help_about, help_section, help_usage, os_str_as_bytes_verbose, show_warning,
1215
};
13-
use uucore::{format_usage, help_about, help_section, help_usage, show_warning};
1416

1517
const VERSION: &str = "version";
1618
const HELP: &str = "help";
@@ -30,7 +32,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
3032
let format = matches
3133
.get_one::<OsString>(options::FORMAT)
3234
.ok_or_else(|| UUsageError::new(1, "missing operand"))?;
33-
let format = try_get_bytes_from_os_str(format)?;
35+
let format = os_str_as_bytes_verbose(format).map_err(FormatError::from)?;
3436

3537
let values: Vec<_> = match matches.get_many::<OsString>(options::ARGUMENT) {
3638
Some(s) => s

src/uucore/src/lib/features/format/argument.rs

Lines changed: 6 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,15 @@
66
use super::{ExtendedBigDecimal, FormatError};
77
use crate::format::spec::ArgumentLocation;
88
use crate::{
9-
error::{UError, set_exit_code},
9+
error::set_exit_code,
10+
os_str_as_bytes_verbose, os_str_as_str_verbose,
1011
parser::num_parser::{ExtendedParser, ExtendedParserError},
1112
quoting_style::{Quotes, QuotingStyle, escape_name},
1213
show_error, show_warning,
1314
};
1415
use os_display::Quotable;
1516
use std::{
16-
error::Error,
1717
ffi::{OsStr, OsString},
18-
fmt::Display,
1918
num::NonZero,
2019
};
2120

@@ -77,7 +76,7 @@ impl<'a> FormatArguments<'a> {
7776
pub fn next_char(&mut self, position: &ArgumentLocation) -> Result<u8, FormatError> {
7877
match self.next_arg(position) {
7978
Some(FormatArgument::Char(c)) => Ok(*c as u8),
80-
Some(FormatArgument::Unparsed(os)) => match try_get_bytes_from_os_str(os)?.first() {
79+
Some(FormatArgument::Unparsed(os)) => match os_str_as_bytes_verbose(os)?.first() {
8180
Some(&byte) => Ok(byte),
8281
None => Ok(b'\0'),
8382
},
@@ -96,7 +95,7 @@ impl<'a> FormatArguments<'a> {
9695
match self.next_arg(position) {
9796
Some(FormatArgument::SignedInt(n)) => Ok(*n),
9897
Some(FormatArgument::Unparsed(os)) => {
99-
let str = try_get_str_from_os_str(os)?;
98+
let str = os_str_as_str_verbose(os)?;
10099

101100
Ok(extract_value(i64::extended_parse(str), str))
102101
}
@@ -108,7 +107,7 @@ impl<'a> FormatArguments<'a> {
108107
match self.next_arg(position) {
109108
Some(FormatArgument::UnsignedInt(n)) => Ok(*n),
110109
Some(FormatArgument::Unparsed(os)) => {
111-
let s = try_get_str_from_os_str(os)?;
110+
let s = os_str_as_str_verbose(os)?;
112111
// Check if the string is a character literal enclosed in quotes
113112
if s.starts_with(['"', '\'']) {
114113
// Extract the content between the quotes safely using chars
@@ -139,7 +138,7 @@ impl<'a> FormatArguments<'a> {
139138
match self.next_arg(position) {
140139
Some(FormatArgument::Float(n)) => Ok(n.clone()),
141140
Some(FormatArgument::Unparsed(os)) => {
142-
let s = try_get_str_from_os_str(os)?;
141+
let s = os_str_as_str_verbose(os)?;
143142

144143
Ok(extract_value(ExtendedBigDecimal::extended_parse(s), s))
145144
}
@@ -207,59 +206,6 @@ fn extract_value<T: Default>(p: Result<T, ExtendedParserError<'_, T>>, input: &s
207206
}
208207
}
209208

210-
#[derive(Debug)]
211-
pub struct NonUtf8OsStr(pub String);
212-
213-
impl Display for NonUtf8OsStr {
214-
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
215-
f.write_fmt(format_args!(
216-
"invalid (non-UTF-8) string like {} encountered",
217-
self.0.quote(),
218-
))
219-
}
220-
}
221-
222-
impl Error for NonUtf8OsStr {}
223-
impl UError for NonUtf8OsStr {}
224-
225-
pub fn try_get_str_from_os_str(os_str: &OsStr) -> Result<&str, NonUtf8OsStr> {
226-
match os_str.to_str() {
227-
Some(st) => Ok(st),
228-
None => {
229-
let cow = os_str.to_string_lossy();
230-
231-
Err(NonUtf8OsStr(cow.to_string()))
232-
}
233-
}
234-
}
235-
236-
pub fn try_get_bytes_from_os_str(input: &OsStr) -> Result<&[u8], NonUtf8OsStr> {
237-
let result = {
238-
#[cfg(target_family = "unix")]
239-
{
240-
use std::os::unix::ffi::OsStrExt;
241-
242-
Ok(input.as_bytes())
243-
}
244-
245-
#[cfg(not(target_family = "unix"))]
246-
{
247-
// TODO
248-
// Verify that this works correctly on these platforms
249-
match input.to_str().map(|st| st.as_bytes()) {
250-
Some(sl) => Ok(sl),
251-
None => {
252-
let cow = input.to_string_lossy();
253-
254-
Err(NonUtf8OsStr(cow.to_string()))
255-
}
256-
}
257-
}
258-
};
259-
260-
result
261-
}
262-
263209
#[cfg(test)]
264210
mod tests {
265211
use super::*;

src/uucore/src/lib/features/format/mod.rs

Lines changed: 20 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,12 @@ pub mod human;
3737
pub mod num_format;
3838
mod spec;
3939

40+
pub use self::escape::{EscapedChar, OctalParsing};
4041
use crate::extendedbigdecimal::ExtendedBigDecimal;
41-
pub use argument::*;
42+
pub use argument::{FormatArgument, FormatArguments};
43+
44+
use self::{escape::parse_escape_code, num_format::Formatter};
45+
use crate::{NonUtf8OsStrError, OsStrConversionType, error::UError};
4246
pub use spec::Spec;
4347
use std::{
4448
error::Error,
@@ -50,13 +54,6 @@ use std::{
5054

5155
use os_display::Quotable;
5256

53-
use crate::error::UError;
54-
55-
pub use self::{
56-
escape::{EscapedChar, OctalParsing, parse_escape_code},
57-
num_format::Formatter,
58-
};
59-
6057
#[derive(Debug)]
6158
pub enum FormatError {
6259
SpecError(Vec<u8>),
@@ -74,7 +71,7 @@ pub enum FormatError {
7471
/// The hexadecimal characters represent a code point that cannot represent a
7572
/// Unicode character (e.g., a surrogate code point)
7673
InvalidCharacter(char, Vec<u8>),
77-
InvalidEncoding(NonUtf8OsStr),
74+
InvalidEncoding(NonUtf8OsStrError),
7875
}
7976

8077
impl Error for FormatError {}
@@ -86,8 +83,8 @@ impl From<std::io::Error> for FormatError {
8683
}
8784
}
8885

89-
impl From<NonUtf8OsStr> for FormatError {
90-
fn from(value: NonUtf8OsStr) -> FormatError {
86+
impl From<NonUtf8OsStrError> for FormatError {
87+
fn from(value: NonUtf8OsStrError) -> FormatError {
9188
FormatError::InvalidEncoding(value)
9289
}
9390
}
@@ -127,11 +124,18 @@ impl Display for FormatError {
127124
String::from_utf8_lossy(digits)
128125
),
129126
Self::InvalidEncoding(no) => {
130-
write!(
131-
f,
132-
"invalid (non-UTF-8) argument like {} encountered",
133-
no.0.quote()
134-
)
127+
use os_display::Quotable;
128+
129+
let quoted = no.input_lossy_string.quote();
130+
131+
match no.conversion_type {
132+
OsStrConversionType::ToBytes => f.write_fmt(format_args!(
133+
"invalid (non-UTF-8) argument like {quoted} encountered when converting argument to bytes on a platform that doesn't use UTF-8",
134+
)),
135+
OsStrConversionType::ToString => f.write_fmt(format_args!(
136+
"invalid (non-UTF-8) argument like {quoted} encountered",
137+
)),
138+
}
135139
}
136140
}
137141
}

src/uucore/src/lib/features/format/spec.rs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,11 @@ use super::{
1111
self, Case, FloatVariant, ForceDecimal, Formatter, NumberAlignment, PositiveSign, Prefix,
1212
UnsignedIntVariant,
1313
},
14-
parse_escape_only, try_get_bytes_from_os_str,
14+
parse_escape_only,
1515
};
1616
use crate::{
1717
format::FormatArguments,
18+
os_str_as_bytes_verbose,
1819
quoting_style::{QuotingStyle, escape_name},
1920
};
2021
use std::{io::Write, num::NonZero, ops::ControlFlow};
@@ -379,7 +380,7 @@ impl Spec {
379380

380381
let os_str = args.next_string(position);
381382

382-
let bytes = try_get_bytes_from_os_str(os_str).unwrap();
383+
let bytes = os_str_as_bytes_verbose(os_str)?;
383384

384385
let truncated = match precision {
385386
Some(p) if p < os_str.len() => &bytes[..p],
@@ -390,7 +391,7 @@ impl Spec {
390391
Self::EscapedString { position } => {
391392
let os_str = args.next_string(position);
392393

393-
let bytes = try_get_bytes_from_os_str(os_str).unwrap();
394+
let bytes = os_str_as_bytes_verbose(os_str)?;
394395

395396
let mut parsed = Vec::<u8>::new();
396397

@@ -414,7 +415,7 @@ impl Spec {
414415
show_control: false,
415416
},
416417
);
417-
let bytes = try_get_bytes_from_os_str(&s)?;
418+
let bytes = os_str_as_bytes_verbose(&s)?;
418419
writer.write_all(bytes).map_err(FormatError::IoError)
419420
}
420421
Self::SignedInt {

0 commit comments

Comments
 (0)