diff --git a/src/uu/echo/src/echo.rs b/src/uu/echo/src/echo.rs index 2386e87a0fc..ae0849b2ad3 100644 --- a/src/uu/echo/src/echo.rs +++ b/src/uu/echo/src/echo.rs @@ -8,10 +8,9 @@ use clap::{Arg, ArgAction, Command}; use std::env; use std::ffi::{OsStr, OsString}; use std::io::{self, StdoutLock, Write}; -use uucore::error::{UResult, USimpleError}; +use uucore::error::UResult; use uucore::format::{FormatChar, OctalParsing, parse_escape_only}; -use uucore::format_usage; -use uucore::os_str_as_bytes; +use uucore::{format_usage, os_str_as_bytes}; use uucore::locale::get_message; @@ -223,9 +222,9 @@ pub fn uu_app() -> Command { fn execute(stdout: &mut StdoutLock, args: Vec, options: Options) -> UResult<()> { for (i, arg) in args.into_iter().enumerate() { - let bytes = os_str_as_bytes(arg.as_os_str()) - .map_err(|_| USimpleError::new(1, get_message("echo-error-non-utf8")))?; + let bytes = os_str_as_bytes(&arg)?; + // Don't print a space before the first argument if i > 0 { stdout.write_all(b" ")?; } diff --git a/src/uu/printf/src/printf.rs b/src/uu/printf/src/printf.rs index d537350f19c..b9f464f1953 100644 --- a/src/uu/printf/src/printf.rs +++ b/src/uu/printf/src/printf.rs @@ -4,6 +4,7 @@ // file that was distributed with this source code. use clap::{Arg, ArgAction, Command}; use std::collections::HashMap; +use std::ffi::OsString; use std::io::stdout; use std::ops::ControlFlow; use uucore::error::{UResult, UUsageError}; @@ -18,21 +19,19 @@ mod options { pub const FORMAT: &str = "FORMAT"; pub const ARGUMENT: &str = "ARGUMENT"; } + #[uucore::main] pub fn uumain(args: impl uucore::Args) -> UResult<()> { let matches = uu_app().get_matches_from(args); let format = matches - .get_one::(options::FORMAT) + .get_one::(options::FORMAT) .ok_or_else(|| UUsageError::new(1, get_message("printf-error-missing-operand")))?; let format = os_str_as_bytes(format)?; - let values: Vec<_> = match matches.get_many::(options::ARGUMENT) { - // FIXME: use os_str_as_bytes once FormatArgument supports Vec + let values: Vec<_> = match matches.get_many::(options::ARGUMENT) { Some(s) => s - .map(|os_string| { - FormatArgument::Unparsed(std::ffi::OsStr::to_string_lossy(os_string).to_string()) - }) + .map(|os_string| FormatArgument::Unparsed(os_string.to_owned())) .collect(), None => vec![], }; @@ -62,7 +61,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { "{}", get_message_with_args( "printf-warning-ignoring-excess-arguments", - HashMap::from([("arg".to_string(), arg_str.to_string())]) + HashMap::from([("arg".to_string(), arg_str.to_string_lossy().to_string())]) ) ); } @@ -103,10 +102,10 @@ pub fn uu_app() -> Command { .help(get_message("printf-help-version")) .action(ArgAction::Version), ) - .arg(Arg::new(options::FORMAT).value_parser(clap::value_parser!(std::ffi::OsString))) + .arg(Arg::new(options::FORMAT).value_parser(clap::value_parser!(OsString))) .arg( Arg::new(options::ARGUMENT) .action(ArgAction::Append) - .value_parser(clap::value_parser!(std::ffi::OsString)), + .value_parser(clap::value_parser!(OsString)), ) } diff --git a/src/uucore/src/lib/features/checksum.rs b/src/uucore/src/lib/features/checksum.rs index 214edc16cdf..6f134ab4e2f 100644 --- a/src/uucore/src/lib/features/checksum.rs +++ b/src/uucore/src/lib/features/checksum.rs @@ -968,7 +968,7 @@ fn process_checksum_line( cached_line_format: &mut Option, last_algo: &mut Option, ) -> Result<(), LineCheckError> { - let line_bytes = os_str_as_bytes(line)?; + let line_bytes = os_str_as_bytes(line).map_err(|e| LineCheckError::UError(Box::new(e)))?; // Early return on empty or commented lines. if line.is_empty() || line_bytes.starts_with(b"#") { diff --git a/src/uucore/src/lib/features/extendedbigdecimal.rs b/src/uucore/src/lib/features/extendedbigdecimal.rs index 396b6f35941..5748b6f1ab9 100644 --- a/src/uucore/src/lib/features/extendedbigdecimal.rs +++ b/src/uucore/src/lib/features/extendedbigdecimal.rs @@ -101,6 +101,18 @@ impl From for ExtendedBigDecimal { } } +impl From for ExtendedBigDecimal { + fn from(val: u8) -> Self { + Self::BigDecimal(val.into()) + } +} + +impl From for ExtendedBigDecimal { + fn from(val: u32) -> Self { + Self::BigDecimal(val.into()) + } +} + impl ExtendedBigDecimal { pub fn zero() -> Self { Self::BigDecimal(0.into()) diff --git a/src/uucore/src/lib/features/format/argument.rs b/src/uucore/src/lib/features/format/argument.rs index 7baf9a9a67f..935f28826b3 100644 --- a/src/uucore/src/lib/features/format/argument.rs +++ b/src/uucore/src/lib/features/format/argument.rs @@ -7,12 +7,16 @@ use super::ExtendedBigDecimal; use crate::format::spec::ArgumentLocation; use crate::{ error::set_exit_code, + os_str_as_bytes, parser::num_parser::{ExtendedParser, ExtendedParserError}, quoting_style::{QuotingStyle, locale_aware_escape_name}, show_error, show_warning, }; use os_display::Quotable; -use std::{ffi::OsStr, num::NonZero}; +use std::{ + ffi::{OsStr, OsString}, + num::NonZero, +}; /// An argument for formatting /// @@ -24,12 +28,12 @@ use std::{ffi::OsStr, num::NonZero}; #[derive(Clone, Debug, PartialEq)] pub enum FormatArgument { Char(char), - String(String), + String(OsString), UnsignedInt(u64), SignedInt(i64), Float(ExtendedBigDecimal), /// Special argument that gets coerced into the other variants - Unparsed(String), + Unparsed(OsString), } /// A struct that holds a slice of format arguments and provides methods to access them @@ -72,22 +76,25 @@ impl<'a> FormatArguments<'a> { pub fn next_char(&mut self, position: &ArgumentLocation) -> u8 { match self.next_arg(position) { Some(FormatArgument::Char(c)) => *c as u8, - Some(FormatArgument::Unparsed(s)) => s.bytes().next().unwrap_or(b'\0'), + Some(FormatArgument::Unparsed(os)) => match os_str_as_bytes(os) { + Ok(bytes) => bytes.first().copied().unwrap_or(b'\0'), + Err(_) => b'\0', + }, _ => b'\0', } } - pub fn next_string(&mut self, position: &ArgumentLocation) -> &'a str { + pub fn next_string(&mut self, position: &ArgumentLocation) -> &'a OsStr { match self.next_arg(position) { - Some(FormatArgument::Unparsed(s) | FormatArgument::String(s)) => s, - _ => "", + Some(FormatArgument::Unparsed(os) | FormatArgument::String(os)) => os, + _ => "".as_ref(), } } pub fn next_i64(&mut self, position: &ArgumentLocation) -> i64 { match self.next_arg(position) { Some(FormatArgument::SignedInt(n)) => *n, - Some(FormatArgument::Unparsed(s)) => extract_value(i64::extended_parse(s), s), + Some(FormatArgument::Unparsed(os)) => Self::get_num::(os), _ => 0, } } @@ -95,25 +102,7 @@ impl<'a> FormatArguments<'a> { pub fn next_u64(&mut self, position: &ArgumentLocation) -> u64 { match self.next_arg(position) { Some(FormatArgument::UnsignedInt(n)) => *n, - Some(FormatArgument::Unparsed(s)) => { - // Check if the string is a character literal enclosed in quotes - if s.starts_with(['"', '\'']) { - // Extract the content between the quotes safely using chars - let mut chars = s.trim_matches(|c| c == '"' || c == '\'').chars(); - if let Some(first_char) = chars.next() { - if chars.clone().count() > 0 { - // Emit a warning if there are additional characters - let remaining: String = chars.collect(); - show_warning!( - "{remaining}: character(s) following character constant have been ignored" - ); - } - return first_char as u64; // Use only the first character - } - return 0; // Empty quotes - } - extract_value(u64::extended_parse(s), s) - } + Some(FormatArgument::Unparsed(os)) => Self::get_num::(os), _ => 0, } } @@ -121,13 +110,81 @@ impl<'a> FormatArguments<'a> { pub fn next_extended_big_decimal(&mut self, position: &ArgumentLocation) -> ExtendedBigDecimal { match self.next_arg(position) { Some(FormatArgument::Float(n)) => n.clone(), - Some(FormatArgument::Unparsed(s)) => { - extract_value(ExtendedBigDecimal::extended_parse(s), s) - } + Some(FormatArgument::Unparsed(os)) => Self::get_num::(os), _ => ExtendedBigDecimal::zero(), } } + // Parse an OsStr that we know to start with a '/" + fn parse_quote_start(os: &OsStr) -> Result> + where + T: ExtendedParser + From + From + Default, + { + // If this fails (this can only happens on Windows), then just + // return NotNumeric. + let s = match os_str_as_bytes(os) { + Ok(s) => s, + Err(_) => return Err(ExtendedParserError::NotNumeric), + }; + + let bytes = match s.split_first() { + Some((b'"', bytes)) | Some((b'\'', bytes)) => bytes, + _ => { + // This really can't happen, the string we are given must start with '/". + debug_assert!(false); + return Err(ExtendedParserError::NotNumeric); + } + }; + + if bytes.is_empty() { + return Err(ExtendedParserError::NotNumeric); + } + + let (val, len) = if let Some(c) = bytes + .utf8_chunks() + .next() + .expect("bytes should not be empty") + .valid() + .chars() + .next() + { + // Valid UTF-8 character, cast the codepoint to u32 then T + // (largest unicode codepoint is only 3 bytes, so this is safe) + ((c as u32).into(), c.len_utf8()) + } else { + // Not a valid UTF-8 character, use the first byte + (bytes[0].into(), 1) + }; + // Emit a warning if there are additional characters + if bytes.len() > len { + return Err(ExtendedParserError::PartialMatch( + val, + String::from_utf8_lossy(&bytes[len..]).into_owned(), + )); + } + + Ok(val) + } + + fn get_num(os: &OsStr) -> T + where + T: ExtendedParser + From + From + Default, + { + let s = os.to_string_lossy(); + let first = s.as_bytes().first().copied(); + + let quote_start = first == Some(b'"') || first == Some(b'\''); + let parsed = if quote_start { + // The string begins with a quote + Self::parse_quote_start(os) + } else { + T::extended_parse(&s) + }; + + // Get the best possible value, even if parsed was an error. + extract_value(parsed, &s, quote_start) + } + fn get_at_relative_position(&mut self, pos: NonZero) -> Option<&'a FormatArgument> { let pos: usize = pos.into(); let pos = (pos - 1).saturating_add(self.current_offset); @@ -147,7 +204,11 @@ impl<'a> FormatArguments<'a> { } } -fn extract_value(p: Result>, input: &str) -> T { +fn extract_value( + p: Result>, + input: &str, + quote_start: bool, +) -> T { match p { Ok(v) => v, Err(e) => { @@ -167,14 +228,15 @@ fn extract_value(p: Result>, input: &s Default::default() } ExtendedParserError::PartialMatch(v, rest) => { - let bytes = input.as_encoded_bytes(); - if !bytes.is_empty() && (bytes[0] == b'\'' || bytes[0] == b'"') { + if quote_start { + set_exit_code(0); show_warning!( "{rest}: character(s) following character constant have been ignored" ); } else { show_error!("{}: value not completely converted", input.quote()); } + v } } @@ -249,11 +311,11 @@ mod tests { // Test with different method types in sequence let args = [ FormatArgument::Char('a'), - FormatArgument::String("hello".to_string()), - FormatArgument::Unparsed("123".to_string()), - FormatArgument::String("world".to_string()), + FormatArgument::String("hello".into()), + FormatArgument::Unparsed("123".into()), + FormatArgument::String("world".into()), FormatArgument::Char('z'), - FormatArgument::String("test".to_string()), + FormatArgument::String("test".into()), ]; let mut args = FormatArguments::new(&args); @@ -384,10 +446,10 @@ mod tests { fn test_unparsed_arguments() { // Test with unparsed arguments that get coerced let args = [ - FormatArgument::Unparsed("hello".to_string()), - FormatArgument::Unparsed("123".to_string()), - FormatArgument::Unparsed("hello".to_string()), - FormatArgument::Unparsed("456".to_string()), + FormatArgument::Unparsed("hello".into()), + FormatArgument::Unparsed("123".into()), + FormatArgument::Unparsed("hello".into()), + FormatArgument::Unparsed("456".into()), ]; let mut args = FormatArguments::new(&args); @@ -409,10 +471,10 @@ mod tests { // Test with mixed types and positional access let args = [ FormatArgument::Char('a'), - FormatArgument::String("test".to_string()), + FormatArgument::String("test".into()), FormatArgument::UnsignedInt(42), FormatArgument::Char('b'), - FormatArgument::String("more".to_string()), + FormatArgument::String("more".into()), FormatArgument::UnsignedInt(99), ]; let mut args = FormatArguments::new(&args); diff --git a/src/uucore/src/lib/features/format/mod.rs b/src/uucore/src/lib/features/format/mod.rs index 7abd9147555..532af34ef58 100644 --- a/src/uucore/src/lib/features/format/mod.rs +++ b/src/uucore/src/lib/features/format/mod.rs @@ -37,8 +37,12 @@ pub mod human; pub mod num_format; mod spec; +pub use self::escape::{EscapedChar, OctalParsing}; use crate::extendedbigdecimal::ExtendedBigDecimal; -pub use argument::*; +pub use argument::{FormatArgument, FormatArguments}; + +use self::{escape::parse_escape_code, num_format::Formatter}; +use crate::{NonUtf8OsStrError, error::UError}; pub use spec::Spec; use std::{ error::Error, @@ -50,13 +54,6 @@ use std::{ use os_display::Quotable; -use crate::error::UError; - -pub use self::{ - escape::{EscapedChar, OctalParsing, parse_escape_code}, - num_format::Formatter, -}; - #[derive(Debug)] pub enum FormatError { SpecError(Vec), @@ -74,6 +71,7 @@ pub enum FormatError { /// The hexadecimal characters represent a code point that cannot represent a /// Unicode character (e.g., a surrogate code point) InvalidCharacter(char, Vec), + InvalidEncoding(NonUtf8OsStrError), } impl Error for FormatError {} @@ -85,6 +83,12 @@ impl From for FormatError { } } +impl From for FormatError { + fn from(value: NonUtf8OsStrError) -> FormatError { + FormatError::InvalidEncoding(value) + } +} + impl Display for FormatError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { @@ -118,6 +122,7 @@ impl Display for FormatError { "invalid universal character name \\{escape_char}{}", String::from_utf8_lossy(digits) ), + Self::InvalidEncoding(no) => no.fmt(f), } } } diff --git a/src/uucore/src/lib/features/format/spec.rs b/src/uucore/src/lib/features/format/spec.rs index 8322d26d739..98d455994d2 100644 --- a/src/uucore/src/lib/features/format/spec.rs +++ b/src/uucore/src/lib/features/format/spec.rs @@ -5,8 +5,6 @@ // spell-checker:ignore (vars) intmax ptrdiff padlen -use crate::quoting_style::{QuotingStyle, locale_aware_escape_name}; - use super::{ ExtendedBigDecimal, FormatChar, FormatError, OctalParsing, num_format::{ @@ -15,7 +13,11 @@ use super::{ }, parse_escape_only, }; -use crate::format::FormatArguments; +use crate::{ + format::FormatArguments, + os_str_as_bytes, + quoting_style::{QuotingStyle, locale_aware_escape_name}, +}; use std::{io::Write, num::NonZero, ops::ControlFlow}; /// A parsed specification for formatting a value @@ -375,22 +377,21 @@ impl Spec { // TODO: We need to not use Rust's formatting for aligning the output, // so that we can just write bytes to stdout without panicking. let precision = resolve_asterisk_precision(*precision, args); - let s = args.next_string(position); + let os_str = args.next_string(position); + let bytes = os_str_as_bytes(os_str)?; + let truncated = match precision { - Some(p) if p < s.len() => &s[..p], - _ => s, + Some(p) if p < os_str.len() => &bytes[..p], + _ => bytes, }; - write_padded( - writer, - truncated.as_bytes(), - width, - *align_left || neg_width, - ) + write_padded(writer, truncated, width, *align_left || neg_width) } Self::EscapedString { position } => { - let s = args.next_string(position); - let mut parsed = Vec::new(); - for c in parse_escape_only(s.as_bytes(), OctalParsing::ThreeDigits) { + let os_str = args.next_string(position); + let bytes = os_str_as_bytes(os_str)?; + let mut parsed = Vec::::new(); + + for c in parse_escape_only(bytes, OctalParsing::ThreeDigits) { match c.write(&mut parsed)? { ControlFlow::Continue(()) => {} ControlFlow::Break(()) => { @@ -403,15 +404,11 @@ impl Spec { } Self::QuotedString { position } => { let s = locale_aware_escape_name( - args.next_string(position).as_ref(), + args.next_string(position), QuotingStyle::SHELL_ESCAPE, ); - #[cfg(unix)] - let bytes = std::os::unix::ffi::OsStringExt::into_vec(s); - #[cfg(not(unix))] - let bytes = s.to_string_lossy().as_bytes().to_owned(); - - writer.write_all(&bytes).map_err(FormatError::IoError) + let bytes = os_str_as_bytes(&s)?; + writer.write_all(bytes).map_err(FormatError::IoError) } Self::SignedInt { width, @@ -646,7 +643,7 @@ mod tests { Some((42, false)), resolve_asterisk_width( Some(CanAsterisk::Asterisk(ArgumentLocation::NextArgument)), - &mut FormatArguments::new(&[FormatArgument::Unparsed("42".to_string())]), + &mut FormatArguments::new(&[FormatArgument::Unparsed("42".into())]), ) ); @@ -661,7 +658,7 @@ mod tests { Some((42, true)), resolve_asterisk_width( Some(CanAsterisk::Asterisk(ArgumentLocation::NextArgument)), - &mut FormatArguments::new(&[FormatArgument::Unparsed("-42".to_string())]), + &mut FormatArguments::new(&[FormatArgument::Unparsed("-42".into())]), ) ); @@ -672,9 +669,9 @@ mod tests { NonZero::new(2).unwrap() ))), &mut FormatArguments::new(&[ - FormatArgument::Unparsed("1".to_string()), - FormatArgument::Unparsed("2".to_string()), - FormatArgument::Unparsed("3".to_string()) + FormatArgument::Unparsed("1".into()), + FormatArgument::Unparsed("2".into()), + FormatArgument::Unparsed("3".into()) ]), ) ); @@ -717,7 +714,7 @@ mod tests { Some(42), resolve_asterisk_precision( Some(CanAsterisk::Asterisk(ArgumentLocation::NextArgument)), - &mut FormatArguments::new(&[FormatArgument::Unparsed("42".to_string())]), + &mut FormatArguments::new(&[FormatArgument::Unparsed("42".into())]), ) ); @@ -732,7 +729,7 @@ mod tests { Some(0), resolve_asterisk_precision( Some(CanAsterisk::Asterisk(ArgumentLocation::NextArgument)), - &mut FormatArguments::new(&[FormatArgument::Unparsed("-42".to_string())]), + &mut FormatArguments::new(&[FormatArgument::Unparsed("-42".into())]), ) ); assert_eq!( @@ -742,9 +739,9 @@ mod tests { NonZero::new(2).unwrap() ))), &mut FormatArguments::new(&[ - FormatArgument::Unparsed("1".to_string()), - FormatArgument::Unparsed("2".to_string()), - FormatArgument::Unparsed("3".to_string()) + FormatArgument::Unparsed("1".into()), + FormatArgument::Unparsed("2".into()), + FormatArgument::Unparsed("3".into()) ]), ) ); diff --git a/src/uucore/src/lib/features/parser/num_parser.rs b/src/uucore/src/lib/features/parser/num_parser.rs index 597f4b24522..5f7d895380e 100644 --- a/src/uucore/src/lib/features/parser/num_parser.rs +++ b/src/uucore/src/lib/features/parser/num_parser.rs @@ -109,12 +109,12 @@ impl Base { /// Type returned if a number could not be parsed in its entirety #[derive(Debug, PartialEq)] -pub enum ExtendedParserError<'a, T> { +pub enum ExtendedParserError { /// The input as a whole makes no sense NotNumeric, /// The beginning of the input made sense and has been parsed, /// while the remaining doesn't. - PartialMatch(T, &'a str), + PartialMatch(T, String), /// The value has overflowed the type storage. The returned value /// is saturated (e.g. positive or negative infinity, or min/max /// value for the integer type). @@ -124,7 +124,7 @@ pub enum ExtendedParserError<'a, T> { Underflow(T), } -impl<'a, T> ExtendedParserError<'a, T> +impl ExtendedParserError where T: Zero, { @@ -143,12 +143,12 @@ where /// conversion. fn map( self, - f: impl FnOnce(T) -> Result>, - ) -> ExtendedParserError<'a, U> + f: impl FnOnce(T) -> Result>, + ) -> ExtendedParserError where U: Zero, { - fn extract(v: Result>) -> U + fn extract(v: Result>) -> U where U: Zero, { @@ -172,15 +172,15 @@ where /// and `f64` float, where octal and binary formats are not allowed. pub trait ExtendedParser { // We pick a hopefully different name for our parser, to avoid clash with standard traits. - fn extended_parse(input: &str) -> Result> + fn extended_parse(input: &str) -> Result> where Self: Sized; } impl ExtendedParser for i64 { /// Parse a number as i64. No fractional part is allowed. - fn extended_parse(input: &str) -> Result> { - fn into_i64<'a>(ebd: ExtendedBigDecimal) -> Result> { + fn extended_parse(input: &str) -> Result> { + fn into_i64(ebd: ExtendedBigDecimal) -> Result> { match ebd { ExtendedBigDecimal::BigDecimal(bd) => { let (digits, scale) = bd.into_bigint_and_scale(); @@ -214,8 +214,8 @@ impl ExtendedParser for i64 { impl ExtendedParser for u64 { /// Parse a number as u64. No fractional part is allowed. - fn extended_parse(input: &str) -> Result> { - fn into_u64<'a>(ebd: ExtendedBigDecimal) -> Result> { + fn extended_parse(input: &str) -> Result> { + fn into_u64(ebd: ExtendedBigDecimal) -> Result> { match ebd { ExtendedBigDecimal::BigDecimal(bd) => { let (digits, scale) = bd.into_bigint_and_scale(); @@ -251,8 +251,8 @@ impl ExtendedParser for u64 { impl ExtendedParser for f64 { /// Parse a number as f64 - fn extended_parse(input: &str) -> Result> { - fn into_f64<'a>(ebd: ExtendedBigDecimal) -> Result> { + fn extended_parse(input: &str) -> Result> { + fn into_f64(ebd: ExtendedBigDecimal) -> Result> { // TODO: _Some_ of this is generic, so this should probably be implemented as an ExtendedBigDecimal trait (ToPrimitive). let v = match ebd { ExtendedBigDecimal::BigDecimal(bd) => { @@ -285,7 +285,7 @@ impl ExtendedParser for ExtendedBigDecimal { /// Parse a number as an ExtendedBigDecimal fn extended_parse( input: &str, - ) -> Result> { + ) -> Result> { parse(input, ParseTarget::Decimal, &[]) } } @@ -349,11 +349,11 @@ fn parse_suffix_multiplier<'a>(str: &'a str, allowed_suffixes: &[(char, u32)]) - (1, str) } -fn parse_special_value<'a>( - input: &'a str, +fn parse_special_value( + input: &str, negative: bool, allowed_suffixes: &[(char, u32)], -) -> Result> { +) -> Result> { let input_lc = input.to_ascii_lowercase(); // Array of ("String to match", return value when sign positive, when sign negative) @@ -376,7 +376,7 @@ fn parse_special_value<'a>( return if rest.is_empty() { Ok(special) } else { - Err(ExtendedParserError::PartialMatch(special, rest)) + Err(ExtendedParserError::PartialMatch(special, rest.to_string())) }; } } @@ -386,7 +386,7 @@ fn parse_special_value<'a>( /// Underflow/Overflow errors always contain 0 or infinity. /// overflow: true for overflow, false for underflow. -fn make_error<'a>(overflow: bool, negative: bool) -> ExtendedParserError<'a, ExtendedBigDecimal> { +fn make_error(overflow: bool, negative: bool) -> ExtendedParserError { let mut v = if overflow { ExtendedBigDecimal::Infinity } else { @@ -468,13 +468,13 @@ fn pow_with_context(bd: &BigDecimal, exp: i64, ctx: &Context) -> BigDecimal { } /// Construct an [`ExtendedBigDecimal`] based on parsed data -fn construct_extended_big_decimal<'a>( +fn construct_extended_big_decimal( digits: BigUint, negative: bool, base: Base, scale: i64, exponent: BigInt, -) -> Result> { +) -> Result> { if digits == BigUint::zero() { // Return return 0 if the digits are zero. In particular, we do not ever // return Overflow/Underflow errors in that case. @@ -541,25 +541,13 @@ pub(crate) enum ParseTarget { Duration, } -pub(crate) fn parse<'a>( - input: &'a str, +pub(crate) fn parse( + input: &str, target: ParseTarget, allowed_suffixes: &[(char, u32)], -) -> Result> { - // Parse the " and ' prefixes separately - if target != ParseTarget::Duration { - if let Some(rest) = input.strip_prefix(['\'', '"']) { - let mut chars = rest.char_indices().fuse(); - let v = chars - .next() - .map(|(_, c)| ExtendedBigDecimal::BigDecimal(u32::from(c).into())); - return match (v, chars.next()) { - (Some(v), None) => Ok(v), - (Some(v), Some((i, _))) => Err(ExtendedParserError::PartialMatch(v, &rest[i..])), - (None, _) => Err(ExtendedParserError::NotNumeric), - }; - } - } +) -> Result> { + // Note: literals with ' and " prefixes are parsed earlier on in argument parsing, + // before UTF-8 conversion. let trimmed_input = input.trim_ascii_start(); @@ -616,7 +604,7 @@ pub(crate) fn parse<'a>( } else { ExtendedBigDecimal::zero() }; - return Err(ExtendedParserError::PartialMatch(ebd, partial)); + return Err(ExtendedParserError::PartialMatch(ebd, partial.to_string())); } return if target == ParseTarget::Integral { @@ -640,7 +628,7 @@ pub(crate) fn parse<'a>( } else { Err(ExtendedParserError::PartialMatch( ebd_result.unwrap_or_else(|e| e.extract()), - rest, + rest.to_string(), )) } } @@ -686,14 +674,14 @@ mod tests { u64::extended_parse(""), Err(ExtendedParserError::NotNumeric) )); - assert!(matches!( + assert_eq!( u64::extended_parse("123.15"), - Err(ExtendedParserError::PartialMatch(123, ".15")) - )); - assert!(matches!( + Err(ExtendedParserError::PartialMatch(123, ".15".to_string())) + ); + assert_eq!( u64::extended_parse("123e10"), - Err(ExtendedParserError::PartialMatch(123, "e10")) - )); + Err(ExtendedParserError::PartialMatch(123, "e10".to_string())) + ); } #[test] @@ -707,18 +695,18 @@ mod tests { )); assert_eq!(Ok(i64::MAX), i64::extended_parse(&format!("{}", i64::MAX))); assert_eq!(Ok(i64::MIN), i64::extended_parse(&format!("{}", i64::MIN))); - assert!(matches!( + assert_eq!( i64::extended_parse(&format!("{}", u64::MAX)), Err(ExtendedParserError::Overflow(i64::MAX)) - )); + ); assert!(matches!( i64::extended_parse(&format!("{}", i64::MAX as u64 + 1)), Err(ExtendedParserError::Overflow(i64::MAX)) )); - assert!(matches!( + assert_eq!( i64::extended_parse("-123e10"), - Err(ExtendedParserError::PartialMatch(-123, "e10")) - )); + Err(ExtendedParserError::PartialMatch(-123, "e10".to_string())) + ); assert!(matches!( i64::extended_parse(&format!("{}", -(u64::MAX as i128))), Err(ExtendedParserError::Overflow(i64::MIN)) @@ -770,20 +758,34 @@ mod tests { Ok(0.15), f64::extended_parse(".150000000000000000000000000231313") ); - assert!(matches!(f64::extended_parse("123.15e"), - Err(ExtendedParserError::PartialMatch(f, "e")) if f == 123.15)); - assert!(matches!(f64::extended_parse("123.15E"), - Err(ExtendedParserError::PartialMatch(f, "E")) if f == 123.15)); - assert!(matches!(f64::extended_parse("123.15e-"), - Err(ExtendedParserError::PartialMatch(f, "e-")) if f == 123.15)); - assert!(matches!(f64::extended_parse("123.15e+"), - Err(ExtendedParserError::PartialMatch(f, "e+")) if f == 123.15)); - assert!(matches!(f64::extended_parse("123.15e."), - Err(ExtendedParserError::PartialMatch(f, "e.")) if f == 123.15)); - assert!(matches!(f64::extended_parse("1.2.3"), - Err(ExtendedParserError::PartialMatch(f, ".3")) if f == 1.2)); - assert!(matches!(f64::extended_parse("123.15p5"), - Err(ExtendedParserError::PartialMatch(f, "p5")) if f == 123.15)); + assert_eq!( + f64::extended_parse("123.15e"), + Err(ExtendedParserError::PartialMatch(123.15, "e".to_string())) + ); + assert_eq!( + f64::extended_parse("123.15E"), + Err(ExtendedParserError::PartialMatch(123.15, "E".to_string())) + ); + assert_eq!( + f64::extended_parse("123.15e-"), + Err(ExtendedParserError::PartialMatch(123.15, "e-".to_string())) + ); + assert_eq!( + f64::extended_parse("123.15e+"), + Err(ExtendedParserError::PartialMatch(123.15, "e+".to_string())) + ); + assert_eq!( + f64::extended_parse("123.15e."), + Err(ExtendedParserError::PartialMatch(123.15, "e.".to_string())) + ); + assert_eq!( + f64::extended_parse("1.2.3"), + Err(ExtendedParserError::PartialMatch(1.2, ".3".to_string())) + ); + assert_eq!( + f64::extended_parse("123.15p5"), + Err(ExtendedParserError::PartialMatch(123.15, "p5".to_string())) + ); // Minus zero. 0.0 == -0.0 so we explicitly check the sign. assert_eq!(Ok(0.0), f64::extended_parse("-0.0")); assert!(f64::extended_parse("-0.0").unwrap().is_sign_negative()); @@ -806,10 +808,20 @@ mod tests { assert!(f64::extended_parse("nan").unwrap().is_sign_positive()); assert!(f64::extended_parse("NAN").unwrap().is_nan()); assert!(f64::extended_parse("NAN").unwrap().is_sign_positive()); - assert!(matches!(f64::extended_parse("-infinit"), - Err(ExtendedParserError::PartialMatch(f, "init")) if f == f64::NEG_INFINITY)); - assert!(matches!(f64::extended_parse("-infinity00"), - Err(ExtendedParserError::PartialMatch(f, "00")) if f == f64::NEG_INFINITY)); + assert_eq!( + f64::extended_parse("-infinit"), + Err(ExtendedParserError::PartialMatch( + f64::NEG_INFINITY, + "init".to_string() + )) + ); + assert_eq!( + f64::extended_parse("-infinity00"), + Err(ExtendedParserError::PartialMatch( + f64::NEG_INFINITY, + "00".to_string() + )) + ); assert!(f64::extended_parse(&format!("{}", u64::MAX)).is_ok()); assert!(f64::extended_parse(&format!("{}", i64::MIN)).is_ok()); @@ -994,14 +1006,22 @@ mod tests { // but we can check that the number still gets parsed properly: 0x0.8e5 is 0x8e5 / 16**3 assert_eq!(Ok(0.555908203125), f64::extended_parse("0x0.8e5")); - assert!(matches!(f64::extended_parse("0x0.1p"), - Err(ExtendedParserError::PartialMatch(f, "p")) if f == 0.0625)); - assert!(matches!(f64::extended_parse("0x0.1p-"), - Err(ExtendedParserError::PartialMatch(f, "p-")) if f == 0.0625)); - assert!(matches!(f64::extended_parse("0x.1p+"), - Err(ExtendedParserError::PartialMatch(f, "p+")) if f == 0.0625)); - assert!(matches!(f64::extended_parse("0x.1p."), - Err(ExtendedParserError::PartialMatch(f, "p.")) if f == 0.0625)); + assert_eq!( + f64::extended_parse("0x0.1p"), + Err(ExtendedParserError::PartialMatch(0.0625, "p".to_string())) + ); + assert_eq!( + f64::extended_parse("0x0.1p-"), + Err(ExtendedParserError::PartialMatch(0.0625, "p-".to_string())) + ); + assert_eq!( + f64::extended_parse("0x.1p+"), + Err(ExtendedParserError::PartialMatch(0.0625, "p+".to_string())) + ); + assert_eq!( + f64::extended_parse("0x.1p."), + Err(ExtendedParserError::PartialMatch(0.0625, "p.".to_string())) + ); assert_eq!( Ok(ExtendedBigDecimal::BigDecimal( @@ -1061,40 +1081,58 @@ mod tests { )); // Not actually hex numbers, but the prefixes look like it. - assert!(matches!(f64::extended_parse("0x"), - Err(ExtendedParserError::PartialMatch(f, "x")) if f == 0.0)); - assert!(matches!(f64::extended_parse("0x."), - Err(ExtendedParserError::PartialMatch(f, "x.")) if f == 0.0)); - assert!(matches!(f64::extended_parse("0xp"), - Err(ExtendedParserError::PartialMatch(f, "xp")) if f == 0.0)); - assert!(matches!(f64::extended_parse("0xp-2"), - Err(ExtendedParserError::PartialMatch(f, "xp-2")) if f == 0.0)); - assert!(matches!(f64::extended_parse("0x.p-2"), - Err(ExtendedParserError::PartialMatch(f, "x.p-2")) if f == 0.0)); - assert!(matches!(f64::extended_parse("0X"), - Err(ExtendedParserError::PartialMatch(f, "X")) if f == 0.0)); - assert!(matches!(f64::extended_parse("-0x"), - Err(ExtendedParserError::PartialMatch(f, "x")) if f == -0.0)); - assert!(matches!(f64::extended_parse("+0x"), - Err(ExtendedParserError::PartialMatch(f, "x")) if f == 0.0)); - assert!(matches!(f64::extended_parse("-0x."), - Err(ExtendedParserError::PartialMatch(f, "x.")) if f == -0.0)); - assert!(matches!( + assert_eq!( + f64::extended_parse("0x"), + Err(ExtendedParserError::PartialMatch(0.0, "x".to_string())) + ); + assert_eq!( + f64::extended_parse("0x."), + Err(ExtendedParserError::PartialMatch(0.0, "x.".to_string())) + ); + assert_eq!( + f64::extended_parse("0xp"), + Err(ExtendedParserError::PartialMatch(0.0, "xp".to_string())) + ); + assert_eq!( + f64::extended_parse("0xp-2"), + Err(ExtendedParserError::PartialMatch(0.0, "xp-2".to_string())) + ); + assert_eq!( + f64::extended_parse("0x.p-2"), + Err(ExtendedParserError::PartialMatch(0.0, "x.p-2".to_string())) + ); + assert_eq!( + f64::extended_parse("0X"), + Err(ExtendedParserError::PartialMatch(0.0, "X".to_string())) + ); + assert_eq!( + f64::extended_parse("-0x"), + Err(ExtendedParserError::PartialMatch(0.0, "x".to_string())) + ); + assert_eq!( + f64::extended_parse("+0x"), + Err(ExtendedParserError::PartialMatch(0.0, "x".to_string())) + ); + assert_eq!( + f64::extended_parse("-0x."), + Err(ExtendedParserError::PartialMatch(-0.0, "x.".to_string())) + ); + assert_eq!( u64::extended_parse("0x"), - Err(ExtendedParserError::PartialMatch(0, "x")) - )); - assert!(matches!( + Err(ExtendedParserError::PartialMatch(0, "x".to_string())) + ); + assert_eq!( u64::extended_parse("-0x"), - Err(ExtendedParserError::PartialMatch(0, "x")) - )); - assert!(matches!( + Err(ExtendedParserError::PartialMatch(0, "x".to_string())) + ); + assert_eq!( i64::extended_parse("0x"), - Err(ExtendedParserError::PartialMatch(0, "x")) - )); - assert!(matches!( + Err(ExtendedParserError::PartialMatch(0, "x".to_string())) + ); + assert_eq!( i64::extended_parse("-0x"), - Err(ExtendedParserError::PartialMatch(0, "x")) - )); + Err(ExtendedParserError::PartialMatch(0, "x".to_string())) + ); } #[test] @@ -1105,18 +1143,18 @@ mod tests { assert_eq!(Ok(-0o123), i64::extended_parse("-0123")); assert_eq!(Ok(0o123), u64::extended_parse("00123")); assert_eq!(Ok(0), u64::extended_parse("00")); - assert!(matches!( + assert_eq!( u64::extended_parse("008"), - Err(ExtendedParserError::PartialMatch(0, "8")) - )); - assert!(matches!( + Err(ExtendedParserError::PartialMatch(0, "8".to_string())) + ); + assert_eq!( u64::extended_parse("08"), - Err(ExtendedParserError::PartialMatch(0, "8")) - )); - assert!(matches!( + Err(ExtendedParserError::PartialMatch(0, "8".to_string())) + ); + assert_eq!( u64::extended_parse("0."), - Err(ExtendedParserError::PartialMatch(0, ".")) - )); + Err(ExtendedParserError::PartialMatch(0, ".".to_string())) + ); // No float tests, leading zeros get parsed as decimal anyway. } @@ -1128,51 +1166,62 @@ mod tests { assert_eq!(Ok(0b1011), u64::extended_parse("+0b1011")); assert_eq!(Ok(-0b1011), i64::extended_parse("-0b1011")); - assert!(matches!( + assert_eq!( u64::extended_parse("0b"), - Err(ExtendedParserError::PartialMatch(0, "b")) - )); - assert!(matches!( + Err(ExtendedParserError::PartialMatch(0, "b".to_string())) + ); + assert_eq!( u64::extended_parse("0b."), - Err(ExtendedParserError::PartialMatch(0, "b.")) - )); - assert!(matches!( + Err(ExtendedParserError::PartialMatch(0, "b.".to_string())) + ); + assert_eq!( u64::extended_parse("-0b"), - Err(ExtendedParserError::PartialMatch(0, "b")) - )); - assert!(matches!( + Err(ExtendedParserError::PartialMatch(0, "b".to_string())) + ); + assert_eq!( i64::extended_parse("0b"), - Err(ExtendedParserError::PartialMatch(0, "b")) - )); - assert!(matches!( + Err(ExtendedParserError::PartialMatch(0, "b".to_string())) + ); + assert_eq!( i64::extended_parse("-0b"), - Err(ExtendedParserError::PartialMatch(0, "b")) - )); + Err(ExtendedParserError::PartialMatch(0, "b".to_string())) + ); // Binary not allowed for floats - assert!(matches!( + assert_eq!( f64::extended_parse("0b100"), - Err(ExtendedParserError::PartialMatch(0f64, "b100")) - )); - assert!(matches!( + Err(ExtendedParserError::PartialMatch(0f64, "b100".to_string())) + ); + assert_eq!( f64::extended_parse("0b100.1"), - Err(ExtendedParserError::PartialMatch(0f64, "b100.1")) - )); + Err(ExtendedParserError::PartialMatch( + 0f64, + "b100.1".to_string() + )) + ); - assert!(match ExtendedBigDecimal::extended_parse("0b100.1") { - Err(ExtendedParserError::PartialMatch(ebd, "b100.1")) => - ebd == ExtendedBigDecimal::zero(), - _ => false, - }); + assert_eq!( + ExtendedBigDecimal::extended_parse("0b100.1"), + Err(ExtendedParserError::PartialMatch( + ExtendedBigDecimal::zero(), + "b100.1".to_string() + )) + ); - assert!(match ExtendedBigDecimal::extended_parse("0b") { - Err(ExtendedParserError::PartialMatch(ebd, "b")) => ebd == ExtendedBigDecimal::zero(), - _ => false, - }); - assert!(match ExtendedBigDecimal::extended_parse("0b.") { - Err(ExtendedParserError::PartialMatch(ebd, "b.")) => ebd == ExtendedBigDecimal::zero(), - _ => false, - }); + assert_eq!( + ExtendedBigDecimal::extended_parse("0b"), + Err(ExtendedParserError::PartialMatch( + ExtendedBigDecimal::zero(), + "b".to_string() + )) + ); + assert_eq!( + ExtendedBigDecimal::extended_parse("0b."), + Err(ExtendedParserError::PartialMatch( + ExtendedBigDecimal::zero(), + "b.".to_string() + )) + ); } #[test] @@ -1185,15 +1234,15 @@ mod tests { // Ensure that trailing whitespace is still a partial match assert_eq!( - Err(ExtendedParserError::PartialMatch(6, " ")), + Err(ExtendedParserError::PartialMatch(6, " ".to_string())), u64::extended_parse("0x6 ") ); assert_eq!( - Err(ExtendedParserError::PartialMatch(7, "\t")), + Err(ExtendedParserError::PartialMatch(7, "\t".to_string())), u64::extended_parse("0x7\t") ); assert_eq!( - Err(ExtendedParserError::PartialMatch(8, "\n")), + Err(ExtendedParserError::PartialMatch(8, "\n".to_string())), u64::extended_parse("0x8\n") ); diff --git a/src/uucore/src/lib/lib.rs b/src/uucore/src/lib/lib.rs index 8059dac9355..6cebe5e3c45 100644 --- a/src/uucore/src/lib/lib.rs +++ b/src/uucore/src/lib/lib.rs @@ -311,23 +311,39 @@ pub fn read_yes() -> bool { } } +#[derive(Debug)] +pub struct NonUtf8OsStrError { + input_lossy_string: String, +} + +impl std::fmt::Display for NonUtf8OsStrError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + use os_display::Quotable; + let quoted = self.input_lossy_string.quote(); + f.write_fmt(format_args!( + "invalid UTF-8 input {quoted} encountered when converting to bytes on a platform that doesn't expose byte arguments", + )) + } +} + +impl std::error::Error for NonUtf8OsStrError {} +impl error::UError for NonUtf8OsStrError {} + /// Converts an `OsStr` to a UTF-8 `&[u8]`. /// /// This always succeeds on unix platforms, /// and fails on other platforms if the string can't be coerced to UTF-8. -pub fn os_str_as_bytes(os_string: &OsStr) -> mods::error::UResult<&[u8]> { +pub fn os_str_as_bytes(os_string: &OsStr) -> Result<&[u8], NonUtf8OsStrError> { #[cfg(unix)] - let bytes = os_string.as_bytes(); + return Ok(os_string.as_bytes()); #[cfg(not(unix))] - let bytes = os_string + os_string .to_str() - .ok_or_else(|| { - mods::error::UUsageError::new(1, "invalid UTF-8 was detected in one or more arguments") - })? - .as_bytes(); - - Ok(bytes) + .ok_or_else(|| NonUtf8OsStrError { + input_lossy_string: os_string.to_string_lossy().into_owned(), + }) + .map(|s| s.as_bytes()) } /// Performs a potentially lossy conversion from `OsStr` to UTF-8 bytes. @@ -336,15 +352,13 @@ pub fn os_str_as_bytes(os_string: &OsStr) -> mods::error::UResult<&[u8]> { /// and wraps [`OsStr::to_string_lossy`] on non-unix platforms. pub fn os_str_as_bytes_lossy(os_string: &OsStr) -> Cow<[u8]> { #[cfg(unix)] - let bytes = Cow::from(os_string.as_bytes()); + return Cow::from(os_string.as_bytes()); #[cfg(not(unix))] - let bytes = match os_string.to_string_lossy() { + match os_string.to_string_lossy() { Cow::Borrowed(slice) => Cow::from(slice.as_bytes()), Cow::Owned(owned) => Cow::from(owned.into_bytes()), - }; - - bytes + } } /// Converts a `&[u8]` to an `&OsStr`, @@ -354,13 +368,12 @@ pub fn os_str_as_bytes_lossy(os_string: &OsStr) -> Cow<[u8]> { /// and fails on other platforms if the bytes can't be parsed as UTF-8. pub fn os_str_from_bytes(bytes: &[u8]) -> mods::error::UResult> { #[cfg(unix)] - let os_str = Cow::Borrowed(OsStr::from_bytes(bytes)); - #[cfg(not(unix))] - let os_str = Cow::Owned(OsString::from(str::from_utf8(bytes).map_err(|_| { - mods::error::UUsageError::new(1, "Unable to transform bytes into OsStr") - })?)); + return Ok(Cow::Borrowed(OsStr::from_bytes(bytes))); - Ok(os_str) + #[cfg(not(unix))] + Ok(Cow::Owned(OsString::from(str::from_utf8(bytes).map_err( + |_| mods::error::UUsageError::new(1, "Unable to transform bytes into OsStr"), + )?))) } /// Converts a `Vec` into an `OsString`, parsing as UTF-8 on non-unix platforms. @@ -369,13 +382,12 @@ pub fn os_str_from_bytes(bytes: &[u8]) -> mods::error::UResult> { /// and fails on other platforms if the bytes can't be parsed as UTF-8. pub fn os_string_from_vec(vec: Vec) -> mods::error::UResult { #[cfg(unix)] - let s = OsString::from_vec(vec); + return Ok(OsString::from_vec(vec)); + #[cfg(not(unix))] - let s = OsString::from(String::from_utf8(vec).map_err(|_| { + Ok(OsString::from(String::from_utf8(vec).map_err(|_| { mods::error::UUsageError::new(1, "invalid UTF-8 was detected in one or more arguments") - })?); - - Ok(s) + })?)) } /// Converts an `OsString` into a `Vec`, parsing as UTF-8 on non-unix platforms. diff --git a/tests/by-util/test_printf.rs b/tests/by-util/test_printf.rs index 559a803bf33..5e2c0c7a0bd 100644 --- a/tests/by-util/test_printf.rs +++ b/tests/by-util/test_printf.rs @@ -805,7 +805,7 @@ fn test_overflow() { fn partial_char() { new_ucmd!() .args(&["%d", "'abc"]) - .fails_with_code(1) + .succeeds() .stdout_is("97") .stderr_is( "printf: warning: bc: character(s) following character constant have been ignored\n", @@ -1293,23 +1293,80 @@ fn float_arg_with_whitespace() { #[test] fn mb_input() { - for format in ["\"á", "\'á", "'\u{e1}"] { + let cases = vec![ + ("%04x\n", "\"á", "00e1\n"), + ("%04x\n", "'á", "00e1\n"), + ("%04x\n", "'\u{e1}", "00e1\n"), + ("%i\n", "\"á", "225\n"), + ("%i\n", "'á", "225\n"), + ("%i\n", "'\u{e1}", "225\n"), + ("%f\n", "'á", "225.000000\n"), + ]; + for (format, arg, stdout) in cases { new_ucmd!() - .args(&["%04x\n", format]) + .args(&[format, arg]) .succeeds() - .stdout_only("00e1\n"); + .stdout_only(stdout); } let cases = vec![ - ("\"á=", "="), - ("\'á-", "-"), - ("\'á=-==", "=-=="), - ("'\u{e1}++", "++"), + ("%04x\n", "\"á=", "00e1\n", "="), + ("%04x\n", "'á-", "00e1\n", "-"), + ("%04x\n", "'á=-==", "00e1\n", "=-=="), + ("%04x\n", "'á'", "00e1\n", "'"), + ("%04x\n", "'\u{e1}++", "00e1\n", "++"), + ("%04x\n", "''á'", "0027\n", "á'"), + ("%i\n", "\"á=", "225\n", "="), ]; + for (format, arg, stdout, stderr) in cases { + new_ucmd!() + .args(&[format, arg]) + .succeeds() + .stdout_is(stdout) + .stderr_is(format!("printf: warning: {stderr}: character(s) following character constant have been ignored\n")); + } - for (format, expected) in cases { + for arg in ["\"", "'"] { new_ucmd!() - .args(&["%04x\n", format]) + .args(&["%04x\n", arg]) + .fails() + .stderr_contains("expected a numeric value"); + } +} + +#[test] +#[cfg(target_family = "unix")] +fn mb_invalid_unicode() { + use std::ffi::OsStr; + use std::os::unix::ffi::OsStrExt; + + let cases = vec![ + ("%04x\n", b"\"\xe1", "00e1\n"), + ("%04x\n", b"'\xe1", "00e1\n"), + ("%i\n", b"\"\xe1", "225\n"), + ("%i\n", b"'\xe1", "225\n"), + ("%f\n", b"'\xe1", "225.000000\n"), + ]; + for (format, arg, stdout) in cases { + new_ucmd!() + .arg(format) + .arg(OsStr::from_bytes(arg)) + .succeeds() + .stdout_only(stdout); + } + + let cases = vec![ + (b"\"\xe1=".as_slice(), "="), + (b"'\xe1-".as_slice(), "-"), + (b"'\xe1=-==".as_slice(), "=-=="), + (b"'\xe1'".as_slice(), "'"), + // unclear if original or replacement character is better in stderr + //(b"''\xe1'".as_slice(), "'�'"), + ]; + for (arg, expected) in cases { + new_ucmd!() + .arg("%04x\n") + .arg(OsStr::from_bytes(arg)) .succeeds() .stdout_is("00e1\n") .stderr_is(format!("printf: warning: {expected}: character(s) following character constant have been ignored\n")); @@ -1364,3 +1421,35 @@ fn positional_format_specifiers() { .succeeds() .stdout_only("Octal: 115, Int: 42, Float: 3.141590, String: hello, Hex: ff, Scientific: 1.000000e-05, Char: A, Unsigned: 100, Integer: 123"); } + +#[test] +#[cfg(target_family = "unix")] +fn non_utf_8_input() { + use std::ffi::OsStr; + use std::os::unix::ffi::OsStrExt; + + // ISO-8859-1 encoded text + // spell-checker:disable + const INPUT_AND_OUTPUT: &[u8] = + b"Swer an rehte g\xFCete wendet s\xEEn gem\xFCete, dem volget s\xE6lde und \xEAre."; + // spell-checker:enable + + let os_str = OsStr::from_bytes(INPUT_AND_OUTPUT); + + new_ucmd!() + .arg("%s") + .arg(os_str) + .succeeds() + .stdout_only_bytes(INPUT_AND_OUTPUT); + + new_ucmd!() + .arg(os_str) + .succeeds() + .stdout_only_bytes(INPUT_AND_OUTPUT); + + new_ucmd!() + .arg("%d") + .arg(os_str) + .fails() + .stderr_contains("expected a numeric value"); +} diff --git a/util/why-error.md b/util/why-error.md index c4c371d070f..0fdff867a97 100644 --- a/util/why-error.md +++ b/util/why-error.md @@ -38,11 +38,7 @@ This file documents why some tests are failing: * gnu/tests/mv/part-hardlink.sh * gnu/tests/od/od-N.sh * gnu/tests/od/od-float.sh -* gnu/tests/printf/printf-cov.pl -* gnu/tests/printf/printf-indexed.sh -* gnu/tests/printf/printf-mb.sh * gnu/tests/printf/printf-quote.sh -* gnu/tests/printf/printf.sh * gnu/tests/ptx/ptx-overrun.sh * gnu/tests/ptx/ptx.pl * gnu/tests/rm/empty-inacc.sh - https://github.com/uutils/coreutils/issues/7033