diff --git a/src/uu/printf/src/printf.rs b/src/uu/printf/src/printf.rs index f278affaede..5664bc29221 100644 --- a/src/uu/printf/src/printf.rs +++ b/src/uu/printf/src/printf.rs @@ -2,9 +2,14 @@ // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. + use clap::{crate_version, Arg, ArgAction, Command}; use std::io::stdout; use std::ops::ControlFlow; +#[cfg(unix)] +use std::os::unix::ffi::{OsStrExt, OsStringExt}; +#[cfg(windows)] +use std::os::windows::ffi::OsStrExt; use uucore::error::{UResult, UUsageError}; use uucore::format::{parse_spec_and_escape, FormatArgument, FormatItem}; use uucore::{format_usage, help_about, help_section, help_usage}; @@ -19,23 +24,46 @@ mod options { pub const FORMAT: &str = "FORMAT"; pub const ARGUMENT: &str = "ARGUMENT"; } - #[uucore::main] pub fn uumain(args: impl uucore::Args) -> UResult<()> { let matches = uu_app().get_matches_from(args); let format = matches - .get_one::(options::FORMAT) + .get_one::(options::FORMAT) .ok_or_else(|| UUsageError::new(1, "missing operand"))?; - let values: Vec<_> = match matches.get_many::(options::ARGUMENT) { - Some(s) => s.map(|s| FormatArgument::Unparsed(s.to_string())).collect(), + #[cfg(unix)] + let format = format.as_bytes(); + + #[cfg(windows)] + let format = format.as_os_str().as_bytes_lossy(); + + let values: Vec<_> = match matches.get_many::(options::ARGUMENT) { + Some(s) => s + .map(|os_str| { + #[cfg(unix)] + { + let raw_bytes: Vec = os_str.clone().into_vec(); + FormatArgument::Unparsed( + String::from_utf8(raw_bytes.clone()) + .unwrap_or_else(|_| raw_bytes.iter().map(|&b| b as char).collect()), + ) + } + #[cfg(windows)] + { + let raw_bytes: Vec = os_str.as_os_str().as_bytes().to_vec(); + FormatArgument::Unparsed(String::from_utf8_lossy(&raw_bytes).into_owned()) + } + }) + .collect(), None => vec![], }; let mut format_seen = false; let mut args = values.iter().peekable(); - for item in parse_spec_and_escape(format.as_ref()) { + + // Parse and process the format string + for item in parse_spec_and_escape(format) { if let Ok(FormatItem::Spec(_)) = item { format_seen = true; } @@ -52,7 +80,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { } while args.peek().is_some() { - for item in parse_spec_and_escape(format.as_ref()) { + for item in parse_spec_and_escape(format) { match item?.write(stdout(), &mut args)? { ControlFlow::Continue(()) => {} ControlFlow::Break(()) => return Ok(()), @@ -83,6 +111,10 @@ pub fn uu_app() -> Command { .help("Print version information") .action(ArgAction::Version), ) - .arg(Arg::new(options::FORMAT)) - .arg(Arg::new(options::ARGUMENT).action(ArgAction::Append)) + .arg(Arg::new(options::FORMAT).value_parser(clap::value_parser!(std::ffi::OsString))) + .arg( + Arg::new(options::ARGUMENT) + .action(ArgAction::Append) + .value_parser(clap::value_parser!(std::ffi::OsString)), + ) } diff --git a/src/uucore/src/lib/features/format/argument.rs b/src/uucore/src/lib/features/format/argument.rs index 5cdd0342122..400cc7b95d9 100644 --- a/src/uucore/src/lib/features/format/argument.rs +++ b/src/uucore/src/lib/features/format/argument.rs @@ -56,7 +56,26 @@ impl<'a, T: Iterator> ArgumentIter<'a> for T { }; match next { FormatArgument::UnsignedInt(n) => *n, - FormatArgument::Unparsed(s) => extract_value(ParsedNumber::parse_u64(s), s), + FormatArgument::Unparsed(s) => { + // Check if the string is a character literal enclosed in quotes + if s.starts_with(['"', '\'']) { + // Extract the content between the quotes safely using chars + let mut chars = s.trim_matches(|c| c == '"' || c == '\'').chars(); + if let Some(first_char) = chars.next() { + if chars.clone().count() > 0 { + // Emit a warning if there are additional characters + let remaining: String = chars.collect(); + show_warning!( + "{}: character(s) following character constant have been ignored", + remaining + ); + } + return first_char as u64; // Use only the first character + } + return 0; // Empty quotes + } + extract_value(ParsedNumber::parse_u64(s), s) + } _ => 0, } } diff --git a/tests/by-util/test_printf.rs b/tests/by-util/test_printf.rs index 9b29c404ca8..917487e308c 100644 --- a/tests/by-util/test_printf.rs +++ b/tests/by-util/test_printf.rs @@ -12,45 +12,6 @@ fn basic_literal() { .stdout_only("hello world"); } -#[test] -fn escaped_tab() { - new_ucmd!() - .args(&["hello\\t world"]) - .succeeds() - .stdout_only("hello\t world"); -} - -#[test] -fn escaped_newline() { - new_ucmd!() - .args(&["hello\\n world"]) - .succeeds() - .stdout_only("hello\n world"); -} - -#[test] -fn escaped_slash() { - new_ucmd!() - .args(&["hello\\\\ world"]) - .succeeds() - .stdout_only("hello\\ world"); -} - -#[test] -fn unescaped_double_quote() { - new_ucmd!().args(&["\\\""]).succeeds().stdout_only("\""); -} - -#[test] -fn escaped_hex() { - new_ucmd!().args(&["\\x41"]).succeeds().stdout_only("A"); -} - -#[test] -fn escaped_octal() { - new_ucmd!().args(&["\\101"]).succeeds().stdout_only("A"); -} - #[test] fn escaped_unicode_four_digit() { new_ucmd!().args(&["\\u0125"]).succeeds().stdout_only("ĥ"); @@ -77,38 +38,6 @@ fn escaped_unrecognized() { new_ucmd!().args(&["c\\d"]).succeeds().stdout_only("c\\d"); } -#[test] -fn sub_string() { - new_ucmd!() - .args(&["hello %s", "world"]) - .succeeds() - .stdout_only("hello world"); -} - -#[test] -fn sub_multi_field() { - new_ucmd!() - .args(&["%s %s", "hello", "world"]) - .succeeds() - .stdout_only("hello world"); -} - -#[test] -fn sub_repeat_format_str() { - new_ucmd!() - .args(&["%s.", "hello", "world"]) - .succeeds() - .stdout_only("hello.world."); -} - -#[test] -fn sub_string_ignore_escapes() { - new_ucmd!() - .args(&["hello %s", "\\tworld"]) - .succeeds() - .stdout_only("hello \\tworld"); -} - #[test] fn sub_b_string_handle_escapes() { new_ucmd!() @@ -496,27 +425,11 @@ fn sub_any_asterisk_hex_arg() { } #[test] -fn sub_any_specifiers_no_params() { - new_ucmd!() - .args(&["%ztlhLji", "3"]) //spell-checker:disable-line - .succeeds() - .stdout_only("3"); -} - -#[test] -fn sub_any_specifiers_after_first_param() { - new_ucmd!() - .args(&["%0ztlhLji", "3"]) //spell-checker:disable-line - .succeeds() - .stdout_only("3"); -} - -#[test] -fn sub_any_specifiers_after_period() { - new_ucmd!() - .args(&["%0.ztlhLji", "3"]) //spell-checker:disable-line - .succeeds() - .stdout_only("3"); +fn sub_any_specifiers() { + // spell-checker:disable-next-line + for format in ["%ztlhLji", "%0ztlhLji", "%0.ztlhLji"] { + new_ucmd!().args(&[format, "3"]).succeeds().stdout_only("3"); + } } #[test] @@ -764,33 +677,23 @@ fn pad_string() { } #[test] -fn format_spec_zero_char_fails() { - // It is invalid to have the format spec '%0c' - new_ucmd!().args(&["%0c", "3"]).fails().code_is(1); -} - -#[test] -fn format_spec_zero_string_fails() { - // It is invalid to have the format spec '%0s' - new_ucmd!().args(&["%0s", "3"]).fails().code_is(1); -} - -#[test] -fn invalid_precision_fails() { - // It is invalid to have length of output string greater than i32::MAX - new_ucmd!() - .args(&["%.*d", "2147483648", "0"]) - .fails() - .stderr_is("printf: invalid precision: '2147483648'\n"); +fn format_spec_zero_fails() { + // It is invalid to have the format spec + for format in ["%0c", "%0s"] { + new_ucmd!().args(&[format, "3"]).fails().code_is(1); + } } #[test] -fn float_invalid_precision_fails() { +fn invalid_precision_tests() { // It is invalid to have length of output string greater than i32::MAX - new_ucmd!() - .args(&["%.*f", "2147483648", "0"]) - .fails() - .stderr_is("printf: invalid precision: '2147483648'\n"); + for format in ["%.*d", "%.*f"] { + let expected_error = "printf: invalid precision: '2147483648'\n"; + new_ucmd!() + .args(&[format, "2147483648", "0"]) + .fails() + .stderr_is(expected_error); + } } // The following padding-tests test for the cases in which flags in ['0', ' '] are given. @@ -963,3 +866,69 @@ fn float_switch_switch_decimal_scientific() { .succeeds() .stdout_only("1e-05"); } + +#[test] +fn mb_input() { + for format in ["\"á", "\'á", "'\u{e1}"] { + new_ucmd!() + .args(&["%04x\n", format]) + .succeeds() + .stdout_only("00e1\n"); + } + + let cases = vec![ + ("\"á=", "="), + ("\'á-", "-"), + ("\'á=-==", "=-=="), + ("'\u{e1}++", "++"), + ]; + + for (format, expected) in cases { + new_ucmd!() + .args(&["%04x\n", format]) + .succeeds() + .stdout_is("00e1\n") + .stderr_is(format!("printf: warning: {expected}: character(s) following character constant have been ignored\n")); + } +} + +#[test] +fn escaped_characters() { + fn test_escaped_character(input: &str, expected: &str) { + new_ucmd!().args(&[input]).succeeds().stdout_only(expected); + } + + let cases = vec![ + ("hello\\t world", "hello\t world"), + ("hello\\n world", "hello\n world"), + ("hello\\\\ world", "hello\\ world"), + ("\\\"", "\""), + ("\\x41", "A"), + ("\\101", "A"), + ]; + + for (input, expected) in cases { + test_escaped_character(input, expected); + } +} + +#[test] +fn substitution_tests() { + fn test_substitution(format: &str, args: Vec<&str>, expected: &str) { + let mut cmd = new_ucmd!(); + cmd.args(&[format]); + for arg in args { + cmd.args(&[arg]); + } + cmd.succeeds().stdout_only(expected); + } + let cases = vec![ + ("%s %s", vec!["hello", "world"], "hello world"), + ("%s.", vec!["hello", "world"], "hello.world."), + ("hello %s", vec!["\\tworld"], "hello \\tworld"), + ]; + + for (format, args, expected) in cases { + test_substitution(format, args, expected); + } +}