44// file that was distributed with this source code.
55
66use crate :: errors:: * ;
7- use crate :: format:: format_and_print ;
7+ use crate :: format:: { format_and_print_delimited , format_and_print_whitespace } ;
88use crate :: options:: * ;
99use crate :: units:: { Result , Unit } ;
10- use clap:: { Arg , ArgAction , ArgMatches , Command , parser:: ValueSource } ;
10+ use clap:: { Arg , ArgAction , ArgMatches , Command , builder:: ValueParser , parser:: ValueSource } ;
11+ use std:: ffi:: OsString ;
1112use std:: io:: { BufRead , Error , Write } ;
1213use std:: result:: Result as StdResult ;
1314use std:: str:: FromStr ;
1415
1516use units:: { IEC_BASES , SI_BASES } ;
1617use uucore:: display:: Quotable ;
1718use uucore:: error:: UResult ;
19+ use uucore:: os_str_as_bytes;
1820use uucore:: translate;
1921
2022use uucore:: parser:: shortcut_value_parser:: ShortcutValueParser ;
@@ -26,7 +28,7 @@ pub mod format;
2628pub mod options;
2729mod units;
2830
29- fn handle_args < ' a > ( args : impl Iterator < Item = & ' a str > , options : & NumfmtOptions ) -> UResult < ( ) > {
31+ fn handle_args < ' a > ( args : impl Iterator < Item = & ' a [ u8 ] > , options : & NumfmtOptions ) -> UResult < ( ) > {
3032 for l in args {
3133 format_and_handle_validation ( l, options) ?;
3234 }
@@ -37,40 +39,45 @@ fn handle_buffer<R>(input: R, options: &NumfmtOptions) -> UResult<()>
3739where
3840 R : BufRead ,
3941{
40- if options. zero_terminated {
41- handle_buffer_iterator (
42- input
43- . split ( 0 )
44- // FIXME: This panics on UTF8 decoding, but this util in general doesn't handle
45- // invalid UTF8
46- . map ( |bytes| Ok ( String :: from_utf8 ( bytes?) . unwrap ( ) ) ) ,
47- options,
48- )
49- } else {
50- handle_buffer_iterator ( input. lines ( ) , options)
51- }
42+ let terminator = if options. zero_terminated { 0u8 } else { b'\n' } ;
43+ handle_buffer_iterator ( input. split ( terminator) , options, terminator)
5244}
5345
5446fn handle_buffer_iterator (
55- iter : impl Iterator < Item = StdResult < String , Error > > ,
47+ iter : impl Iterator < Item = StdResult < Vec < u8 > , Error > > ,
5648 options : & NumfmtOptions ,
49+ terminator : u8 ,
5750) -> UResult < ( ) > {
58- let eol = if options. zero_terminated { '\0' } else { '\n' } ;
5951 for ( idx, line_result) in iter. enumerate ( ) {
6052 match line_result {
6153 Ok ( line) if idx < options. header => {
62- print ! ( "{line}{eol}" ) ;
54+ std:: io:: stdout ( ) . write_all ( & line) ?;
55+ std:: io:: stdout ( ) . write_all ( & [ terminator] ) ?;
6356 Ok ( ( ) )
6457 }
65- Ok ( line) => format_and_handle_validation ( line. as_ref ( ) , options) ,
58+ Ok ( line) => format_and_handle_validation ( & line, options) ,
6659 Err ( err) => return Err ( Box :: new ( NumfmtError :: IoError ( err. to_string ( ) ) ) ) ,
6760 } ?;
6861 }
6962 Ok ( ( ) )
7063}
7164
72- fn format_and_handle_validation ( input_line : & str , options : & NumfmtOptions ) -> UResult < ( ) > {
73- let handled_line = format_and_print ( input_line, options) ;
65+ fn format_and_handle_validation ( input_line : & [ u8 ] , options : & NumfmtOptions ) -> UResult < ( ) > {
66+ let eol = if options. zero_terminated {
67+ b'\0'
68+ } else {
69+ b'\n'
70+ } ;
71+
72+ let handled_line = if options. delimiter . is_some ( ) {
73+ format_and_print_delimited ( input_line, options)
74+ } else {
75+ // Whitespace mode requires valid UTF-8
76+ match std:: str:: from_utf8 ( input_line) {
77+ Ok ( s) => format_and_print_whitespace ( s, options) ,
78+ Err ( _) => Err ( translate ! ( "numfmt-error-invalid-input" ) ) ,
79+ }
80+ } ;
7481
7582 if let Err ( error_message) = handled_line {
7683 match options. invalid {
@@ -85,7 +92,8 @@ fn format_and_handle_validation(input_line: &str, options: &NumfmtOptions) -> UR
8592 }
8693 InvalidModes :: Ignore => { }
8794 }
88- println ! ( "{input_line}" ) ;
95+ std:: io:: stdout ( ) . write_all ( input_line) ?;
96+ std:: io:: stdout ( ) . write_all ( & [ eol] ) ?;
8997 }
9098
9199 Ok ( ( ) )
@@ -150,6 +158,22 @@ fn parse_unit_size_suffix(s: &str) -> Option<usize> {
150158 None
151159}
152160
161+ /// Parse delimiter argument, ensuring it's a single character.
162+ /// For non-UTF8 locales, we allow up to 4 bytes (max UTF-8 char length).
163+ fn parse_delimiter ( arg : & OsString ) -> Result < Vec < u8 > > {
164+ let bytes = os_str_as_bytes ( arg) . map_err ( |e| e. to_string ( ) ) ?;
165+ // TODO: Cut, NL and here need to find a better way to do locale specific character count
166+ if arg. to_str ( ) . is_some_and ( |s| s. chars ( ) . count ( ) > 1 )
167+ || ( arg. to_str ( ) . is_none ( ) && bytes. len ( ) > 4 )
168+ {
169+ Err ( translate ! (
170+ "numfmt-error-delimiter-must-be-single-character"
171+ ) )
172+ } else {
173+ Ok ( bytes. to_vec ( ) )
174+ }
175+ }
176+
153177fn parse_options ( args : & ArgMatches ) -> Result < NumfmtOptions > {
154178 let from = parse_unit ( args. get_one :: < String > ( FROM ) . unwrap ( ) ) ?;
155179 let to = parse_unit ( args. get_one :: < String > ( TO ) . unwrap ( ) ) ?;
@@ -212,15 +236,10 @@ fn parse_options(args: &ArgMatches) -> Result<NumfmtOptions> {
212236 ) ) ;
213237 }
214238
215- let delimiter = args. get_one :: < String > ( DELIMITER ) . map_or ( Ok ( None ) , |arg| {
216- if arg. len ( ) == 1 {
217- Ok ( Some ( arg. to_owned ( ) ) )
218- } else {
219- Err ( translate ! (
220- "numfmt-error-delimiter-must-be-single-character"
221- ) )
222- }
223- } ) ?;
239+ let delimiter = args
240+ . get_one :: < OsString > ( DELIMITER )
241+ . map ( parse_delimiter)
242+ . transpose ( ) ?;
224243
225244 // unwrap is fine because the argument has a default value
226245 let round = match args. get_one :: < String > ( ROUND ) . unwrap ( ) . as_str ( ) {
@@ -264,8 +283,14 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
264283
265284 let options = parse_options ( & matches) . map_err ( NumfmtError :: IllegalArgument ) ?;
266285
267- let result = match matches. get_many :: < String > ( NUMBER ) {
268- Some ( values) => handle_args ( values. map ( |s| s. as_str ( ) ) , & options) ,
286+ let result = match matches. get_many :: < OsString > ( NUMBER ) {
287+ Some ( values) => {
288+ let byte_args: Vec < & [ u8 ] > = values
289+ . map ( |s| os_str_as_bytes ( s) . map_err ( |e| e. to_string ( ) ) )
290+ . collect :: < std:: result:: Result < Vec < _ > , _ > > ( )
291+ . map_err ( NumfmtError :: IllegalArgument ) ?;
292+ handle_args ( byte_args. into_iter ( ) , & options)
293+ }
269294 None => {
270295 let stdin = std:: io:: stdin ( ) ;
271296 let mut locked_stdin = stdin. lock ( ) ;
@@ -296,6 +321,7 @@ pub fn uu_app() -> Command {
296321 . short ( 'd' )
297322 . long ( DELIMITER )
298323 . value_name ( "X" )
324+ . value_parser ( ValueParser :: os_string ( ) )
299325 . help ( translate ! ( "numfmt-help-delimiter" ) ) ,
300326 )
301327 . arg (
@@ -397,7 +423,12 @@ pub fn uu_app() -> Command {
397423 . help ( translate ! ( "numfmt-help-zero-terminated" ) )
398424 . action ( ArgAction :: SetTrue ) ,
399425 )
400- . arg ( Arg :: new ( NUMBER ) . hide ( true ) . action ( ArgAction :: Append ) )
426+ . arg (
427+ Arg :: new ( NUMBER )
428+ . hide ( true )
429+ . action ( ArgAction :: Append )
430+ . value_parser ( ValueParser :: os_string ( ) ) ,
431+ )
401432}
402433
403434#[ cfg( test) ]
@@ -528,7 +559,7 @@ mod tests {
528559
529560 #[ test]
530561 fn args_fail_returns_status_2_for_invalid_input ( ) {
531- let input_value = [ "5" , "4Q" ] . into_iter ( ) ;
562+ let input_value = [ b "5". as_slice ( ) , b "4Q"] . into_iter ( ) ;
532563 let mut options = get_valid_options ( ) ;
533564 options. invalid = InvalidModes :: Fail ;
534565 handle_args ( input_value, & options) . unwrap ( ) ;
@@ -541,7 +572,7 @@ mod tests {
541572
542573 #[ test]
543574 fn args_warn_returns_status_0_for_invalid_input ( ) {
544- let input_value = [ "5" , "4Q" ] . into_iter ( ) ;
575+ let input_value = [ b "5". as_slice ( ) , b "4Q"] . into_iter ( ) ;
545576 let mut options = get_valid_options ( ) ;
546577 options. invalid = InvalidModes :: Warn ;
547578 let result = handle_args ( input_value, & options) ;
0 commit comments