88use clap:: { Arg , ArgAction , Command } ;
99use std:: ffi:: OsString ;
1010use std:: fs:: File ;
11- use std:: io:: { self , ErrorKind , Read , Seek , Write } ;
11+ use std:: io:: { self , BufReader , ErrorKind , Read , Write } ;
1212use std:: path:: { Path , PathBuf } ;
1313use uucore:: display:: Quotable ;
1414use uucore:: encoding:: {
@@ -28,6 +28,8 @@ pub const BASE_CMD_PARSE_ERROR: i32 = 1;
2828///
2929/// This default is only used if no "-w"/"--wrap" argument is passed
3030pub const WRAP_DEFAULT : usize = 76 ;
31+ // Fixed to 8 KiB, matching the default `BufReader`/`BufWriter` capacity that std currently documents (std's own constant for it is not public).
32+ pub const DEFAULT_BUFFER_SIZE : usize = 8 * 1024 ;
3133
3234pub struct Config {
3335 pub decode : bool ,
@@ -149,64 +151,63 @@ pub fn base_app(about: &'static str, usage: &str) -> Command {
149151 )
150152}
151153
152- /// A trait alias for types that implement both `Read` and `Seek`.
153- pub trait ReadSeek : Read + Seek { }
154-
155- /// Automatically implement the `ReadSeek` trait for any type that implements both `Read` and `Seek`.
156- impl < T : Read + Seek > ReadSeek for T { }
157-
158- pub fn get_input ( config : & Config ) -> UResult < Box < dyn ReadSeek > > {
154+ pub fn get_input ( config : & Config ) -> UResult < Box < dyn Read > > { // Returns a buffered reader over the file named by `config.to_read`, or over stdin when it is `None`.
159155 match & config. to_read {
160156 Some ( path_buf) => {
161- // Do not buffer input, because buffering is handled by `fast_decode` and `fast_encode`
162157 let file =
163158 File :: open ( path_buf) . map_err_context ( || path_buf. maybe_quote ( ) . to_string ( ) ) ?;
164- Ok ( Box :: new ( file) )
159+ Ok ( Box :: new ( BufReader :: new ( file) ) )
165160 }
166161 None => {
167- let mut buffer = Vec :: new ( ) ;
168- io:: stdin ( ) . read_to_end ( & mut buffer) ?;
169- Ok ( Box :: new ( io:: Cursor :: new ( buffer) ) )
162+ // `io::stdin()` is already buffered inside std (not by the OS). NOTE(review): this `BufReader` adds a second buffer; consider `io::stdin().lock()` instead.
163+ Ok ( Box :: new ( BufReader :: new ( io:: stdin ( ) ) ) )
170164 }
171165 }
172166}
173-
174- /// Determines if the input buffer contains any padding ('=') ignoring trailing whitespace.
175- fn read_and_has_padding < R : Read > ( input : & mut R ) -> UResult < ( bool , Vec < u8 > ) > {
176- let mut buf = Vec :: new ( ) ;
177- input
178- . read_to_end ( & mut buf)
179- . map_err ( |err| USimpleError :: new ( 1 , format_read_error ( err. kind ( ) ) ) ) ?;
180-
181- // Treat the stream as padded if any '=' exists (GNU coreutils continues decoding
182- // even when padding bytes are followed by more data).
183- let has_padding = buf. contains ( & b'=' ) ;
184-
185- Ok ( ( has_padding, buf) )
186- }
187-
188- pub fn handle_input < R : Read + Seek > ( input : & mut R , format : Format , config : Config ) -> UResult < ( ) > {
189- let ( has_padding, read) = read_and_has_padding ( input) ?;
190-
167+ pub fn handle_input < R : Read > ( input : & mut R , format : Format , config : Config ) -> UResult < ( ) > {
168+ // Always allow padding for Base64 to avoid a full pre-scan of the input.
191169 let supports_fast_decode_and_encode =
192- get_supports_fast_decode_and_encode ( format, config. decode , has_padding ) ;
170+ get_supports_fast_decode_and_encode ( format, config. decode , true ) ;
193171
194172 let supports_fast_decode_and_encode_ref = supports_fast_decode_and_encode. as_ref ( ) ;
195173 let mut stdout_lock = io:: stdout ( ) . lock ( ) ;
196- let result = if config. decode {
197- fast_decode:: fast_decode (
198- read,
174+ let result = match ( format, config. decode ) {
175+ // Base58 must process the entire input as one big integer; keep the
176+ // historical behaviour of buffering everything for this format only.
177+ ( Format :: Base58 , _) => {
178+ let mut buffered = Vec :: new ( ) ;
179+ input
180+ . read_to_end ( & mut buffered)
181+ . map_err ( |err| USimpleError :: new ( 1 , format_read_error ( err. kind ( ) ) ) ) ?;
182+ if config. decode {
183+ fast_decode:: fast_decode_buffer (
184+ buffered,
185+ & mut stdout_lock,
186+ supports_fast_decode_and_encode_ref,
187+ config. ignore_garbage ,
188+ )
189+ } else {
190+ fast_encode:: fast_encode_buffer (
191+ buffered,
192+ & mut stdout_lock,
193+ supports_fast_decode_and_encode_ref,
194+ config. wrap_cols ,
195+ )
196+ }
197+ }
198+ // Streaming path for all other encodings keeps memory bounded.
199+ ( _, true ) => fast_decode:: fast_decode_stream (
200+ input,
199201 & mut stdout_lock,
200202 supports_fast_decode_and_encode_ref,
201203 config. ignore_garbage ,
202- )
203- } else {
204- fast_encode:: fast_encode (
205- read,
204+ ) ,
205+ ( _, false ) => fast_encode:: fast_encode_stream (
206+ input,
206207 & mut stdout_lock,
207208 supports_fast_decode_and_encode_ref,
208209 config. wrap_cols ,
209- )
210+ ) ,
210211 } ;
211212
212213 // Ensure any pending stdout buffer is flushed even if decoding failed; GNU basenc
@@ -296,14 +297,17 @@ pub fn get_supports_fast_decode_and_encode(
296297}
297298
298299pub mod fast_encode {
299- use crate :: base_common:: WRAP_DEFAULT ;
300+ use crate :: base_common:: { DEFAULT_BUFFER_SIZE , WRAP_DEFAULT } ;
300301 use std:: {
301302 cmp:: min,
302303 collections:: VecDeque ,
303- io:: { self , Write } ,
304+ io:: { self , Read , Write } ,
304305 num:: NonZeroUsize ,
305306 } ;
306- use uucore:: { encoding:: SupportsFastDecodeAndEncode , error:: UResult } ;
307+ use uucore:: {
308+ encoding:: SupportsFastDecodeAndEncode ,
309+ error:: { UResult , USimpleError } ,
310+ } ;
307311
308312 struct LineWrapping {
309313 line_length : NonZeroUsize ,
@@ -405,7 +409,7 @@ pub mod fast_encode {
405409 }
406410 // End of helper functions
407411
408- pub fn fast_encode (
412+ pub fn fast_encode_buffer (
409413 input : Vec < u8 > ,
410414 output : & mut dyn Write ,
411415 supports_fast_decode_and_encode : & dyn SupportsFastDecodeAndEncode ,
@@ -506,10 +510,90 @@ pub mod fast_encode {
506510 }
507511 Ok ( ( ) )
508512 }
513+ /// Encodes `input` incrementally: data is read in blocks, every full chunk of `unpadded_multiple() * 1_024` bytes is encoded and handed to `write_to_output`, and the remainder is encoded once a read returns 0.
514+ pub fn fast_encode_stream (
515+ input : & mut dyn Read ,
516+ output : & mut dyn Write ,
517+ supports_fast_decode_and_encode : & dyn SupportsFastDecodeAndEncode ,
518+ wrap : Option < usize > , // `Some(0)` disables line wrapping, `None` selects `WRAP_DEFAULT`
519+ ) -> UResult < ( ) > {
520+ const ENCODE_IN_CHUNKS_OF_SIZE_MULTIPLE : usize = 1_024 ;
521+
522+ let encode_in_chunks_of_size =
523+ supports_fast_decode_and_encode. unpadded_multiple ( ) * ENCODE_IN_CHUNKS_OF_SIZE_MULTIPLE ;
524+
525+ assert ! ( encode_in_chunks_of_size > 0 ) ; // a zero chunk size would make the `while` loop below spin forever
526+
527+ let mut line_wrapping = match wrap {
528+ Some ( 0 ) => None ,
529+ Some ( an) => Some ( LineWrapping {
530+ line_length : NonZeroUsize :: new ( an) . unwrap ( ) , // cannot panic: `Some(0)` is handled by the arm above
531+ print_buffer : Vec :: < u8 > :: new ( ) ,
532+ } ) ,
533+ None => Some ( LineWrapping {
534+ line_length : NonZeroUsize :: new ( WRAP_DEFAULT ) . unwrap ( ) ,
535+ print_buffer : Vec :: < u8 > :: new ( ) ,
536+ } ) ,
537+ } ;
538+
539+ // Buffers: input bytes not yet encoded, and encoded bytes passed on to `write_to_output`
540+ let mut leftover_buffer = VecDeque :: < u8 > :: new ( ) ;
541+ let mut encoded_buffer = VecDeque :: < u8 > :: new ( ) ;
542+
543+ let mut read_buffer = vec ! [ 0u8 ; encode_in_chunks_of_size. max( DEFAULT_BUFFER_SIZE ) ] ; // sized so that a single read can deliver at least one full chunk
544+
545+ loop {
546+ let read = input
547+ . read ( & mut read_buffer) // NOTE(review): unlike `read_to_end`, a bare `read` is not retried on `ErrorKind::Interrupted` - confirm whether that case needs handling
548+ . map_err ( |err| USimpleError :: new ( 1 , super :: format_read_error ( err. kind ( ) ) ) ) ?;
549+ if read == 0 { // a zero-length read is treated as end of input
550+ break ;
551+ }
552+
553+ leftover_buffer. extend ( & read_buffer[ ..read] ) ;
554+
555+ while leftover_buffer. len ( ) >= encode_in_chunks_of_size {
556+ {
557+ let contiguous = leftover_buffer. make_contiguous ( ) ; // `VecDeque` storage may wrap around; obtain one slice to borrow the chunk from
558+ encode_in_chunks_to_buffer (
559+ supports_fast_decode_and_encode,
560+ & contiguous[ ..encode_in_chunks_of_size] ,
561+ & mut encoded_buffer,
562+ ) ?;
563+ }
564+
565+ // Drop the bytes that were just encoded; anything shorter than a full chunk stays queued
566+ leftover_buffer. drain ( ..encode_in_chunks_of_size) ;
567+
568+ write_to_output (
569+ & mut line_wrapping,
570+ & mut encoded_buffer,
571+ output,
572+ false , // NOTE(review): presumably tells `write_to_output` this is not the final call - confirm against its definition
573+ wrap == Some ( 0 ) , // true when wrapping is disabled
574+ ) ?;
575+ }
576+ }
577+
578+ // Encode the bytes left over after the last full chunk (possibly none) and flush
579+ supports_fast_decode_and_encode
580+ . encode_to_vec_deque ( leftover_buffer. make_contiguous ( ) , & mut encoded_buffer) ?;
581+
582+ write_to_output (
583+ & mut line_wrapping,
584+ & mut encoded_buffer,
585+ output,
586+ true , // NOTE(review): presumably marks the final call so pending output is flushed - confirm against `write_to_output`
587+ wrap == Some ( 0 ) ,
588+ ) ?;
589+
590+ Ok ( ( ) )
591+ }
509592}
510593
511594pub mod fast_decode {
512- use std:: io:: { self , Write } ;
595+ use crate :: base_common:: DEFAULT_BUFFER_SIZE ;
596+ use std:: io:: { self , Read , Write } ;
513597 use uucore:: {
514598 encoding:: SupportsFastDecodeAndEncode ,
515599 error:: { UResult , USimpleError } ,
@@ -579,7 +663,7 @@ pub mod fast_decode {
579663 }
580664 // End of helper functions
581665
582- pub fn fast_decode (
666+ pub fn fast_decode_buffer (
583667 input : Vec < u8 > ,
584668 output : & mut dyn Write ,
585669 supports_fast_decode_and_encode : & dyn SupportsFastDecodeAndEncode ,
@@ -671,6 +755,123 @@ pub mod fast_decode {
671755
672756 Ok ( ( ) )
673757 }
758+ /// Decodes `input` incrementally: line breaks are skipped, bytes found in the alphabet table are collected and decoded in chunks, and any other byte is an error unless `ignore_garbage` is set.
759+ pub fn fast_decode_stream (
760+ input : & mut dyn Read ,
761+ output : & mut dyn Write ,
762+ supports_fast_decode_and_encode : & dyn SupportsFastDecodeAndEncode ,
763+ ignore_garbage : bool ,
764+ ) -> UResult < ( ) > {
765+ const DECODE_IN_CHUNKS_OF_SIZE_MULTIPLE : usize = 1_024 ;
766+
767+ let alphabet = supports_fast_decode_and_encode. alphabet ( ) ;
768+ let alphabet_table = alphabet_lookup ( alphabet) ;
769+ let valid_multiple = supports_fast_decode_and_encode. valid_decoding_multiple ( ) ;
770+ let decode_in_chunks_of_size = valid_multiple * DECODE_IN_CHUNKS_OF_SIZE_MULTIPLE ;
771+
772+ assert ! ( decode_in_chunks_of_size > 0 ) ;
773+ assert ! ( valid_multiple > 0 ) ;
774+
775+ let supports_partial_decode = supports_fast_decode_and_encode. supports_partial_decode ( ) ;
776+
777+ let mut buffer = Vec :: with_capacity ( decode_in_chunks_of_size) ; // accepted alphabet bytes waiting to be decoded
778+ let mut decoded_buffer = Vec :: < u8 > :: new ( ) ;
779+ let mut read_buffer = [ 0u8 ; DEFAULT_BUFFER_SIZE ] ;
780+
781+ loop {
782+ let read = input
783+ . read ( & mut read_buffer) // NOTE(review): unlike `read_to_end`, a bare `read` is not retried on `ErrorKind::Interrupted` - confirm whether that case needs handling
784+ . map_err ( |err| USimpleError :: new ( 1 , super :: format_read_error ( err. kind ( ) ) ) ) ?;
785+ if read == 0 { // a zero-length read is treated as end of input
786+ break ;
787+ }
788+
789+ for & byte in & read_buffer[ ..read] {
790+ if byte == b'\n' || byte == b'\r' { // line breaks are skipped before any validation, regardless of `ignore_garbage`
791+ continue ;
792+ }
793+
794+ if alphabet_table[ usize:: from ( byte) ] { // the table is indexed by byte value
795+ buffer. push ( byte) ;
796+ } else if ignore_garbage {
797+ continue ;
798+ } else { // invalid byte: decode and write what is already buffered (as far as the branches below allow), then fail
799+ if supports_partial_decode {
800+ flush_ready_chunks (
801+ & mut buffer,
802+ decode_in_chunks_of_size,
803+ valid_multiple,
804+ supports_fast_decode_and_encode,
805+ & mut decoded_buffer,
806+ output,
807+ ) ?;
808+ } else {
809+ while buffer. len ( ) >= decode_in_chunks_of_size { // NOTE(review): only whole chunks are decoded here; bytes past the last full chunk stay undecoded on this error path
810+ decode_in_chunks_to_buffer (
811+ supports_fast_decode_and_encode,
812+ & buffer[ ..decode_in_chunks_of_size] ,
813+ & mut decoded_buffer,
814+ ) ?;
815+ write_to_output ( & mut decoded_buffer, output) ?;
816+ buffer. drain ( ..decode_in_chunks_of_size) ;
817+ }
818+ }
819+ return Err ( USimpleError :: new ( 1 , "error: invalid input" . to_owned ( ) ) ) ;
820+ }
821+
822+ if supports_partial_decode { // NOTE(review): `flush_ready_chunks` is invoked once per accepted byte; its cost is not visible from here
823+ flush_ready_chunks (
824+ & mut buffer,
825+ decode_in_chunks_of_size,
826+ valid_multiple,
827+ supports_fast_decode_and_encode,
828+ & mut decoded_buffer,
829+ output,
830+ ) ?;
831+ } else if buffer. len ( ) == decode_in_chunks_of_size { // `==` suffices: on this path the buffer grows one byte at a time and is cleared as soon as it reaches the chunk size
832+ decode_in_chunks_to_buffer (
833+ supports_fast_decode_and_encode,
834+ & buffer,
835+ & mut decoded_buffer,
836+ ) ?;
837+ write_to_output ( & mut decoded_buffer, output) ?;
838+ buffer. clear ( ) ;
839+ }
840+ }
841+ }
842+
843+ if supports_partial_decode {
844+ flush_ready_chunks (
845+ & mut buffer,
846+ decode_in_chunks_of_size,
847+ valid_multiple,
848+ supports_fast_decode_and_encode,
849+ & mut decoded_buffer,
850+ output,
851+ ) ?;
852+ }
853+
854+ if !buffer. is_empty ( ) { // end of input with bytes still buffered: decode the remainder
855+ let mut owned_chunk: Option < Vec < u8 > > = None ;
856+ let mut had_invalid_tail = false ;
857+
858+ if let Some ( pad_result) = supports_fast_decode_and_encode. pad_remainder ( & buffer) { // `pad_remainder` is defined elsewhere; when it returns a result, its chunk replaces `buffer` for the final decode
859+ had_invalid_tail = pad_result. had_invalid_tail ;
860+ owned_chunk = Some ( pad_result. chunk ) ;
861+ }
862+
863+ let final_chunk = owned_chunk. as_deref ( ) . unwrap_or ( & buffer) ;
864+
865+ supports_fast_decode_and_encode. decode_into_vec ( final_chunk, & mut decoded_buffer) ?;
866+ write_to_output ( & mut decoded_buffer, output) ?;
867+
868+ if had_invalid_tail { // the decoded output is written first, then the invalid tail is reported
869+ return Err ( USimpleError :: new ( 1 , "error: invalid input" . to_owned ( ) ) ) ;
870+ }
871+ }
872+
873+ Ok ( ( ) )
874+ }
674875}
675876
676877fn format_read_error ( kind : ErrorKind ) -> String {
@@ -692,6 +893,21 @@ fn format_read_error(kind: ErrorKind) -> String {
692893 translate ! ( "base-common-read-error" , "error" => kind_string_capitalized)
693894}
694895
896+ /// Reads `input` to the end and reports whether the data contains a padding byte ('=') anywhere; returns that flag together with the bytes read.
897+ #[ cfg( test) ]
898+ fn read_and_has_padding < R : Read > ( input : & mut R ) -> UResult < ( bool , Vec < u8 > ) > {
899+ let mut buf = Vec :: new ( ) ;
900+ input
901+ . read_to_end ( & mut buf)
902+ . map_err ( |err| USimpleError :: new ( 1 , format_read_error ( err. kind ( ) ) ) ) ?;
903+
904+ // Treat the stream as padded if any '=' exists (GNU coreutils continues decoding
905+ // even when padding bytes are followed by more data).
906+ let has_padding = buf. contains ( & b'=' ) ;
907+
908+ Ok ( ( has_padding, buf) )
909+ }
910+
695911#[ cfg( test) ]
696912mod tests {
697913 use crate :: base_common:: read_and_has_padding;
0 commit comments