Skip to content

Commit 3de9411

Browse files
authored
base(nc|32|64): Optimize performance by reducing memset (#9632)
* perf(base32): optimize read buffer allocation in fast encode/decode

  Refactor buffer creation from zero-initialized vectors to pre-allocated Vec with_capacity, using unsafe set_len to avoid unnecessary zeroing, improving performance without affecting correctness, as only initialized bytes from Read::read are accessed.

* refactor: use MaybeUninit for safer buffer handling in base32 encode/decode

  Replaced manual unsafe `set_len` calls and direct reads into uninitialized vectors with `MaybeUninit::slice_assume_init_mut` to prevent potential memory safety issues and improve code reliability in `fast_encode` and `fast_decode` modules. Added buffer clearing to ensure proper reuse.

* refactor(base32): replace MaybeUninit::slice_assume_init_mut with slice::from_raw_parts_mut

  Replace unsafe usage of `MaybeUninit::slice_assume_init_mut` with `slice::from_raw_parts_mut` in the fast_encode and fast_decode modules for reading data into the spare capacity of buffers. This change maintains safety guarantees through updated comments while potentially improving code clarity and performance by avoiding MaybeUninit initialization assumptions. The modification ensures the buffer's uninitialized tail is correctly handled as raw bytes during I/O operations.

* refactor(base32): reorder std imports in base_common.rs for consistency

  Moved the `slice` import from after `collections::VecDeque` to after `num::NonZeroUsize` to better align with the module's import grouping style.

* refactor(base32): remove unsafe buffer handling in encode/decode

  Replace unsafe spare_capacity_mut and from_raw_parts_mut usage with safe Vec initialization and direct read calls in fast_encode and fast_decode. This eliminates potential safety risks while preserving buffer functionality.

* perf(base32): optimize input handling by switching to BufRead for efficient buffering

  Switch from unbuffered Read to BufRead in get_input, handle_input, and fast_encode_stream functions. This reduces syscalls by leveraging buffered reads, improving performance for base32 encoding/decoding operations. Refactor fast_encode_stream to use fill_buf() and manage leftover buffers more efficiently.
1 parent 9212ce2 commit 3de9411

File tree

1 file changed

+77
-39
lines changed

1 file changed

+77
-39
lines changed

src/uu/base32/src/base_common.rs

Lines changed: 77 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
use clap::{Arg, ArgAction, Command};
99
use std::ffi::OsString;
1010
use std::fs::File;
11-
use std::io::{self, BufReader, ErrorKind, Read, Write};
11+
use std::io::{self, BufRead, BufReader, ErrorKind, Write};
1212
use std::path::{Path, PathBuf};
1313
use uucore::display::Quotable;
1414
use uucore::encoding::{
@@ -146,20 +146,26 @@ pub fn base_app(about: String, usage: String) -> Command {
146146
)
147147
}
148148

149-
pub fn get_input(config: &Config) -> UResult<Box<dyn Read>> {
149+
pub fn get_input(config: &Config) -> UResult<Box<dyn BufRead>> {
150150
match &config.to_read {
151151
Some(path_buf) => {
152152
let file =
153153
File::open(path_buf).map_err_context(|| path_buf.maybe_quote().to_string())?;
154-
Ok(Box::new(BufReader::new(file)))
154+
Ok(Box::new(BufReader::with_capacity(
155+
DEFAULT_BUFFER_SIZE,
156+
file,
157+
)))
155158
}
156159
None => {
157160
// Stdin is already buffered by the OS; wrap once more to reduce syscalls per read.
158-
Ok(Box::new(BufReader::new(io::stdin())))
161+
Ok(Box::new(BufReader::with_capacity(
162+
DEFAULT_BUFFER_SIZE,
163+
io::stdin(),
164+
)))
159165
}
160166
}
161167
}
162-
pub fn handle_input<R: Read>(input: &mut R, format: Format, config: Config) -> UResult<()> {
168+
pub fn handle_input<R: BufRead>(input: &mut R, format: Format, config: Config) -> UResult<()> {
163169
// Always allow padding for Base64 to avoid a full pre-scan of the input.
164170
let supports_fast_decode_and_encode =
165171
get_supports_fast_decode_and_encode(format, config.decode, true);
@@ -292,11 +298,11 @@ pub fn get_supports_fast_decode_and_encode(
292298
}
293299

294300
pub mod fast_encode {
295-
use crate::base_common::{DEFAULT_BUFFER_SIZE, WRAP_DEFAULT};
301+
use crate::base_common::WRAP_DEFAULT;
296302
use std::{
297303
cmp::min,
298304
collections::VecDeque,
299-
io::{self, Read, Write},
305+
io::{self, BufRead, Write},
300306
num::NonZeroUsize,
301307
};
302308
use uucore::{
@@ -519,7 +525,7 @@ pub mod fast_encode {
519525
/// Remaining bytes are encoded and flushed at the end. I/O or encoding
520526
/// failures are propagated via `UResult`.
521527
pub fn fast_encode_stream(
522-
input: &mut dyn Read,
528+
input: &mut dyn BufRead,
523529
output: &mut dyn Write,
524530
supports_fast_decode_and_encode: &dyn SupportsFastDecodeAndEncode,
525531
wrap: Option<usize>,
@@ -544,47 +550,79 @@ pub mod fast_encode {
544550
};
545551

546552
// Buffers
547-
let mut leftover_buffer = VecDeque::<u8>::new();
548553
let mut encoded_buffer = VecDeque::<u8>::new();
549-
550-
let mut read_buffer = vec![0u8; encode_in_chunks_of_size.max(DEFAULT_BUFFER_SIZE)];
554+
let mut leftover_buffer = Vec::<u8>::with_capacity(encode_in_chunks_of_size);
551555

552556
loop {
553-
let read = input
554-
.read(&mut read_buffer)
557+
let read_buffer = input
558+
.fill_buf()
555559
.map_err(|err| USimpleError::new(1, super::format_read_error(err.kind())))?;
556-
if read == 0 {
560+
if read_buffer.is_empty() {
557561
break;
558562
}
559563

560-
leftover_buffer.extend(&read_buffer[..read]);
564+
let mut consumed = 0;
561565

562-
while leftover_buffer.len() >= encode_in_chunks_of_size {
563-
{
564-
let contiguous = leftover_buffer.make_contiguous();
566+
if !leftover_buffer.is_empty() {
567+
let needed = encode_in_chunks_of_size - leftover_buffer.len();
568+
let take = needed.min(read_buffer.len());
569+
leftover_buffer.extend_from_slice(&read_buffer[..take]);
570+
consumed += take;
571+
572+
if leftover_buffer.len() == encode_in_chunks_of_size {
565573
encode_in_chunks_to_buffer(
566574
supports_fast_decode_and_encode,
567-
&contiguous[..encode_in_chunks_of_size],
575+
leftover_buffer.as_slice(),
576+
&mut encoded_buffer,
577+
)?;
578+
leftover_buffer.clear();
579+
580+
write_to_output(
581+
&mut line_wrapping,
568582
&mut encoded_buffer,
583+
output,
584+
false,
585+
wrap == Some(0),
569586
)?;
570587
}
588+
}
571589

572-
// Drop the data we just encoded
573-
leftover_buffer.drain(..encode_in_chunks_of_size);
590+
let remaining = &read_buffer[consumed..];
591+
let full_chunk_bytes =
592+
(remaining.len() / encode_in_chunks_of_size) * encode_in_chunks_of_size;
574593

575-
write_to_output(
576-
&mut line_wrapping,
577-
&mut encoded_buffer,
578-
output,
579-
false,
580-
wrap == Some(0),
581-
)?;
594+
if full_chunk_bytes > 0 {
595+
for chunk in remaining[..full_chunk_bytes].chunks_exact(encode_in_chunks_of_size) {
596+
encode_in_chunks_to_buffer(
597+
supports_fast_decode_and_encode,
598+
chunk,
599+
&mut encoded_buffer,
600+
)?;
601+
write_to_output(
602+
&mut line_wrapping,
603+
&mut encoded_buffer,
604+
output,
605+
false,
606+
wrap == Some(0),
607+
)?;
608+
}
609+
consumed += full_chunk_bytes;
610+
}
611+
612+
if consumed < read_buffer.len() {
613+
leftover_buffer.extend_from_slice(&read_buffer[consumed..]);
614+
consumed = read_buffer.len();
582615
}
616+
617+
input.consume(consumed);
618+
619+
// `leftover_buffer` should never exceed one partial chunk.
620+
debug_assert!(leftover_buffer.len() < encode_in_chunks_of_size);
583621
}
584622

585623
// Encode any remaining bytes and flush
586624
supports_fast_decode_and_encode
587-
.encode_to_vec_deque(leftover_buffer.make_contiguous(), &mut encoded_buffer)?;
625+
.encode_to_vec_deque(&leftover_buffer, &mut encoded_buffer)?;
588626

589627
write_to_output(
590628
&mut line_wrapping,
@@ -599,8 +637,7 @@ pub mod fast_encode {
599637
}
600638

601639
pub mod fast_decode {
602-
use crate::base_common::DEFAULT_BUFFER_SIZE;
603-
use std::io::{self, Read, Write};
640+
use std::io::{self, BufRead, Write};
604641
use uucore::{
605642
encoding::SupportsFastDecodeAndEncode,
606643
error::{UResult, USimpleError},
@@ -630,7 +667,6 @@ pub mod fast_decode {
630667
fn write_to_output(decoded_buffer: &mut Vec<u8>, output: &mut dyn Write) -> io::Result<()> {
631668
// Write all data in `decoded_buffer` to `output`
632669
output.write_all(decoded_buffer.as_slice())?;
633-
output.flush()?;
634670

635671
decoded_buffer.clear();
636672

@@ -764,7 +800,7 @@ pub mod fast_decode {
764800
}
765801

766802
pub fn fast_decode_stream(
767-
input: &mut dyn Read,
803+
input: &mut dyn BufRead,
768804
output: &mut dyn Write,
769805
supports_fast_decode_and_encode: &dyn SupportsFastDecodeAndEncode,
770806
ignore_garbage: bool,
@@ -783,17 +819,17 @@ pub mod fast_decode {
783819

784820
let mut buffer = Vec::with_capacity(decode_in_chunks_of_size);
785821
let mut decoded_buffer = Vec::<u8>::new();
786-
let mut read_buffer = [0u8; DEFAULT_BUFFER_SIZE];
787822

788823
loop {
789-
let read = input
790-
.read(&mut read_buffer)
824+
let read_buffer = input
825+
.fill_buf()
791826
.map_err(|err| USimpleError::new(1, super::format_read_error(err.kind())))?;
792-
if read == 0 {
827+
let read_len = read_buffer.len();
828+
if read_len == 0 {
793829
break;
794830
}
795831

796-
for &byte in &read_buffer[..read] {
832+
for &byte in read_buffer {
797833
if byte == b'\n' || byte == b'\r' {
798834
continue;
799835
}
@@ -845,6 +881,8 @@ pub mod fast_decode {
845881
buffer.clear();
846882
}
847883
}
884+
885+
input.consume(read_len);
848886
}
849887

850888
if supports_partial_decode {
@@ -902,7 +940,7 @@ fn format_read_error(kind: ErrorKind) -> String {
902940

903941
/// Determines if the input buffer contains any padding ('=') ignoring trailing whitespace.
904942
#[cfg(test)]
905-
fn read_and_has_padding<R: Read>(input: &mut R) -> UResult<(bool, Vec<u8>)> {
943+
fn read_and_has_padding<R: std::io::Read>(input: &mut R) -> UResult<(bool, Vec<u8>)> {
906944
let mut buf = Vec::new();
907945
input
908946
.read_to_end(&mut buf)

0 commit comments

Comments
 (0)