Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
305 changes: 259 additions & 46 deletions src/uu/base32/src/base_common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.

// spell-checker:ignore hexupper lsbf msbf unpadded nopad aGVsbG8sIHdvcmxkIQ
// spell-checker:ignore hexupper lsbf msbf unpadded nopad aGVsbG8sIHdvcmxkIQ behaviour

use clap::{Arg, ArgAction, Command};
use std::ffi::OsString;
use std::fs::File;
use std::io::{self, ErrorKind, Read, Seek, Write};
use std::io::{self, BufReader, ErrorKind, Read, Write};
use std::path::{Path, PathBuf};
use uucore::display::Quotable;
use uucore::encoding::{
Expand Down Expand Up @@ -149,64 +149,63 @@ pub fn base_app(about: &'static str, usage: &str) -> Command {
)
}

/// Marker trait that combines [`Read`] and [`Seek`] so the pair can be named as a single bound
/// (for example as the trait object `dyn ReadSeek`).
pub trait ReadSeek: Read + Seek {}

/// Blanket implementation: every type that implements both `Read` and `Seek` is a `ReadSeek`.
impl<T: Read + Seek> ReadSeek for T {}

pub fn get_input(config: &Config) -> UResult<Box<dyn ReadSeek>> {
pub fn get_input(config: &Config) -> UResult<Box<dyn Read>> {
match &config.to_read {
Some(path_buf) => {
// The file is wrapped in a `BufReader` before being handed to the streaming encode/decode paths.
let file =
File::open(path_buf).map_err_context(|| path_buf.maybe_quote().to_string())?;
Ok(Box::new(file))
Ok(Box::new(BufReader::new(file)))
}
None => {
let mut buffer = Vec::new();
io::stdin().read_to_end(&mut buffer)?;
Ok(Box::new(io::Cursor::new(buffer)))
// Stdin is already buffered by the OS; wrap once more to reduce syscalls per read.
Ok(Box::new(BufReader::new(io::stdin())))
}
}
}

/// Reads `input` to the end and reports whether the data contains a padding byte (`=`).
///
/// Returns `(has_padding, bytes)`, where `bytes` is everything that was read. The whole buffer is
/// searched for `=`; no whitespace is trimmed or skipped before the check.
///
/// # Errors
///
/// Returns a `USimpleError` with exit code 1, built from `format_read_error`, if reading fails.
fn read_and_has_padding<R: Read>(input: &mut R) -> UResult<(bool, Vec<u8>)> {
let mut buf = Vec::new();
input
.read_to_end(&mut buf)
.map_err(|err| USimpleError::new(1, format_read_error(err.kind())))?;

// Treat the stream as padded if any '=' exists (GNU coreutils continues decoding
// even when padding bytes are followed by more data).
let has_padding = buf.contains(&b'=');

Ok((has_padding, buf))
}

pub fn handle_input<R: Read + Seek>(input: &mut R, format: Format, config: Config) -> UResult<()> {
let (has_padding, read) = read_and_has_padding(input)?;

pub fn handle_input<R: Read>(input: &mut R, format: Format, config: Config) -> UResult<()> {
// Always allow padding for Base64 to avoid a full pre-scan of the input.
let supports_fast_decode_and_encode =
get_supports_fast_decode_and_encode(format, config.decode, has_padding);
get_supports_fast_decode_and_encode(format, config.decode, true);

let supports_fast_decode_and_encode_ref = supports_fast_decode_and_encode.as_ref();
let mut stdout_lock = io::stdout().lock();
let result = if config.decode {
fast_decode::fast_decode(
read,
let result = match (format, config.decode) {
// Base58 must process the entire input as one big integer; keep the
// historical behaviour of buffering everything for this format only.
(Format::Base58, _) => {
let mut buffered = Vec::new();
input
.read_to_end(&mut buffered)
.map_err(|err| USimpleError::new(1, format_read_error(err.kind())))?;
if config.decode {
fast_decode::fast_decode_buffer(
buffered,
&mut stdout_lock,
supports_fast_decode_and_encode_ref,
config.ignore_garbage,
)
} else {
fast_encode::fast_encode_buffer(
buffered,
&mut stdout_lock,
supports_fast_decode_and_encode_ref,
config.wrap_cols,
)
}
}
// Streaming path for all other encodings keeps memory bounded.
(_, true) => fast_decode::fast_decode_stream(
input,
&mut stdout_lock,
supports_fast_decode_and_encode_ref,
config.ignore_garbage,
)
} else {
fast_encode::fast_encode(
read,
),
(_, false) => fast_encode::fast_encode_stream(
input,
&mut stdout_lock,
supports_fast_decode_and_encode_ref,
config.wrap_cols,
)
),
};

// Ensure any pending stdout buffer is flushed even if decoding failed; GNU basenc
Expand Down Expand Up @@ -300,10 +299,13 @@ pub mod fast_encode {
use std::{
cmp::min,
collections::VecDeque,
io::{self, Write},
io::{self, Read, Write},
num::NonZeroUsize,
};
use uucore::{encoding::SupportsFastDecodeAndEncode, error::UResult};
use uucore::{
encoding::SupportsFastDecodeAndEncode,
error::{UResult, USimpleError},
};

struct LineWrapping {
line_length: NonZeroUsize,
Expand Down Expand Up @@ -405,7 +407,7 @@ pub mod fast_encode {
}
// End of helper functions

pub fn fast_encode(
pub fn fast_encode_buffer(
input: Vec<u8>,
output: &mut dyn Write,
supports_fast_decode_and_encode: &dyn SupportsFastDecodeAndEncode,
Expand Down Expand Up @@ -506,10 +508,89 @@ pub mod fast_encode {
}
Ok(())
}

pub fn fast_encode_stream(
input: &mut dyn Read,
output: &mut dyn Write,
supports_fast_decode_and_encode: &dyn SupportsFastDecodeAndEncode,
wrap: Option<usize>,
) -> UResult<()> {
const ENCODE_IN_CHUNKS_OF_SIZE_MULTIPLE: usize = 1_024;

let encode_in_chunks_of_size =
supports_fast_decode_and_encode.unpadded_multiple() * ENCODE_IN_CHUNKS_OF_SIZE_MULTIPLE;

assert!(encode_in_chunks_of_size > 0);

let mut line_wrapping = match wrap {
Some(0) => None,
Some(an) => Some(LineWrapping {
line_length: NonZeroUsize::new(an).unwrap(),
print_buffer: Vec::<u8>::new(),
}),
None => Some(LineWrapping {
line_length: NonZeroUsize::new(WRAP_DEFAULT).unwrap(),
print_buffer: Vec::<u8>::new(),
}),
};

// Buffers
let mut leftover_buffer = VecDeque::<u8>::new();
let mut encoded_buffer = VecDeque::<u8>::new();

let mut read_buffer = vec![0u8; encode_in_chunks_of_size.max(8_192)];

loop {
let read = input
.read(&mut read_buffer)
.map_err(|err| USimpleError::new(1, super::format_read_error(err.kind())))?;
if read == 0 {
break;
}

leftover_buffer.extend(&read_buffer[..read]);

while leftover_buffer.len() >= encode_in_chunks_of_size {
{
let contiguous = leftover_buffer.make_contiguous();
encode_in_chunks_to_buffer(
supports_fast_decode_and_encode,
&contiguous[..encode_in_chunks_of_size],
&mut encoded_buffer,
)?;
}

// Drop the data we just encoded
leftover_buffer.drain(..encode_in_chunks_of_size);

write_to_output(
&mut line_wrapping,
&mut encoded_buffer,
output,
false,
wrap == Some(0),
)?;
}
}

// Encode any remaining bytes and flush
supports_fast_decode_and_encode
.encode_to_vec_deque(leftover_buffer.make_contiguous(), &mut encoded_buffer)?;

write_to_output(
&mut line_wrapping,
&mut encoded_buffer,
output,
true,
wrap == Some(0),
)?;

Ok(())
}
}

pub mod fast_decode {
use std::io::{self, Write};
use std::io::{self, Read, Write};
use uucore::{
encoding::SupportsFastDecodeAndEncode,
error::{UResult, USimpleError},
Expand Down Expand Up @@ -579,7 +660,7 @@ pub mod fast_decode {
}
// End of helper functions

pub fn fast_decode(
pub fn fast_decode_buffer(
input: Vec<u8>,
output: &mut dyn Write,
supports_fast_decode_and_encode: &dyn SupportsFastDecodeAndEncode,
Expand Down Expand Up @@ -671,6 +752,123 @@ pub mod fast_decode {

Ok(())
}

pub fn fast_decode_stream(
input: &mut dyn Read,
output: &mut dyn Write,
supports_fast_decode_and_encode: &dyn SupportsFastDecodeAndEncode,
ignore_garbage: bool,
) -> UResult<()> {
const DECODE_IN_CHUNKS_OF_SIZE_MULTIPLE: usize = 1_024;

let alphabet = supports_fast_decode_and_encode.alphabet();
let alphabet_table = alphabet_lookup(alphabet);
let valid_multiple = supports_fast_decode_and_encode.valid_decoding_multiple();
let decode_in_chunks_of_size = valid_multiple * DECODE_IN_CHUNKS_OF_SIZE_MULTIPLE;

assert!(decode_in_chunks_of_size > 0);
assert!(valid_multiple > 0);

let supports_partial_decode = supports_fast_decode_and_encode.supports_partial_decode();

let mut buffer = Vec::with_capacity(decode_in_chunks_of_size);
let mut decoded_buffer = Vec::<u8>::new();
let mut read_buffer = [0u8; 8_192];

loop {
let read = input
.read(&mut read_buffer)
.map_err(|err| USimpleError::new(1, super::format_read_error(err.kind())))?;
if read == 0 {
break;
}

for &byte in &read_buffer[..read] {
if byte == b'\n' || byte == b'\r' {
continue;
}

if alphabet_table[usize::from(byte)] {
buffer.push(byte);
} else if ignore_garbage {
continue;
} else {
if supports_partial_decode {
flush_ready_chunks(
&mut buffer,
decode_in_chunks_of_size,
valid_multiple,
supports_fast_decode_and_encode,
&mut decoded_buffer,
output,
)?;
} else {
while buffer.len() >= decode_in_chunks_of_size {
decode_in_chunks_to_buffer(
supports_fast_decode_and_encode,
&buffer[..decode_in_chunks_of_size],
&mut decoded_buffer,
)?;
write_to_output(&mut decoded_buffer, output)?;
buffer.drain(..decode_in_chunks_of_size);
}
}
return Err(USimpleError::new(1, "error: invalid input".to_owned()));
}

if supports_partial_decode {
flush_ready_chunks(
&mut buffer,
decode_in_chunks_of_size,
valid_multiple,
supports_fast_decode_and_encode,
&mut decoded_buffer,
output,
)?;
} else if buffer.len() == decode_in_chunks_of_size {
decode_in_chunks_to_buffer(
supports_fast_decode_and_encode,
&buffer,
&mut decoded_buffer,
)?;
write_to_output(&mut decoded_buffer, output)?;
buffer.clear();
}
}
}

if supports_partial_decode {
flush_ready_chunks(
&mut buffer,
decode_in_chunks_of_size,
valid_multiple,
supports_fast_decode_and_encode,
&mut decoded_buffer,
output,
)?;
}

if !buffer.is_empty() {
let mut owned_chunk: Option<Vec<u8>> = None;
let mut had_invalid_tail = false;

if let Some(pad_result) = supports_fast_decode_and_encode.pad_remainder(&buffer) {
had_invalid_tail = pad_result.had_invalid_tail;
owned_chunk = Some(pad_result.chunk);
}

let final_chunk = owned_chunk.as_deref().unwrap_or(&buffer);

supports_fast_decode_and_encode.decode_into_vec(final_chunk, &mut decoded_buffer)?;
write_to_output(&mut decoded_buffer, output)?;

if had_invalid_tail {
return Err(USimpleError::new(1, "error: invalid input".to_owned()));
}
}

Ok(())
}
}

fn format_read_error(kind: ErrorKind) -> String {
Expand All @@ -692,6 +890,21 @@ fn format_read_error(kind: ErrorKind) -> String {
translate!("base-common-read-error", "error" => kind_string_capitalized)
}

/// Reads `input` to the end and reports whether the data contains a padding byte (`=`).
///
/// Returns `(has_padding, bytes)`, where `bytes` is everything that was read. The whole buffer is
/// searched for `=`; no whitespace is trimmed or skipped before the check.
///
/// # Errors
///
/// Returns a `USimpleError` with exit code 1, built from `format_read_error`, if reading fails.
#[cfg(test)]
fn read_and_has_padding<R: Read>(input: &mut R) -> UResult<(bool, Vec<u8>)> {
let mut buf = Vec::new();
input
.read_to_end(&mut buf)
.map_err(|err| USimpleError::new(1, format_read_error(err.kind())))?;

// Treat the stream as padded if any '=' exists (GNU coreutils continues decoding
// even when padding bytes are followed by more data).
let has_padding = buf.contains(&b'=');

Ok((has_padding, buf))
}

#[cfg(test)]
mod tests {
use crate::base_common::read_and_has_padding;
Expand Down
Loading