Skip to content

Commit 76cb42b

Browse files
author
Andrea Calabrese
committed
Add support for algorithms that do not support streaming
base58 does not support streaming when encoding. This patch allows base58 and other non-streaming algorithms to work with the new streaming mechanism. Signed-off-by: Andrea Calabrese <andrea.calabrese@amarulasolutions.com>
1 parent 3cbeb0b commit 76cb42b

File tree

2 files changed

+157
-91
lines changed

2 files changed

+157
-91
lines changed

src/uu/base32/src/base_common.rs

Lines changed: 115 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -368,9 +368,17 @@ pub mod fast_encode {
368368
// Based on performance testing
369369

370370
const ENCODE_IN_CHUNKS_OF_SIZE_MULTIPLE: usize = 1_024;
371-
let encode_in_chunks_of_size =
372-
supports_fast_decode_and_encode.unpadded_multiple() * ENCODE_IN_CHUNKS_OF_SIZE_MULTIPLE;
373-
assert!(encode_in_chunks_of_size > 0);
371+
let mut encode_in_chunks_of_size = 0;
372+
if supports_fast_decode_and_encode.should_buffer_encoding() {
373+
encode_in_chunks_of_size = supports_fast_decode_and_encode.unpadded_multiple()
374+
* ENCODE_IN_CHUNKS_OF_SIZE_MULTIPLE;
375+
}
376+
377+
assert!(
378+
(encode_in_chunks_of_size > 0
379+
&& supports_fast_decode_and_encode.should_buffer_decoding())
380+
|| !supports_fast_decode_and_encode.should_buffer_encoding()
381+
);
374382

375383
// The "data-encoding" crate supports line wrapping, but not arbitrary line wrapping, only certain widths, so
376384
// line wrapping must be handled here.
@@ -392,51 +400,55 @@ pub mod fast_encode {
392400

393401
// Start of buffers
394402
// Data that was read from `input` but has not been encoded yet
395-
let mut leftover_buffer = VecDeque::<u8>::new();
403+
let mut leftover_buffer = Vec::<u8>::new();
396404
// Encoded data that needs to be written to `output`
397405
let mut encoded_buffer = VecDeque::<u8>::new();
398-
let mut in_reader = BufReader::with_capacity(encode_in_chunks_of_size, input);
399406
// End of buffers
400-
loop {
401-
let buf_res = in_reader.fill_buf();
402-
if let Ok(buffer) = buf_res {
403-
let buffer_len = buffer.len();
404-
if buffer.len() < encode_in_chunks_of_size {
405-
leftover_buffer.extend(buffer);
406-
assert!(leftover_buffer.len() < encode_in_chunks_of_size);
407-
break;
408-
}
409-
assert_eq!(buffer.len(), encode_in_chunks_of_size);
410-
encode_in_chunks_to_buffer(
411-
supports_fast_decode_and_encode,
412-
buffer,
413-
&mut encoded_buffer,
414-
)?;
415-
// Write all data in `encoded_buffer` to `output`
416-
write_to_output(
417-
&mut line_wrapping,
418-
&mut encoded_buffer,
419-
output,
420-
false,
421-
wrap == Some(0),
422-
)?;
423-
in_reader.consume(buffer_len);
424-
} else if let Err(err) = buf_res {
425-
let kind = err.kind();
407+
if supports_fast_decode_and_encode.should_buffer_encoding() {
408+
let mut in_reader = BufReader::with_capacity(encode_in_chunks_of_size, input);
409+
loop {
410+
let buf_res = in_reader.fill_buf();
411+
if let Ok(buffer) = buf_res {
412+
let buffer_len = buffer.len();
413+
if buffer.len() < encode_in_chunks_of_size {
414+
leftover_buffer.extend(buffer);
415+
assert!(leftover_buffer.len() < encode_in_chunks_of_size);
416+
break;
417+
}
418+
assert_eq!(buffer.len(), encode_in_chunks_of_size);
419+
encode_in_chunks_to_buffer(
420+
supports_fast_decode_and_encode,
421+
buffer,
422+
&mut encoded_buffer,
423+
)?;
424+
// Write all data in `encoded_buffer` to `output`
425+
write_to_output(
426+
&mut line_wrapping,
427+
&mut encoded_buffer,
428+
output,
429+
false,
430+
wrap == Some(0),
431+
)?;
432+
in_reader.consume(buffer_len);
433+
} else if let Err(err) = buf_res {
434+
let kind = err.kind();
426435

427-
if kind == ErrorKind::Interrupted {
428-
// Retry reading
429-
continue;
430-
}
436+
if kind == ErrorKind::Interrupted {
437+
// Retry reading
438+
continue;
439+
}
431440

432-
return Err(USimpleError::new(1, format_read_error(kind)));
441+
return Err(USimpleError::new(1, format_read_error(kind)));
442+
}
433443
}
444+
} else {
445+
input.read_to_end(&mut leftover_buffer).unwrap();
434446
}
435447
// Cleanup
436448
// `input` has finished producing data, so the data remaining in the buffers needs to be encoded and printed
437449
// Encode all remaining unencoded bytes, placing them in `encoded_buffer`
438450
supports_fast_decode_and_encode
439-
.encode_to_vec_deque(leftover_buffer.make_contiguous(), &mut encoded_buffer)?;
451+
.encode_to_vec_deque(&leftover_buffer, &mut encoded_buffer)?;
440452
// Write all data in `encoded_buffer` to output
441453
// `is_cleanup` triggers special cleanup-only logic
442454
write_to_output(
@@ -522,9 +534,17 @@ pub mod fast_decode {
522534
) -> UResult<()> {
523535
const DECODE_IN_CHUNKS_OF_SIZE_MULTIPLE: usize = 1_024;
524536
let alphabet = supports_fast_decode_and_encode.alphabet();
525-
let decode_in_chunks_of_size = supports_fast_decode_and_encode.valid_decoding_multiple()
526-
* DECODE_IN_CHUNKS_OF_SIZE_MULTIPLE;
527-
assert!(decode_in_chunks_of_size > 0);
537+
let mut decode_in_chunks_of_size = 0;
538+
if supports_fast_decode_and_encode.should_buffer_decoding() {
539+
decode_in_chunks_of_size = supports_fast_decode_and_encode.valid_decoding_multiple()
540+
* DECODE_IN_CHUNKS_OF_SIZE_MULTIPLE;
541+
}
542+
543+
assert!(
544+
(decode_in_chunks_of_size > 0
545+
&& supports_fast_decode_and_encode.should_buffer_decoding())
546+
|| !supports_fast_decode_and_encode.should_buffer_decoding()
547+
);
528548

529549
// Note that it's not worth using "data-encoding"'s ignore functionality if `ignore_garbage` is true, because
530550
// "data-encoding"'s ignore functionality cannot discard non-ASCII bytes. The data has to be filtered before
@@ -547,58 +567,64 @@ pub mod fast_decode {
547567
Vec::with_capacity(decode_in_chunks_of_size),
548568
];
549569
let mut current_buffer_index = 0usize;
550-
let mut in_reader = BufReader::with_capacity(decode_in_chunks_of_size, input);
551-
// End of buffers
552-
loop {
553-
while buffers[current_buffer_index].len() < decode_in_chunks_of_size {
554-
let read_res = in_reader.fill_buf();
555-
if let Ok(read_buffer) = read_res {
556-
let read_size = read_buffer.len();
557-
if read_size == 0 {
558-
break;
559-
}
560-
// Filter and fill the valid buffer. When it is filled, we
561-
// switch buffer to avoid reading again.
562-
read_buffer
563-
.iter()
564-
.filter(|ch| table[usize::from(**ch)])
565-
.for_each(|ch| {
566-
if buffers[current_buffer_index].len() < decode_in_chunks_of_size {
567-
buffers[current_buffer_index].push(*ch);
568-
} else {
569-
buffers[(current_buffer_index + 1) % buffers.len()].push(*ch);
570-
}
571-
});
572-
in_reader.consume(read_size);
573-
} else if let Err(err) = read_res {
574-
let kind = err.kind();
575-
576-
if kind == ErrorKind::Interrupted {
577-
// Retry reading
578-
continue;
570+
if supports_fast_decode_and_encode.should_buffer_decoding() {
571+
let mut in_reader = BufReader::with_capacity(decode_in_chunks_of_size, input);
572+
// End of buffers
573+
loop {
574+
while buffers[current_buffer_index].len() < decode_in_chunks_of_size {
575+
let read_res = in_reader.fill_buf();
576+
if let Ok(read_buffer) = read_res {
577+
let read_size = read_buffer.len();
578+
if read_size == 0 {
579+
break;
580+
}
581+
// Filter and fill the valid buffer. When it is filled, we
582+
// switch buffer to avoid reading again.
583+
read_buffer
584+
.iter()
585+
.filter(|ch| table[usize::from(**ch)])
586+
.for_each(|ch| {
587+
if buffers[current_buffer_index].len() < decode_in_chunks_of_size {
588+
buffers[current_buffer_index].push(*ch);
589+
} else {
590+
buffers[(current_buffer_index + 1) % buffers.len()].push(*ch);
591+
}
592+
});
593+
in_reader.consume(read_size);
594+
} else if let Err(err) = read_res {
595+
let kind = err.kind();
596+
597+
if kind == ErrorKind::Interrupted {
598+
// Retry reading
599+
continue;
600+
}
601+
602+
return Err(USimpleError::new(1, format_read_error(kind)));
579603
}
580-
581-
return Err(USimpleError::new(1, format_read_error(kind)));
582604
}
605+
if buffers[current_buffer_index].len() < decode_in_chunks_of_size {
606+
break;
607+
}
608+
assert_eq!(
609+
buffers[current_buffer_index].len(),
610+
decode_in_chunks_of_size
611+
);
612+
// Decode data in chunks, then place it in `decoded_buffer`
613+
decode_in_chunks_to_buffer(
614+
supports_fast_decode_and_encode,
615+
&buffers[current_buffer_index],
616+
&mut decoded_buffer,
617+
)?;
618+
// Write all data in `decoded_buffer` to `output`
619+
write_to_output(&mut decoded_buffer, output)?;
620+
621+
buffers[current_buffer_index].clear();
622+
current_buffer_index = (current_buffer_index + 1) % buffers.len();
583623
}
584-
if buffers[current_buffer_index].len() < decode_in_chunks_of_size {
585-
break;
586-
}
587-
assert_eq!(
588-
buffers[current_buffer_index].len(),
589-
decode_in_chunks_of_size
590-
);
591-
// Decode data in chunks, then place it in `decoded_buffer`
592-
decode_in_chunks_to_buffer(
593-
supports_fast_decode_and_encode,
594-
&buffers[current_buffer_index],
595-
&mut decoded_buffer,
596-
)?;
597-
// Write all data in `decoded_buffer` to `output`
598-
write_to_output(&mut decoded_buffer, output)?;
599-
600-
buffers[current_buffer_index].clear();
601-
current_buffer_index = (current_buffer_index + 1) % buffers.len();
624+
} else {
625+
input
626+
.read_to_end(&mut buffers[current_buffer_index])
627+
.unwrap();
602628
}
603629
// Cleanup
604630
// `input` has finished producing data, so the data remaining in the buffers needs to be decoded and printed

src/uucore/src/lib/features/encoding.rs

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,14 @@ impl Base64SimdWrapper {
3939
}
4040

4141
impl SupportsFastDecodeAndEncode for Base64SimdWrapper {
42+
fn should_buffer_decoding(&self) -> bool {
43+
true
44+
}
45+
46+
fn should_buffer_encoding(&self) -> bool {
47+
true
48+
}
49+
4250
fn alphabet(&self) -> &'static [u8] {
4351
self.alphabet
4452
}
@@ -55,8 +63,8 @@ impl SupportsFastDecodeAndEncode for Base64SimdWrapper {
5563
}
5664
Err(_) => {
5765
// Check if the padding works
58-
let decoded_2 = base64_simd::STANDARD.decode_to_vec(input);
59-
match decoded_2 {
66+
let decoded_with_pad = base64_simd::STANDARD.decode_to_vec(input);
67+
match decoded_with_pad {
6068
Ok(decoded_bytes_2) => {
6169
output.extend_from_slice(&decoded_bytes_2);
6270
Ok(())
@@ -184,9 +192,25 @@ pub trait SupportsFastDecodeAndEncode {
184192
///
185193
/// The decoding performed by `fast_decode` depends on this number being correct.
186194
fn valid_decoding_multiple(&self) -> usize;
195+
196+
/// Returns whether the encoder should use buffering
197+
/// If true, ignore the unpadded_multiple
198+
fn should_buffer_encoding(&self) -> bool;
199+
200+
/// Returns whether the decoder should use buffering
201+
/// If true, ignore the valid_decoding_multiple
202+
fn should_buffer_decoding(&self) -> bool;
187203
}
188204

189205
impl SupportsFastDecodeAndEncode for Base58Wrapper {
206+
fn should_buffer_decoding(&self) -> bool {
207+
true
208+
}
209+
210+
fn should_buffer_encoding(&self) -> bool {
211+
false
212+
}
213+
190214
fn alphabet(&self) -> &'static [u8] {
191215
// Base58 alphabet
192216
b"123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz"
@@ -340,6 +364,14 @@ impl SupportsFastDecodeAndEncode for Base58Wrapper {
340364
}
341365

342366
impl SupportsFastDecodeAndEncode for Z85Wrapper {
367+
fn should_buffer_decoding(&self) -> bool {
368+
true
369+
}
370+
371+
fn should_buffer_encoding(&self) -> bool {
372+
true
373+
}
374+
343375
fn alphabet(&self) -> &'static [u8] {
344376
// Z85 alphabet
345377
b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.-:+=^!/*?&<>()[]{}@%$#"
@@ -389,6 +421,14 @@ impl SupportsFastDecodeAndEncode for Z85Wrapper {
389421
}
390422

391423
impl SupportsFastDecodeAndEncode for EncodingWrapper {
424+
fn should_buffer_decoding(&self) -> bool {
425+
true
426+
}
427+
428+
fn should_buffer_encoding(&self) -> bool {
429+
true
430+
}
431+
392432
fn alphabet(&self) -> &'static [u8] {
393433
self.alphabet
394434
}

0 commit comments

Comments
 (0)