Skip to content

Commit cbd4b6e

Browse files
committed
checksum: Introduce a DigestOutput type to prevent a preemptive computation of the hex encoding.
1 parent 6ae0e5d commit cbd4b6e

File tree

4 files changed

+101
-75
lines changed

4 files changed

+101
-75
lines changed

src/uucore/src/lib/features/checksum/compute.rs

Lines changed: 25 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,8 @@ use std::path::Path;
1313
use crate::checksum::{ChecksumError, SizedAlgoKind, digest_reader, escape_filename};
1414
use crate::error::{FromIo, UResult, USimpleError};
1515
use crate::line_ending::LineEnding;
16-
use crate::{encoding, show, translate};
16+
use crate::sum::DigestOutput;
17+
use crate::{show, translate};
1718

1819
/// Use the same buffer size as GNU when reading a file to create a checksum
1920
/// from it: 32 KiB.
@@ -139,10 +140,11 @@ pub fn figure_out_output_format(
139140
fn print_legacy_checksum(
140141
options: &ChecksumComputeOptions,
141142
filename: &OsStr,
142-
sum: &str,
143+
sum: &DigestOutput,
143144
size: usize,
144145
) -> UResult<()> {
145146
debug_assert!(options.algo_kind.is_legacy());
147+
debug_assert!(matches!(sum, DigestOutput::U16(_) | DigestOutput::Crc(_)));
146148

147149
let (escaped_filename, prefix) = if options.line_ending == LineEnding::Nul {
148150
(filename.to_string_lossy().to_string(), "")
@@ -153,25 +155,23 @@ fn print_legacy_checksum(
153155
print!("{prefix}");
154156

155157
// Print the sum
156-
match options.algo_kind {
157-
SizedAlgoKind::Sysv => print!(
158-
"{} {}",
159-
sum.parse::<u16>().unwrap(),
158+
match (options.algo_kind, sum) {
159+
(SizedAlgoKind::Sysv, DigestOutput::U16(sum)) => print!(
160+
"{prefix}{sum} {}",
160161
size.div_ceil(options.algo_kind.bitlen()),
161162
),
162-
SizedAlgoKind::Bsd => {
163+
(SizedAlgoKind::Bsd, DigestOutput::U16(sum)) => {
163164
// The BSD checksum output is 5 digit integer
164165
let bsd_width = 5;
165166
print!(
166-
"{:0bsd_width$} {:bsd_width$}",
167-
sum.parse::<u16>().unwrap(),
167+
"{prefix}{sum:0bsd_width$} {:bsd_width$}",
168168
size.div_ceil(options.algo_kind.bitlen()),
169169
);
170170
}
171-
SizedAlgoKind::Crc | SizedAlgoKind::Crc32b => {
172-
print!("{sum} {size}");
171+
(SizedAlgoKind::Crc | SizedAlgoKind::Crc32b, DigestOutput::Crc(sum)) => {
172+
print!("{prefix}{sum} {size}");
173173
}
174-
_ => unreachable!("Not a legacy algorithm"),
174+
(algo, output) => unreachable!("Bug: Invalid legacy checksum ({algo:?}, {output:?})"),
175175
}
176176

177177
// Print the filename after a space if not stdin
@@ -284,49 +284,39 @@ where
284284

285285
let mut digest = options.algo_kind.create_digest();
286286

287-
let (sum_hex, sz) = digest_reader(
288-
&mut digest,
289-
&mut file,
290-
options.binary,
291-
options.algo_kind.bitlen(),
292-
)
293-
.map_err_context(|| translate!("cksum-error-failed-to-read-input"))?;
287+
let (digest_output, sz) = digest_reader(&mut digest, &mut file, options.binary)
288+
.map_err_context(|| translate!("cksum-error-failed-to-read-input"))?;
294289

295290
// Encodes the sum as Base64 if df is Base64, as hexadecimal otherwise.
296-
let encode_sum = |sum: String, df: DigestFormat| {
291+
let encode_sum = |sum: DigestOutput, df: DigestFormat| {
297292
if df.is_base64() {
298-
encoding::for_cksum::BASE64.encode(&hex::decode(sum).unwrap())
293+
sum.to_base64()
299294
} else {
300-
sum
295+
sum.to_hex()
301296
}
302297
};
303298

304299
match options.output_format {
305300
OutputFormat::Raw => {
306-
let bytes = match options.algo_kind {
307-
SizedAlgoKind::Crc | SizedAlgoKind::Crc32b => {
308-
sum_hex.parse::<u32>().unwrap().to_be_bytes().to_vec()
309-
}
310-
SizedAlgoKind::Sysv | SizedAlgoKind::Bsd => {
311-
sum_hex.parse::<u16>().unwrap().to_be_bytes().to_vec()
312-
}
313-
_ => hex::decode(sum_hex).unwrap(),
314-
};
315301
// Cannot handle multiple files anyway, output immediately.
316-
io::stdout().write_all(&bytes)?;
302+
digest_output.write_raw(io::stdout())?;
317303
return Ok(());
318304
}
319305
OutputFormat::Legacy => {
320-
print_legacy_checksum(&options, filename, &sum_hex, sz)?;
306+
print_legacy_checksum(&options, filename, &digest_output, sz)?;
321307
}
322308
OutputFormat::Tagged(digest_format) => {
323-
print_tagged_checksum(&options, filename, &encode_sum(sum_hex, digest_format))?;
309+
print_tagged_checksum(
310+
&options,
311+
filename,
312+
&encode_sum(digest_output, digest_format)?,
313+
)?;
324314
}
325315
OutputFormat::Untagged(digest_format, reading_mode) => {
326316
print_untagged_checksum(
327317
&options,
328318
filename,
329-
&encode_sum(sum_hex, digest_format),
319+
&encode_sum(digest_output, digest_format)?,
330320
reading_mode,
331321
)?;
332322
}

src/uucore/src/lib/features/checksum/mod.rs

Lines changed: 4 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -15,8 +15,8 @@ use thiserror::Error;
1515
use crate::error::{UError, UResult};
1616
use crate::show_error;
1717
use crate::sum::{
18-
Blake2b, Blake3, Bsd, CRC32B, Crc, Digest, DigestWriter, Md5, Sha1, Sha3_224, Sha3_256,
19-
Sha3_384, Sha3_512, Sha224, Sha256, Sha384, Sha512, Shake128, Shake256, Sm3, SysV,
18+
Blake2b, Blake3, Bsd, CRC32B, Crc, Digest, DigestOutput, DigestWriter, Md5, Sha1, Sha3_224,
19+
Sha3_256, Sha3_384, Sha3_512, Sha224, Sha256, Sha384, Sha512, Shake128, Shake256, Sm3, SysV,
2020
};
2121

2222
pub mod compute;
@@ -420,8 +420,7 @@ pub fn digest_reader<T: Read>(
420420
digest: &mut Box<dyn Digest>,
421421
reader: &mut T,
422422
binary: bool,
423-
output_bits: usize,
424-
) -> io::Result<(String, usize)> {
423+
) -> io::Result<(DigestOutput, usize)> {
425424
digest.reset();
426425

427426
// Read bytes from `reader` and write those bytes to `digest`.
@@ -440,14 +439,7 @@ pub fn digest_reader<T: Read>(
440439
let output_size = std::io::copy(reader, &mut digest_writer)? as usize;
441440
digest_writer.finalize();
442441

443-
if digest.output_bits() > 0 {
444-
Ok((digest.result_str(), output_size))
445-
} else {
446-
// Assume it's SHAKE. result_str() doesn't work with shake (as of 8/30/2016)
447-
let mut bytes = vec![0; output_bits.div_ceil(8)];
448-
digest.hash_finalize(&mut bytes);
449-
Ok((hex::encode(bytes), output_size))
450-
}
442+
Ok((digest.result(), output_size))
451443
}
452444

453445
/// Calculates the length of the digest.

src/uucore/src/lib/features/checksum/validate.rs

Lines changed: 3 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -651,16 +651,11 @@ fn compute_and_check_digest_from_file(
651651

652652
// TODO: improve function signature to use ReadingMode instead of binary bool
653653
// Set binary to false because --binary is not supported with --check
654-
let (calculated_checksum, _) = digest_reader(
655-
&mut digest,
656-
&mut file_reader,
657-
/* binary */ false,
658-
algo.bitlen(),
659-
)
660-
.unwrap();
654+
let (calculated_checksum, _) =
655+
digest_reader(&mut digest, &mut file_reader, /* binary */ false).unwrap();
661656

662657
// Do the checksum validation
663-
let checksum_correct = expected_checksum == calculated_checksum;
658+
let checksum_correct = expected_checksum == calculated_checksum.to_hex()?;
664659
print_file_report(
665660
std::io::stdout(),
666661
filename,

src/uucore/src/lib/features/sum.rs

Lines changed: 69 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -12,12 +12,52 @@
1212
//! [`DigestWriter`] struct provides a wrapper around [`Digest`] that
1313
//! implements the [`Write`] trait, for use in situations where calling
1414
//! [`write`] would be useful.
15-
use std::io::Write;
1615
17-
use hex::encode;
16+
use std::io::{self, Write};
17+
18+
use data_encoding::BASE64;
19+
1820
#[cfg(windows)]
1921
use memchr::memmem;
2022

23+
use crate::error::{UResult, USimpleError};
24+
25+
/// Represents the output of a checksum computation.
26+
#[derive(Debug)]
27+
pub enum DigestOutput {
28+
/// Varying-size output
29+
Vec(Vec<u8>),
30+
/// Legacy output for Crc and Crc32B modes
31+
Crc(u32),
32+
/// Legacy output for Sysv and BSD modes
33+
U16(u16),
34+
}
35+
36+
impl DigestOutput {
37+
pub fn write_raw(&self, mut w: impl std::io::Write) -> io::Result<()> {
38+
match self {
39+
Self::Vec(buf) => w.write_all(buf),
40+
// For legacy outputs, print them in big endian
41+
Self::Crc(n) => w.write_all(&n.to_be_bytes()),
42+
Self::U16(n) => w.write_all(&n.to_be_bytes()),
43+
}
44+
}
45+
46+
pub fn to_hex(&self) -> UResult<String> {
47+
match self {
48+
Self::Vec(buf) => Ok(hex::encode(buf)),
49+
_ => Err(USimpleError::new(1, "Legacy output cannot be encoded")),
50+
}
51+
}
52+
53+
pub fn to_base64(&self) -> UResult<String> {
54+
match self {
55+
Self::Vec(buf) => Ok(BASE64.encode(buf)),
56+
_ => Err(USimpleError::new(1, "Legacy output cannot be encoded")),
57+
}
58+
}
59+
}
60+
2161
pub trait Digest {
2262
fn new() -> Self
2363
where
@@ -29,10 +69,11 @@ pub trait Digest {
2969
fn output_bytes(&self) -> usize {
3070
self.output_bits().div_ceil(8)
3171
}
32-
fn result_str(&mut self) -> String {
72+
73+
fn result(&mut self) -> DigestOutput {
3374
let mut buf: Vec<u8> = vec![0; self.output_bytes()];
3475
self.hash_finalize(&mut buf);
35-
encode(buf)
76+
DigestOutput::Vec(buf)
3677
}
3778
}
3879

@@ -167,10 +208,12 @@ impl Digest for Crc {
167208
out.copy_from_slice(&self.digest.finalize().to_ne_bytes());
168209
}
169210

170-
fn result_str(&mut self) -> String {
211+
fn result(&mut self) -> DigestOutput {
171212
let mut out: [u8; 8] = [0; 8];
172213
self.hash_finalize(&mut out);
173-
u64::from_ne_bytes(out).to_string()
214+
215+
let x = u64::from_ne_bytes(out);
216+
DigestOutput::Crc((x & (u32::MAX as u64)) as u32)
174217
}
175218

176219
fn reset(&mut self) {
@@ -214,10 +257,10 @@ impl Digest for CRC32B {
214257
32
215258
}
216259

217-
fn result_str(&mut self) -> String {
260+
fn result(&mut self) -> DigestOutput {
218261
let mut out = [0; 4];
219262
self.hash_finalize(&mut out);
220-
format!("{}", u32::from_be_bytes(out))
263+
DigestOutput::Crc(u32::from_be_bytes(out))
221264
}
222265
}
223266

@@ -240,10 +283,10 @@ impl Digest for Bsd {
240283
out.copy_from_slice(&self.state.to_ne_bytes());
241284
}
242285

243-
fn result_str(&mut self) -> String {
244-
let mut _out: Vec<u8> = vec![0; 2];
286+
fn result(&mut self) -> DigestOutput {
287+
let mut _out = [0; 2];
245288
self.hash_finalize(&mut _out);
246-
format!("{}", self.state)
289+
DigestOutput::U16(self.state)
247290
}
248291

249292
fn reset(&mut self) {
@@ -275,10 +318,10 @@ impl Digest for SysV {
275318
out.copy_from_slice(&(self.state as u16).to_ne_bytes());
276319
}
277320

278-
fn result_str(&mut self) -> String {
279-
let mut _out: Vec<u8> = vec![0; 2];
321+
fn result(&mut self) -> DigestOutput {
322+
let mut _out = [0; 2];
280323
self.hash_finalize(&mut _out);
281-
format!("{}", self.state)
324+
DigestOutput::U16((self.state & (u16::MAX as u32)) as u16)
282325
}
283326

284327
fn reset(&mut self) {
@@ -319,7 +362,7 @@ macro_rules! impl_digest_common {
319362

320363
// Implements the Digest trait for sha2 / sha3 algorithms with variable output
321364
macro_rules! impl_digest_shake {
322-
($algo_type: ty) => {
365+
($algo_type: ty, $output_bits: literal) => {
323366
impl Digest for $algo_type {
324367
fn new() -> Self {
325368
Self(Default::default())
@@ -338,7 +381,13 @@ macro_rules! impl_digest_shake {
338381
}
339382

340383
fn output_bits(&self) -> usize {
341-
0
384+
$output_bits
385+
}
386+
387+
fn result(&mut self) -> DigestOutput {
388+
let mut bytes = vec![0; self.output_bits().div_ceil(8)];
389+
self.hash_finalize(&mut bytes);
390+
DigestOutput::Vec(bytes)
342391
}
343392
}
344393
};
@@ -368,8 +417,8 @@ impl_digest_common!(Sha3_512, 512);
368417

369418
pub struct Shake128(sha3::Shake128);
370419
pub struct Shake256(sha3::Shake256);
371-
impl_digest_shake!(Shake128);
372-
impl_digest_shake!(Shake256);
420+
impl_digest_shake!(Shake128, 256);
421+
impl_digest_shake!(Shake256, 512);
373422

374423
/// A struct that writes to a digest.
375424
///
@@ -501,14 +550,14 @@ mod tests {
501550
writer_crlf.write_all(b"\r").unwrap();
502551
writer_crlf.write_all(b"\n").unwrap();
503552
writer_crlf.finalize();
504-
let result_crlf = digest.result_str();
553+
let result_crlf = digest.result();
505554

506555
// We expect "\r\n" to be replaced with "\n" in text mode on Windows.
507556
let mut digest = Box::new(Md5::new()) as Box<dyn Digest>;
508557
let mut writer_lf = DigestWriter::new(&mut digest, false);
509558
writer_lf.write_all(b"\n").unwrap();
510559
writer_lf.finalize();
511-
let result_lf = digest.result_str();
560+
let result_lf = digest.result();
512561

513562
assert_eq!(result_crlf, result_lf);
514563
}

0 commit comments

Comments
 (0)