Skip to content

Commit eed7a0a

Browse files
authored
parser: add binary support to determine_number_system and parse_size (#9659)
* parser: add binary support to determine_number_system and parse_size * docs * tests * tests: threshold
1 parent a738fba commit eed7a0a

File tree

7 files changed

+249
-11
lines changed

7 files changed

+249
-11
lines changed

src/uu/df/locales/en-US.ftl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ df-after-help = Display values are in units of the first available SIZE from --b
77
88
SIZE is an integer and optional unit (example: 10M is 10*1024*1024).
99
Units are K, M, G, T, P, E, Z, Y (powers of 1024) or KB, MB,... (powers
10-
of 1000).
10+
of 1000). The integer can be decimal, hexadecimal (0x prefix), octal (0 prefix) or binary (0b prefix).
1111
1212
# Help messages
1313
df-help-print-help = Print help information.

src/uu/df/locales/fr-FR.ftl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ df-after-help = Les valeurs affichées sont en unités de la première TAILLE di
77
88
TAILLE est un entier et une unité optionnelle (exemple : 10M est 10*1024*1024).
99
Les unités sont K, M, G, T, P, E, Z, Y (puissances de 1024) ou KB, MB,... (puissances
10-
de 1000).
10+
de 1000). L'entier peut être décimal, hexadécimal (préfixe 0x), octal (préfixe 0) ou binaire (préfixe 0b).
1111
1212
# Messages d'aide
1313
df-help-print-help = afficher les informations d'aide.

src/uu/du/locales/en-US.ftl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ du-after-help = Display values are in units of the first available SIZE from --b
77
88
SIZE is an integer and optional unit (example: 10M is 10*1024*1024).
99
Units are K, M, G, T, P, E, Z, Y (powers of 1024) or KB, MB,... (powers
10-
of 1000).
10+
of 1000). The integer can be decimal, hexadecimal (0x prefix), octal (0 prefix) or binary (0b prefix).
1111
1212
PATTERN allows some advanced exclusions. For example, the following syntaxes
1313
are supported:

src/uu/du/locales/fr-FR.ftl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ du-after-help = Les valeurs affichées sont en unités de la première TAILLE di
77
88
TAILLE est un entier et une unité optionnelle (exemple : 10M est 10*1024*1024).
99
Les unités sont K, M, G, T, P, E, Z, Y (puissances de 1024) ou KB, MB,... (puissances
10-
de 1000).
10+
de 1000). L'entier peut être décimal, hexadécimal (préfixe 0x), octal (préfixe 0) ou binaire (préfixe 0b).
1111
1212
MOTIF permet des exclusions avancées. Par exemple, les syntaxes suivantes
1313
sont supportées :

src/uucore/src/lib/features/parser/parse_size.rs

Lines changed: 35 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ enum NumberSystem {
106106
Decimal,
107107
Octal,
108108
Hexadecimal,
109+
Binary,
109110
}
110111

111112
impl<'parser> Parser<'parser> {
@@ -134,10 +135,11 @@ impl<'parser> Parser<'parser> {
134135
}
135136
/// Parse a size string into a number of bytes.
136137
///
137-
/// A size string comprises an integer and an optional unit. The unit
138-
/// may be K, M, G, T, P, E, Z, Y, R or Q (powers of 1024), or KB, MB,
139-
/// etc. (powers of 1000), or b which is 512.
140-
/// Binary prefixes can be used, too: KiB=K, MiB=M, and so on.
138+
/// A size string comprises an integer and an optional unit. The integer
139+
/// may be in decimal, octal (0 prefix), hexadecimal (0x prefix), or
140+
/// binary (0b prefix) notation. The unit may be K, M, G, T, P, E, Z, Y,
141+
/// R or Q (powers of 1024), or KB, MB, etc. (powers of 1000), or b which
142+
/// is 512. Binary prefixes can be used, too: KiB=K, MiB=M, and so on.
141143
///
142144
/// # Errors
143145
///
@@ -159,6 +161,7 @@ impl<'parser> Parser<'parser> {
159161
/// assert_eq!(Ok(9 * 1000), parser.parse("9kB")); // kB is 1000
160162
/// assert_eq!(Ok(2 * 1024), parser.parse("2K")); // K is 1024
161163
/// assert_eq!(Ok(44251 * 1024), parser.parse("0xACDBK")); // 0xACDB is 44251 in decimal
164+
/// assert_eq!(Ok(44251 * 1024 * 1024), parser.parse("0b1010110011011011")); // 0b1010110011011011 is 44251 in decimal, default M
162165
/// ```
163166
pub fn parse(&self, size: &str) -> Result<u128, ParseSizeError> {
164167
if size.is_empty() {
@@ -176,6 +179,11 @@ impl<'parser> Parser<'parser> {
176179
.take(2)
177180
.chain(size.chars().skip(2).take_while(char::is_ascii_hexdigit))
178181
.collect(),
182+
NumberSystem::Binary => size
183+
.chars()
184+
.take(2)
185+
.chain(size.chars().skip(2).take_while(|c| c.is_digit(2)))
186+
.collect(),
179187
_ => size.chars().take_while(char::is_ascii_digit).collect(),
180188
};
181189
let mut unit: &str = &size[numeric_string.len()..];
@@ -268,6 +276,10 @@ impl<'parser> Parser<'parser> {
268276
let trimmed_string = numeric_string.trim_start_matches("0x");
269277
Self::parse_number(trimmed_string, 16, size)?
270278
}
279+
NumberSystem::Binary => {
280+
let trimmed_string = numeric_string.trim_start_matches("0b");
281+
Self::parse_number(trimmed_string, 2, size)?
282+
}
271283
};
272284

273285
number
@@ -328,6 +340,14 @@ impl<'parser> Parser<'parser> {
328340
return NumberSystem::Hexadecimal;
329341
}
330342

343+
// Binary prefix: "0b" followed by at least one more character (the check below only tests for a non-empty remainder, not for a binary digit)
344+
// Note: "0b" alone is treated as decimal 0 with suffix "b"
345+
if let Some(prefix) = size.strip_prefix("0b") {
346+
if !prefix.is_empty() {
347+
return NumberSystem::Binary;
348+
}
349+
}
350+
331351
let num_digits: usize = size
332352
.chars()
333353
.take_while(char::is_ascii_digit)
@@ -363,7 +383,9 @@ impl<'parser> Parser<'parser> {
363383
/// assert_eq!(Ok(123), parse_size_u128("123"));
364384
/// assert_eq!(Ok(9 * 1000), parse_size_u128("9kB")); // kB is 1000
365385
/// assert_eq!(Ok(2 * 1024), parse_size_u128("2K")); // K is 1024
366-
/// assert_eq!(Ok(44251 * 1024), parse_size_u128("0xACDBK"));
386+
/// assert_eq!(Ok(44251 * 1024), parse_size_u128("0xACDBK")); // hexadecimal
387+
/// assert_eq!(Ok(10), parse_size_u128("0b1010")); // binary
388+
/// assert_eq!(Ok(10 * 1024), parse_size_u128("0b1010K")); // binary with suffix
367389
/// ```
368390
pub fn parse_size_u128(size: &str) -> Result<u128, ParseSizeError> {
369391
Parser::default().parse(size)
@@ -564,6 +586,7 @@ mod tests {
564586
assert!(parse_size_u64("1Y").is_err());
565587
assert!(parse_size_u64("1R").is_err());
566588
assert!(parse_size_u64("1Q").is_err());
589+
assert!(parse_size_u64("0b1111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111").is_err());
567590

568591
assert!(variant_eq(
569592
&parse_size_u64("1Z").unwrap_err(),
@@ -634,6 +657,7 @@ mod tests {
634657
#[test]
635658
fn b_suffix() {
636659
assert_eq!(Ok(3 * 512), parse_size_u64("3b")); // b is 512
660+
assert_eq!(Ok(0), parse_size_u64("0b")); // b should be used as a suffix in this case instead of signifying binary
637661
}
638662

639663
#[test]
@@ -774,6 +798,12 @@ mod tests {
774798
assert_eq!(Ok(44251 * 1024), parse_size_u128("0xACDBK"));
775799
}
776800

801+
#[test]
802+
fn parse_binary_size() {
803+
assert_eq!(Ok(44251), parse_size_u64("0b1010110011011011"));
804+
assert_eq!(Ok(44251 * 1024), parse_size_u64("0b1010110011011011K"));
805+
}
806+
777807
#[test]
778808
#[cfg(target_os = "linux")]
779809
fn parse_percent() {

tests/by-util/test_df.rs

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -648,6 +648,53 @@ fn test_block_size_with_suffix() {
648648
assert_eq!(get_header("1GB"), "1GB-blocks");
649649
}
650650

651+
#[test]
652+
fn test_df_binary_block_size() {
653+
fn get_header(block_size: &str) -> String {
654+
let output = new_ucmd!()
655+
.args(&["-B", block_size, "--output=size"])
656+
.succeeds()
657+
.stdout_str_lossy();
658+
output.lines().next().unwrap().trim().to_string()
659+
}
660+
661+
let test_cases = [
662+
("0b1", "1"),
663+
("0b10100", "20"),
664+
("0b1000000000", "512"),
665+
("0b10K", "2K"),
666+
];
667+
668+
for (binary, decimal) in test_cases {
669+
let binary_result = get_header(binary);
670+
let decimal_result = get_header(decimal);
671+
assert_eq!(
672+
binary_result, decimal_result,
673+
"Binary {binary} should equal decimal {decimal}"
674+
);
675+
}
676+
}
677+
678+
#[test]
679+
fn test_df_binary_env_block_size() {
680+
fn get_header(env_var: &str, env_value: &str) -> String {
681+
let output = new_ucmd!()
682+
.env(env_var, env_value)
683+
.args(&["--output=size"])
684+
.succeeds()
685+
.stdout_str_lossy();
686+
output.lines().next().unwrap().trim().to_string()
687+
}
688+
689+
let binary_header = get_header("DF_BLOCK_SIZE", "0b10000000000");
690+
let decimal_header = get_header("DF_BLOCK_SIZE", "1024");
691+
assert_eq!(binary_header, decimal_header);
692+
693+
let binary_header = get_header("BLOCK_SIZE", "0b10000000000");
694+
let decimal_header = get_header("BLOCK_SIZE", "1024");
695+
assert_eq!(binary_header, decimal_header);
696+
}
697+
651698
#[test]
652699
fn test_block_size_in_posix_portability_mode() {
653700
fn get_header(block_size: &str) -> String {
@@ -849,6 +896,32 @@ fn test_invalid_block_size_suffix() {
849896
.stderr_contains("invalid suffix in --block-size argument '1.2'");
850897
}
851898

899+
#[test]
900+
fn test_df_invalid_binary_size() {
901+
new_ucmd!()
902+
.arg("--block-size=0b123")
903+
.fails()
904+
.stderr_contains("invalid suffix in --block-size argument '0b123'");
905+
}
906+
907+
#[test]
908+
fn test_df_binary_edge_cases() {
909+
new_ucmd!()
910+
.arg("-B0b")
911+
.fails()
912+
.stderr_contains("invalid --block-size argument '0b'");
913+
914+
new_ucmd!()
915+
.arg("-B0B")
916+
.fails()
917+
.stderr_contains("invalid suffix in --block-size argument '0B'");
918+
919+
new_ucmd!()
920+
.arg("--block-size=0b1111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111")
921+
.fails()
922+
.stderr_contains("too large");
923+
}
924+
852925
#[test]
853926
fn test_output_selects_columns() {
854927
let output = new_ucmd!()

tests/by-util/test_du.rs

Lines changed: 137 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,120 @@ fn test_du_env_block_size_hierarchy() {
282282
assert_eq!(expected, result2);
283283
}
284284

285+
#[test]
286+
fn test_du_binary_block_size() {
287+
let ts = TestScenario::new(util_name!());
288+
let at = &ts.fixtures;
289+
let dir = "a";
290+
291+
at.mkdir(dir);
292+
let fpath = at.plus(format!("{dir}/file"));
293+
std::fs::File::create(&fpath)
294+
.expect("cannot create test file")
295+
.set_len(100_000)
296+
.expect("cannot set file size");
297+
298+
let test_cases = [
299+
("0b1", "1"),
300+
("0b10100", "20"),
301+
("0b1000000000", "512"),
302+
("0b10K", "2K"),
303+
];
304+
305+
for (binary, decimal) in test_cases {
306+
let decimal = ts
307+
.ucmd()
308+
.arg(dir)
309+
.arg(format!("--block-size={decimal}"))
310+
.succeeds()
311+
.stdout_move_str();
312+
313+
let binary = ts
314+
.ucmd()
315+
.arg(dir)
316+
.arg(format!("--block-size={binary}"))
317+
.succeeds()
318+
.stdout_move_str();
319+
320+
assert_eq!(
321+
decimal, binary,
322+
"Binary {binary} should equal decimal {decimal}"
323+
);
324+
}
325+
}
326+
327+
#[test]
328+
fn test_du_binary_env_block_size() {
329+
let ts = TestScenario::new(util_name!());
330+
let at = &ts.fixtures;
331+
let dir = "a";
332+
333+
at.mkdir(dir);
334+
let fpath = at.plus(format!("{dir}/file"));
335+
std::fs::File::create(&fpath)
336+
.expect("cannot create test file")
337+
.set_len(100_000)
338+
.expect("cannot set file size");
339+
340+
let expected = ts
341+
.ucmd()
342+
.arg(dir)
343+
.arg("--block-size=1024")
344+
.succeeds()
345+
.stdout_move_str();
346+
347+
let result = ts
348+
.ucmd()
349+
.arg(dir)
350+
.env("DU_BLOCK_SIZE", "0b10000000000")
351+
.succeeds()
352+
.stdout_move_str();
353+
354+
assert_eq!(expected, result);
355+
}
356+
357+
#[test]
358+
fn test_du_invalid_binary_size() {
359+
let ts = TestScenario::new(util_name!());
360+
361+
ts.ucmd()
362+
.arg("--block-size=0b123")
363+
.arg("/tmp")
364+
.fails_with_code(1)
365+
.stderr_only("du: invalid suffix in --block-size argument '0b123'\n");
366+
367+
ts.ucmd()
368+
.arg("--threshold=0b123")
369+
.arg("/tmp")
370+
.fails_with_code(1)
371+
.stderr_only("du: invalid suffix in --threshold argument '0b123'\n");
372+
}
373+
374+
#[test]
375+
fn test_du_binary_edge_cases() {
376+
let ts = TestScenario::new(util_name!());
377+
let at = &ts.fixtures;
378+
at.write("foo", "test");
379+
380+
ts.ucmd()
381+
.arg("-B0b")
382+
.arg("foo")
383+
.fails()
384+
.stderr_only("du: invalid --block-size argument '0b'\n");
385+
386+
ts.ucmd()
387+
.arg("-B0B")
388+
.arg("foo")
389+
.fails()
390+
.stderr_only("du: invalid suffix in --block-size argument '0B'\n");
391+
392+
ts.ucmd()
393+
.arg("--block-size=0b1111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111")
394+
.arg("foo")
395+
.fails_with_code(1)
396+
.stderr_contains("too large");
397+
}
398+
285399
#[test]
286400
fn test_du_non_existing_files() {
287401
new_ucmd!()
@@ -978,7 +1092,7 @@ fn test_du_threshold() {
9781092
at.write("subdir/links/bigfile.txt", &"x".repeat(10000)); // ~10K file
9791093
at.write("subdir/deeper/deeper_dir/smallfile.txt", "small"); // small file
9801094

981-
let threshold = if cfg!(windows) { "7K" } else { "10K" };
1095+
let threshold = "10K";
9821096

9831097
ts.ucmd()
9841098
.arg("--apparent-size")
@@ -995,6 +1109,27 @@ fn test_du_threshold() {
9951109
.stdout_contains("deeper_dir");
9961110
}
9971111

1112+
#[test]
1113+
#[cfg(not(target_os = "openbsd"))]
1114+
fn test_du_binary_threshold() {
1115+
let ts = TestScenario::new(util_name!());
1116+
let at = &ts.fixtures;
1117+
1118+
at.mkdir_all("subdir/links");
1119+
at.mkdir_all("subdir/deeper/deeper_dir");
1120+
at.write("subdir/links/bigfile.txt", &"x".repeat(10000));
1121+
at.write("subdir/deeper/deeper_dir/smallfile.txt", "small");
1122+
1123+
let threshold_bin = "0b10011100010000";
1124+
1125+
ts.ucmd()
1126+
.arg("--apparent-size")
1127+
.arg(format!("--threshold={threshold_bin}"))
1128+
.succeeds()
1129+
.stdout_contains("links")
1130+
.stdout_does_not_contain("deeper_dir");
1131+
}
1132+
9981133
#[test]
9991134
fn test_du_invalid_threshold() {
10001135
let ts = TestScenario::new(util_name!());
@@ -1528,7 +1663,7 @@ fn test_du_blocksize_zero_do_not_panic() {
15281663
let ts = TestScenario::new(util_name!());
15291664
let at = &ts.fixtures;
15301665
at.write("foo", "some content");
1531-
for block_size in ["0", "00", "000", "0x0"] {
1666+
for block_size in ["0", "00", "000", "0x0", "0b0"] {
15321667
ts.ucmd()
15331668
.arg(format!("-B{block_size}"))
15341669
.arg("foo")

0 commit comments

Comments
 (0)