Skip to content

Commit 5c459a1

Browse files
Kaua-Klassmannmattsu2020
authored andcommitted
fix: numeric sort (-n) does not recognize thousand separators (#10339)
1 parent c52e487 commit 5c459a1

16 files changed

+170
-39
lines changed

src/uu/sort/src/sort.rs

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,6 @@ use uucore::display::Quotable;
4747
use uucore::error::{FromIo, strip_errno};
4848
use uucore::error::{UError, UResult, USimpleError, UUsageError};
4949
use uucore::extendedbigdecimal::ExtendedBigDecimal;
50-
use uucore::format_usage;
5150
#[cfg(feature = "i18n-collator")]
5251
use uucore::i18n::collator::locale_cmp;
5352
use uucore::i18n::decimal::locale_decimal_separator;
@@ -59,6 +58,7 @@ use uucore::posix::{MODERN, TRADITIONAL};
5958
use uucore::show_error;
6059
use uucore::translate;
6160
use uucore::version_cmp::version_cmp;
61+
use uucore::{format_usage, i18n};
6262

6363
use crate::buffer_hint::automatic_buffer_size;
6464
use crate::tmp_dir::TmpDirWrapper;
@@ -1200,11 +1200,21 @@ impl FieldSelector {
12001200
};
12011201
let mut range_str = &line[self.get_range(line, tokens)];
12021202
if self.settings.mode == SortMode::Numeric || self.settings.mode == SortMode::HumanNumeric {
1203+
// Get the thousands separator from the locale, handling cases where the separator is empty or multi-character
1204+
let locale_thousands_separator = i18n::decimal::locale_grouping_separator().as_bytes();
1205+
1206+
// Upstream GNU coreutils ignore multibyte thousands separators
1207+
// (FIXME in C source). We keep the same single-byte behavior.
1208+
let thousands_separator = match locale_thousands_separator {
1209+
[b] => Some(*b),
1210+
_ => None,
1211+
};
1212+
12031213
// Parse NumInfo for this number.
1204-
let (info, num_range) = NumInfo::parse(
1205-
range_str,
1206-
&numeric_locale.num_info_settings(self.settings.mode == SortMode::HumanNumeric),
1207-
);
1214+
let mut parse_settings =
1215+
numeric_locale.num_info_settings(self.settings.mode == SortMode::HumanNumeric);
1216+
parse_settings.thousands_separator = thousands_separator;
1217+
let (info, num_range) = NumInfo::parse(range_str, &parse_settings);
12081218
// Shorten the range to what we need to pass to numeric_str_cmp later.
12091219
range_str = &range_str[num_range];
12101220
Selection::WithNumInfo(range_str, info)

src/uucore/src/lib/features/i18n/decimal.rs

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,15 +37,47 @@ pub fn locale_decimal_separator() -> &'static str {
3737
DECIMAL_SEP.get_or_init(|| get_decimal_separator(get_numeric_locale().0.clone()))
3838
}
3939

40+
/// Return the grouping separator for the given locale
41+
fn get_grouping_separator(loc: Locale) -> String {
42+
let data_locale = DataLocale::from(loc);
43+
44+
let request = DataRequest {
45+
id: DataIdentifierBorrowed::for_locale(&data_locale),
46+
metadata: DataRequestMetadata::default(),
47+
};
48+
49+
let response: DataResponse<DecimalSymbolsV1> =
50+
icu_decimal::provider::Baked.load(request).unwrap();
51+
52+
response.payload.get().grouping_separator().to_string()
53+
}
54+
55+
/// Return the grouping separator from the language we're working with.
56+
/// Example:
57+
/// Say we need to format 1,000
58+
/// en_US: 1,000 -> grouping separator is ','
59+
/// fr_FR: 1 000 -> grouping separator is '\u{202f}'
60+
pub fn locale_grouping_separator() -> &'static str {
61+
static GROUPING_SEP: OnceLock<String> = OnceLock::new();
62+
63+
GROUPING_SEP.get_or_init(|| get_grouping_separator(get_numeric_locale().0.clone()))
64+
}
65+
4066
#[cfg(test)]
4167
mod tests {
4268
use icu_locale::locale;
4369

44-
use super::get_decimal_separator;
70+
use super::{get_decimal_separator, get_grouping_separator};
4571

4672
#[test]
47-
fn test_simple_separator() {
73+
fn test_simple_decimal_separator() {
4874
assert_eq!(get_decimal_separator(locale!("en")), ".");
4975
assert_eq!(get_decimal_separator(locale!("fr")), ",");
5076
}
77+
78+
#[test]
79+
fn test_simple_grouping_separator() {
80+
assert_eq!(get_grouping_separator(locale!("en")), ",");
81+
assert_eq!(get_grouping_separator(locale!("fr")), "\u{202f}");
82+
}
5183
}

tests/by-util/test_sort.rs

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,14 @@ fn test_multiple_decimals_numeric() {
260260
);
261261
}
262262

263+
#[test]
264+
fn test_multiple_groupings_numeric() {
265+
test_helper(
266+
"multiple_groupings_numeric",
267+
&["-n", "--numeric-sort", "--sort=numeric", "--sort=n"],
268+
);
269+
}
270+
263271
#[test]
264272
fn test_numeric_with_trailing_invalid_chars() {
265273
test_helper(
@@ -2424,18 +2432,18 @@ _
24242432
__
24252433
1
24262434
_
2427-
2,5
2428-
_
24292435
2.4
24302436
___
2437+
2,5
2438+
_
24312439
2.,,3
24322440
__
24332441
2.4
24342442
___
2435-
2,,3
2436-
_
24372443
2.4
24382444
___
2445+
2,,3
2446+
_
24392447
1a
24402448
_
24412449
2b

tests/fixtures/sort/mixed_floats_ints_chars_numeric.expected

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,10 @@ CARAvan
2121
8.013
2222
45
2323
46.89
24-
576,446.88800000
25-
576,446.890
2624
4567.
2725
37800
26+
576,446.88800000
27+
576,446.890
2828
4798908.340000000000
2929
4798908.45
3030
4798908.8909800

tests/fixtures/sort/mixed_floats_ints_chars_numeric.expected.debug

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -67,18 +67,18 @@ __
6767
46.89
6868
_____
6969
_____
70-
576,446.88800000
71-
___
72-
________________
73-
576,446.890
74-
___
75-
___________
7670
4567.
7771
_____
7872
____________________
7973
>>>>37800
8074
_____
8175
_________
76+
576,446.88800000
77+
___
78+
________________
79+
576,446.890
80+
___
81+
___________
8282
4798908.340000000000
8383
____________________
8484
____________________

tests/fixtures/sort/mixed_floats_ints_chars_numeric_stable.expected

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,10 @@ CARAvan
2424
8.013
2525
45
2626
46.89
27-
576,446.890
28-
576,446.88800000
2927
4567.
3028
37800
29+
576,446.88800000
30+
576,446.890
3131
4798908.340000000000
3232
4798908.45
3333
4798908.8909800

tests/fixtures/sort/mixed_floats_ints_chars_numeric_stable.expected.debug

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,14 +50,14 @@ _____
5050
__
5151
46.89
5252
_____
53-
576,446.890
54-
___
55-
576,446.88800000
56-
___
5753
4567.
5854
_____
5955
>>>>37800
6056
_____
57+
576,446.88800000
58+
___
59+
576,446.890
60+
___
6161
4798908.340000000000
6262
____________________
6363
4798908.45

tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique.expected

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,10 @@
1111
8.013
1212
45
1313
46.89
14-
576,446.890
1514
4567.
1615
37800
16+
576,446.88800000
17+
576,446.890
1718
4798908.340000000000
1819
4798908.45
1920
4798908.8909800

tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique.expected.debug

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,14 @@ _____
2424
__
2525
46.89
2626
_____
27-
576,446.890
28-
___
2927
4567.
3028
_____
3129
>>>>37800
3230
_____
31+
576,446.88800000
32+
___
33+
576,446.890
34+
___
3335
4798908.340000000000
3436
____________________
3537
4798908.45

tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_reverse.expected

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
4798908.8909800
22
4798908.45
33
4798908.340000000000
4+
576,446.890
5+
576,446.88800000
46
37800
57
4567.
6-
576,446.890
78
46.89
89
45
910
8.013

0 commit comments

Comments
 (0)