Skip to content

Commit 3b395de

Browse files
committed
sort: add locale-aware month parsing using ICU
1 parent 16f7350 commit 3b395de

File tree

8 files changed

+281
-45
lines changed

8 files changed

+281
-45
lines changed

Cargo.lock

Lines changed: 137 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -326,6 +326,7 @@ glob = "0.3.1"
326326
half = "2.4.1"
327327
hostname = "0.4"
328328
icu_collator = "2.0.0"
329+
icu_datetime = "2.0.1"
329330
icu_decimal = "2.0.0"
330331
icu_locale = "2.0.0"
331332
icu_provider = "2.0.0"

src/uu/sort/Cargo.toml

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,12 @@ self_cell = { workspace = true }
3434
tempfile = { workspace = true }
3535
thiserror = { workspace = true }
3636
unicode-width = { workspace = true }
37-
uucore = { workspace = true, features = ["fs", "parser-size", "version-cmp"] }
37+
uucore = { workspace = true, features = [
38+
"fs",
39+
"parser-size",
40+
"version-cmp",
41+
"i18n-month",
42+
] }
3843
fluent = { workspace = true }
3944

4045
[target.'cfg(unix)'.dependencies]
@@ -49,6 +54,7 @@ uucore = { workspace = true, features = [
4954
"parser-size",
5055
"version-cmp",
5156
"i18n-collator",
57+
"i18n-month",
5258
] }
5359

5460
[[bin]]

src/uu/sort/src/sort.rs

Lines changed: 3 additions & 43 deletions
Original file line number | Diff line number | Diff line change
@@ -45,6 +45,7 @@ use uucore::error::{FromIo, strip_errno};
4545
use uucore::error::{UError, UResult, USimpleError, UUsageError};
4646
use uucore::extendedbigdecimal::ExtendedBigDecimal;
4747
use uucore::format_usage;
48+
use uucore::i18n::month::month_parse as locale_month_parse;
4849
use uucore::line_ending::LineEnding;
4950
use uucore::parser::num_parser::{ExtendedParser, ExtendedParserError};
5051
use uucore::parser::parse_size::{ParseSizeError, Parser};
@@ -647,7 +648,7 @@ impl<'a> Line<'a> {
647648
.enumerate()
648649
.skip_while(|(_, c)| c.is_ascii_whitespace());
649650

650-
let month = if month_parse(initial_selection) == Month::Unknown {
651+
let month = if locale_month_parse(initial_selection) == 0 {
651652
// We failed to parse a month, which is equivalent to matching nothing.
652653
// Add the "no match for key" marker to the first non-whitespace character.
653654
let first_non_whitespace = month_chars.next();
@@ -2116,49 +2117,8 @@ fn random_shuffle(a: &[u8], b: &[u8], salt: &[u8]) -> Ordering {
21162117
da.cmp(&db)
21172118
}
21182119

2119-
#[derive(Eq, Ord, PartialEq, PartialOrd, Clone, Copy)]
2120-
enum Month {
2121-
Unknown,
2122-
January,
2123-
February,
2124-
March,
2125-
April,
2126-
May,
2127-
June,
2128-
July,
2129-
August,
2130-
September,
2131-
October,
2132-
November,
2133-
December,
2134-
}
2135-
2136-
/// Parse the beginning string into a Month, returning [`Month::Unknown`] on errors.
2137-
fn month_parse(line: &[u8]) -> Month {
2138-
let line = line.trim_ascii_start();
2139-
2140-
match line.get(..3).map(|x| x.to_ascii_uppercase()).as_deref() {
2141-
Some(b"JAN") => Month::January,
2142-
Some(b"FEB") => Month::February,
2143-
Some(b"MAR") => Month::March,
2144-
Some(b"APR") => Month::April,
2145-
Some(b"MAY") => Month::May,
2146-
Some(b"JUN") => Month::June,
2147-
Some(b"JUL") => Month::July,
2148-
Some(b"AUG") => Month::August,
2149-
Some(b"SEP") => Month::September,
2150-
Some(b"OCT") => Month::October,
2151-
Some(b"NOV") => Month::November,
2152-
Some(b"DEC") => Month::December,
2153-
_ => Month::Unknown,
2154-
}
2155-
}
2156-
21572120
fn month_compare(a: &[u8], b: &[u8]) -> Ordering {
2158-
let ma = month_parse(a);
2159-
let mb = month_parse(b);
2160-
2161-
ma.cmp(&mb)
2121+
locale_month_parse(a).cmp(&locale_month_parse(b))
21622122
}
21632123

21642124
fn print_sorted<'a, T: Iterator<Item = &'a Line<'a>>>(

src/uucore/Cargo.toml

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -73,6 +73,9 @@ selinux = { workspace = true, optional = true }
7373
icu_collator = { workspace = true, optional = true, features = [
7474
"compiled_data",
7575
] }
76+
icu_datetime = { workspace = true, optional = true, features = [
77+
"compiled_data",
78+
] }
7679
icu_decimal = { workspace = true, optional = true, features = [
7780
"compiled_data",
7881
] }
@@ -143,10 +146,11 @@ format = [
143146
"quoting-style",
144147
"unit-prefix",
145148
]
146-
i18n-all = ["i18n-collator", "i18n-decimal"]
149+
i18n-all = ["i18n-collator", "i18n-decimal", "i18n-month"]
147150
i18n-common = ["icu_locale"]
148151
i18n-collator = ["i18n-common", "icu_collator"]
149152
i18n-decimal = ["i18n-common", "icu_decimal", "icu_provider"]
153+
i18n-month = ["i18n-common", "icu_datetime", "icu_provider", "libc"]
150154
mode = ["libc"]
151155
perms = ["entries", "libc", "walkdir"]
152156
buf-copy = []

src/uucore/src/lib/features/i18n/mod.rs

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,8 @@ use icu_locale::{Locale, locale};
1111
pub mod collator;
1212
#[cfg(feature = "i18n-decimal")]
1313
pub mod decimal;
14+
#[cfg(feature = "i18n-month")]
15+
pub mod month;
1416

1517
/// The encoding specified by the locale, if specified
1618
/// Currently only supports ASCII and UTF-8 for the sake of simplicity.
@@ -77,6 +79,13 @@ pub fn get_numeric_locale() -> &'static (Locale, UEncoding) {
7779
NUMERIC_LOCALE.get_or_init(|| get_locale_from_env("LC_NUMERIC"))
7880
}
7981

82+
/// Get the time locale from the environment (used for month names, etc.)
83+
pub fn get_time_locale() -> &'static (Locale, UEncoding) {
84+
static TIME_LOCALE: OnceLock<(Locale, UEncoding)> = OnceLock::new();
85+
86+
TIME_LOCALE.get_or_init(|| get_locale_from_env("LC_TIME"))
87+
}
88+
8089
/// Return the encoding deduced from the locale environment variable.
8190
pub fn get_locale_encoding() -> UEncoding {
8291
get_collating_locale().1

0 commit comments

Comments
 (0)