Skip to content

Commit 7153e28

Browse files
refactor(sort): Move locale collation init to uucore
Move locale collation initialization logic from sort.rs to uucore/i18n/collator.rs, as suggested by the maintainer.
- Add an init_locale_collation() function in collator.rs.
- The function can be reused by ls, uniq, comm, join, and other utilities.
- Simplifies sort.rs by ~15 lines.
- No functional changes, just code reorganization.
1 parent 044f9b7 commit 7153e28

File tree

2 files changed

+42
-20
lines changed

2 files changed

+42
-20
lines changed

src/uu/sort/src/sort.rs

Lines changed: 3 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ use uucore::error::{UError, UResult, USimpleError, UUsageError};
4949
use uucore::extendedbigdecimal::ExtendedBigDecimal;
5050
use uucore::format_usage;
5151
#[cfg(feature = "i18n-collator")]
52-
use uucore::i18n::collator::{locale_cmp, try_init_collator};
52+
use uucore::i18n::collator::locale_cmp;
5353
use uucore::line_ending::LineEnding;
5454
use uucore::parser::num_parser::{ExtendedParser, ExtendedParserError};
5555
use uucore::parser::parse_size::{ParseSizeError, Parser};
@@ -1401,27 +1401,10 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
14011401

14021402
let output = Output::new(matches.get_one::<OsString>(options::OUTPUT))?;
14031403

1404-
// Check if we need locale-aware collation and initialize collator if needed
1404+
// Initialize locale collation if needed (UTF-8 locales)
14051405
// This MUST happen before init_precomputed() to avoid the performance regression
14061406
#[cfg(feature = "i18n-collator")]
1407-
let needs_locale_collation = {
1408-
use uucore::i18n::collator::{AlternateHandling, CollatorOptions};
1409-
use uucore::i18n::{UEncoding, get_locale_encoding};
1410-
1411-
let is_utf8_locale = get_locale_encoding() == UEncoding::Utf8;
1412-
1413-
if is_utf8_locale {
1414-
// Initialize ICU collator with Shifted mode to match GNU sort behavior
1415-
let mut opts = CollatorOptions::default();
1416-
opts.alternate_handling = Some(AlternateHandling::Shifted);
1417-
1418-
if !try_init_collator(opts) {
1419-
eprintln!("sort: warning: Failed to initialize locale collator");
1420-
}
1421-
}
1422-
1423-
is_utf8_locale
1424-
};
1407+
let needs_locale_collation = uucore::i18n::collator::init_locale_collation();
14251408

14261409
#[cfg(not(feature = "i18n-collator"))]
14271410
let needs_locale_collation = false;

src/uucore/src/lib/features/i18n/collator.rs

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,45 @@ pub fn init_collator(opts: CollatorOptions) {
3030
.expect("Collator already initialized");
3131
}
3232

33+
/// Initialize the collator for locale-aware string comparison if needed.
34+
///
35+
/// This function checks if the current locale requires locale-aware collation
36+
/// (UTF-8 encoding) and initializes the ICU collator with appropriate settings
37+
/// if necessary. For C/POSIX locales, no initialization is needed as byte
38+
/// comparison is sufficient.
39+
///
40+
/// # Returns
41+
///
42+
/// `true` if the collator was initialized for a UTF-8 locale, `false` if
43+
/// using C/POSIX locale (no initialization needed).
44+
///
45+
/// # Example
46+
///
47+
/// ```
48+
/// use uucore::i18n::collator::init_locale_collation;
49+
///
50+
/// if init_locale_collation() {
51+
/// // Using locale-aware collation
52+
/// } else {
53+
/// // Using byte comparison (C/POSIX locale)
54+
/// }
55+
/// ```
56+
pub fn init_locale_collation() -> bool {
57+
use crate::i18n::{get_locale_encoding, UEncoding};
58+
59+
// Check if we need locale-aware collation
60+
if get_locale_encoding() != UEncoding::Utf8 {
61+
// C/POSIX locale - no collator needed
62+
return false;
63+
}
64+
65+
// UTF-8 locale - initialize collator with Shifted mode to match GNU behavior
66+
let mut opts = CollatorOptions::default();
67+
opts.alternate_handling = Some(AlternateHandling::Shifted);
68+
69+
try_init_collator(opts)
70+
}
71+
3372
/// Compare both strings with regard to the current locale.
3473
pub fn locale_cmp(left: &[u8], right: &[u8]) -> Ordering {
3574
// If the detected locale is 'C', just do byte-wise comparison

0 commit comments

Comments
 (0)