Skip to content

Commit cd93429

Browse files
test(sort): Add locale collation tests for both C and UTF-8
locales
1 parent f1bcf0e commit cd93429

File tree

1 file changed

+35
-45
lines changed

1 file changed

+35
-45
lines changed

tests/by-util/test_sort.rs

Lines changed: 35 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -2465,10 +2465,10 @@ fn test_start_buffer() {
24652465

24662466
#[test]
24672467
fn test_locale_collation_c_locale() {
2468-
// In C locale, sorting should be pure byte order
2469-
// Accented characters (UTF-8) sort after ASCII letters
2468+
// C locale uses byte order - this is deterministic and tests the fix for #9148
2469+
// Accented characters (UTF-8 multibyte) sort after ASCII letters
24702470
let input = \ne\nE\na\nA\nz\n";
2471-
// C locale: byte order (A=0x41, E=0x45, a=0x61, e=0x65, z=0x7a, é=0xc3a9)
2471+
// C locale byte order: A=0x41, E=0x45, a=0x61, e=0x65, z=0x7A, é=0xC3 0xA9
24722472
let expected = "A\nE\na\ne\nz\né\n";
24732473

24742474
new_ucmd!()
@@ -2480,54 +2480,44 @@ fn test_locale_collation_c_locale() {
24802480

24812481
#[test]
24822482
fn test_locale_collation_utf8() {
2483-
// Skip if UTF-8 locale is not available
2484-
let Ok(locale) = env::var("LOCALE_FR_UTF8") else {
2485-
return;
2486-
};
2487-
if locale == "none" {
2488-
return;
2489-
}
2490-
2491-
// In UTF-8 locale with collation, accented chars sort near base chars
2492-
// "é" should sort near "e", not at the end
2483+
// Test UTF-8 locale handling - behavior depends on i18n-collator feature
2484+
// With feature: locale-aware collation (é sorts near e)
2485+
// Without feature: byte order (é after z, since 0xC3A9 > 0x7A)
24932486
let input = "z\né\ne\na\n";
24942487

2495-
let result = new_ucmd!().env("LC_ALL", &locale).pipe_in(input).succeeds();
2488+
let result = new_ucmd!()
2489+
.env("LC_ALL", "en_US.UTF-8")
2490+
.pipe_in(input)
2491+
.succeeds();
24962492

24972493
let output = result.stdout_str();
2498-
// In a proper locale, 'a' comes first, then 'e'/'é' together, then 'z'
2499-
// The exact order of e vs é depends on locale, but both should come before z
2500-
assert!(
2501-
output.starts_with("a\n"),
2502-
"Expected 'a' first in locale-aware sort, got: {output}"
2503-
);
2504-
assert!(
2505-
output.ends_with("z\n"),
2506-
"Expected 'z' last in locale-aware sort, got: {output}"
2507-
);
2508-
}
2509-
2510-
#[test]
2511-
fn test_locale_collation_shifted_punctuation() {
2512-
// Test that shifted alternate handling works (punctuation/spaces as secondary)
2513-
// In shifted mode, "a b" and "ab" should sort together, with space being secondary
2514-
let Ok(locale) = env::var("LOCALE_FR_UTF8") else {
2515-
return;
2516-
};
2517-
if locale == "none" {
2518-
return;
2519-
}
2520-
2521-
let input = "ab\na b\na-b\n";
2494+
let lines: Vec<&str> = output.lines().collect();
25222495

2523-
let result = new_ucmd!().env("LC_ALL", &locale).pipe_in(input).succeeds();
2496+
assert_eq!(lines.len(), 4, "Expected 4 sorted lines");
2497+
assert_eq!(lines[0], "a", "'a' (0x61) should always sort first");
25242498

2525-
// All three should sort together since base letters are the same
2526-
// The exact order depends on shifted handling, but they shouldn't be
2527-
// wildly separated like they would be in byte order
2528-
let output = result.stdout_str();
2529-
let lines: Vec<&str> = output.lines().collect();
2530-
assert_eq!(lines.len(), 3, "Expected 3 lines, got: {output}");
2499+
// Validate based on which collation mode is active
2500+
if lines[3] == "é" {
2501+
// Byte order mode: é (0xC3A9) > z (0x7A)
2502+
assert_eq!(
2503+
lines,
2504+
vec!["a", "e", "z", "é"],
2505+
"Byte order mode: expected a < e < z < é"
2506+
);
2507+
} else {
2508+
// Locale collation mode: é sorts with base letter e
2509+
assert_eq!(lines[3], "z", "Locale mode: 'z' should sort last");
2510+
let z_pos = lines.iter().position(|&x| x == "z").unwrap();
2511+
let e_pos = lines.iter().position(|&x| x == "e").unwrap();
2512+
let e_accent_pos = lines.iter().position(|&x| x == "é").unwrap();
2513+
assert!(
2514+
e_pos < z_pos && e_accent_pos < z_pos,
2515+
"Locale mode: 'e' ({}) and 'é' ({}) should sort before 'z' ({})",
2516+
e_pos,
2517+
e_accent_pos,
2518+
z_pos
2519+
);
2520+
}
25312521
}
25322522

25332523
/* spell-checker: enable */

0 commit comments

Comments
 (0)