@@ -2465,10 +2465,10 @@ fn test_start_buffer() {
24652465
24662466#[ test]
24672467fn test_locale_collation_c_locale ( ) {
2468- // In C locale, sorting should be pure byte order
2469- // Accented characters (UTF-8) sort after ASCII letters
2468+ // C locale uses byte order - this is deterministic and tests the fix for #9148
2469+ // Accented characters (UTF-8 multibyte) sort after ASCII letters
24702470 let input = "é\n e\n E\n a\n A\n z\n " ;
2471- // C locale: byte order ( A=0x41, E=0x45, a=0x61, e=0x65, z=0x7a , é=0xc3a9)
2471+ // C locale byte order: A=0x41, E=0x45, a=0x61, e=0x65, z=0x7A, é=0xC3 0xA9
24722472 let expected = "A\n E\n a\n e\n z\n é\n " ;
24732473
24742474 new_ucmd ! ( )
@@ -2480,54 +2480,44 @@ fn test_locale_collation_c_locale() {
24802480
24812481#[ test]
24822482fn test_locale_collation_utf8 ( ) {
2483- // Skip if UTF-8 locale is not available
2484- let Ok ( locale) = env:: var ( "LOCALE_FR_UTF8" ) else {
2485- return ;
2486- } ;
2487- if locale == "none" {
2488- return ;
2489- }
2490-
2491- // In UTF-8 locale with collation, accented chars sort near base chars
2492- // "é" should sort near "e", not at the end
2483+ // Test UTF-8 locale handling - behavior depends on i18n-collator feature
2484+ // With feature: locale-aware collation (é sorts near e)
2485+ // Without feature: byte order (é after z, since its lead byte 0xC3 > 0x7A)
24932486 let input = "z\n é\n e\n a\n " ;
24942487
2495- let result = new_ucmd ! ( ) . env ( "LC_ALL" , & locale) . pipe_in ( input) . succeeds ( ) ;
2488+ let result = new_ucmd ! ( )
2489+ . env ( "LC_ALL" , "en_US.UTF-8" )
2490+ . pipe_in ( input)
2491+ . succeeds ( ) ;
24962492
24972493 let output = result. stdout_str ( ) ;
2498- // In a proper locale, 'a' comes first, then 'e'/'é' together, then 'z'
2499- // The exact order of e vs é depends on locale, but both should come before z
2500- assert ! (
2501- output. starts_with( "a\n " ) ,
2502- "Expected 'a' first in locale-aware sort, got: {output}"
2503- ) ;
2504- assert ! (
2505- output. ends_with( "z\n " ) ,
2506- "Expected 'z' last in locale-aware sort, got: {output}"
2507- ) ;
2508- }
2509-
2510- #[ test]
2511- fn test_locale_collation_shifted_punctuation ( ) {
2512- // Test that shifted alternate handling works (punctuation/spaces as secondary)
2513- // In shifted mode, "a b" and "ab" should sort together, with space being secondary
2514- let Ok ( locale) = env:: var ( "LOCALE_FR_UTF8" ) else {
2515- return ;
2516- } ;
2517- if locale == "none" {
2518- return ;
2519- }
2520-
2521- let input = "ab\n a b\n a-b\n " ;
2494+ let lines: Vec < & str > = output. lines ( ) . collect ( ) ;
25222495
2523- let result = new_ucmd ! ( ) . env ( "LC_ALL" , & locale) . pipe_in ( input) . succeeds ( ) ;
2496+ assert_eq ! ( lines. len( ) , 4 , "Expected 4 sorted lines" ) ;
2497+ assert_eq ! ( lines[ 0 ] , "a" , "'a' (0x61) should always sort first" ) ;
25242498
2525- // All three should sort together since base letters are the same
2526- // The exact order depends on shifted handling, but they shouldn't be
2527- // wildly separated like they would be in byte order
2528- let output = result. stdout_str ( ) ;
2529- let lines: Vec < & str > = output. lines ( ) . collect ( ) ;
2530- assert_eq ! ( lines. len( ) , 3 , "Expected 3 lines, got: {output}" ) ;
2499+ // Validate based on which collation mode is active
2500+ if lines[ 3 ] == "é" {
2501+ // Byte order mode: é (0xC3 0xA9) sorts after z (0x7A) because its lead byte 0xC3 > 0x7A
2502+ assert_eq ! (
2503+ lines,
2504+ vec![ "a" , "e" , "z" , "é" ] ,
2505+ "Byte order mode: expected a < e < z < é"
2506+ ) ;
2507+ } else {
2508+ // Locale collation mode: é sorts with base letter e
2509+ assert_eq ! ( lines[ 3 ] , "z" , "Locale mode: 'z' should sort last" ) ;
2510+ let z_pos = lines. iter ( ) . position ( |& x| x == "z" ) . unwrap ( ) ;
2511+ let e_pos = lines. iter ( ) . position ( |& x| x == "e" ) . unwrap ( ) ;
2512+ let e_accent_pos = lines. iter ( ) . position ( |& x| x == "é" ) . unwrap ( ) ;
2513+ assert ! (
2514+ e_pos < z_pos && e_accent_pos < z_pos,
2515+ "Locale mode: 'e' ({}) and 'é' ({}) should sort before 'z' ({})" ,
2516+ e_pos,
2517+ e_accent_pos,
2518+ z_pos
2519+ ) ;
2520+ }
25312521}
25322522
25332523/* spell-checker: enable */
0 commit comments