@@ -267,6 +267,7 @@ impl Default for LocaleSelector {
267267}
268268
269269/// Normalize a locale string to standard format.
270+ /// Handles formats like "en-US", "zh-Hant-US", "zh-Hans-CN".
270271fn normalize_locale ( locale : & str ) -> Locale {
271272 let parts: Vec < & str > = locale. split ( '-' ) . collect ( ) ;
272273
@@ -275,23 +276,33 @@ fn normalize_locale(locale: &str) -> Locale {
275276 . map ( |s| s. to_lowercase ( ) )
276277 . unwrap_or_else ( || "en" . to_string ( ) ) ;
277278
278- let region = parts. get ( 1 ) . map ( |s| s. to_uppercase ( ) ) ;
279+ // A 4-letter ASCII-alphabetic part is a script subtag (e.g. "Hant", "Hans", "Cyrl").
280+ // Any other part is taken as the region and uppercased (e.g. "US", "CN"); it is not validated.
281+ let mut explicit_script: Option < String > = None ;
282+ let mut region: Option < String > = None ;
279283
280- // Determine script based on language if needed
281- let script = match language. as_str ( ) {
282- "zh" => {
283- // Chinese - Traditional for TW/HK, Simplified otherwise
284- if region. as_deref ( ) == Some ( "TW" ) || region. as_deref ( ) == Some ( "HK" ) {
285- Some ( "Hant" . to_string ( ) )
286- } else {
287- Some ( "Hans" . to_string ( ) )
288- }
284+ for part in parts. iter ( ) . skip ( 1 ) {
285+ if part. len ( ) == 4 && part. chars ( ) . all ( |c| c. is_ascii_alphabetic ( ) ) {
286+ explicit_script = Some ( part[ ..1 ] . to_uppercase ( ) + & part[ 1 ..] . to_lowercase ( ) ) ;
287+ } else {
288+ region = Some ( part. to_uppercase ( ) ) ;
289289 }
290- "sr" => {
291- // Serbian - can be Cyrillic or Latin
292- Some ( "Cyrl" . to_string ( ) )
290+ }
291+
292+ let script = if explicit_script. is_some ( ) {
293+ explicit_script
294+ } else {
295+ match language. as_str ( ) {
296+ "zh" => {
297+ if region. as_deref ( ) == Some ( "TW" ) || region. as_deref ( ) == Some ( "HK" ) {
298+ Some ( "Hant" . to_string ( ) )
299+ } else {
300+ Some ( "Hans" . to_string ( ) )
301+ }
302+ }
303+ "sr" => Some ( "Cyrl" . to_string ( ) ) ,
304+ _ => None ,
293305 }
294- _ => None ,
295306 } ;
296307
297308 Locale {
@@ -442,5 +453,16 @@ mod tests {
442453
443454 let zh_cn = normalize_locale ( "zh-CN" ) ;
444455 assert_eq ! ( zh_cn. script, Some ( "Hans" . to_string( ) ) ) ;
456+
457+ // 3-part locale: language-script-region
458+ let zh_hant_us = normalize_locale ( "zh-Hant-US" ) ;
459+ assert_eq ! ( zh_hant_us. language, "zh" ) ;
460+ assert_eq ! ( zh_hant_us. region, Some ( "US" . to_string( ) ) ) ;
461+ assert_eq ! ( zh_hant_us. script, Some ( "Hant" . to_string( ) ) ) ;
462+
463+ let zh_hans_us = normalize_locale ( "zh-Hans-US" ) ;
464+ assert_eq ! ( zh_hans_us. language, "zh" ) ;
465+ assert_eq ! ( zh_hans_us. region, Some ( "US" . to_string( ) ) ) ;
466+ assert_eq ! ( zh_hans_us. script, Some ( "Hans" . to_string( ) ) ) ;
445467 }
446468}
0 commit comments