Skip to content

Commit 1fb518a

Browse files
Fix -u-sd (#7341)
Validate that the subdivision matches the region, and derive the region from the subdivision in region-priority mode. The latter step might need to be done by the fallbacker actually. For a locale like `en-u-sd-gbsct`, should the chain be * `en-u-sd-gbsct` -> `en-GB` -> `en`, or * `en-GB-u-sd-gbsct` -> `en-GB` -> `en`? #4413
1 parent 884fe85 commit 1fb518a

File tree

5 files changed

+122
-37
lines changed

5 files changed

+122
-37
lines changed

components/locale/src/fallback/algorithms.rs

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,13 @@ use super::*;
99

1010
impl LocaleFallbackerWithConfig<'_> {
1111
pub(crate) fn normalize(&self, locale: &mut DataLocale, default_script: &mut Option<Script>) {
12-
// 0. If there is an invalid "sd" subtag, drop it
12+
// 0. If there is an invalid or trivial "sd" subtag, drop it
1313
if let Some(subdivision) = locale.subdivision.take() {
1414
if let Some(region) = locale.region {
1515
if subdivision
1616
.as_str()
1717
.starts_with(region.to_tinystr().to_ascii_lowercase().as_str())
18+
&& !subdivision.as_str().ends_with("zzzz")
1819
{
1920
locale.subdivision = Some(subdivision);
2021
}
@@ -240,6 +241,8 @@ impl LocaleFallbackIteratorInner<'_> {
240241
#[cfg(test)]
241242
mod tests {
242243
use super::*;
244+
use icu_locale_core::preferences::LocalePreferences;
245+
use icu_locale_core::Locale;
243246
use writeable::Writeable;
244247

245248
struct TestCase {
@@ -472,13 +475,23 @@ mod tests {
472475
let fallbacker_no_data = fallbacker_no_data.as_borrowed();
473476
let fallbacker_with_data = LocaleFallbacker::new();
474477
for cas in TEST_CASES {
475-
for (priority, expected_chain) in [
478+
let prefs = LocalePreferences::from(&cas.input.parse::<Locale>().unwrap());
479+
for (priority, data_locale, expected_chain) in [
476480
(
477481
LocaleFallbackPriority::Language,
482+
prefs.to_data_locale_language_priority(),
478483
cas.expected_language_chain,
479484
),
480-
(LocaleFallbackPriority::Script, cas.expected_script_chain),
481-
(LocaleFallbackPriority::Region, cas.expected_region_chain),
485+
(
486+
LocaleFallbackPriority::Script,
487+
prefs.to_data_locale_language_priority(),
488+
cas.expected_script_chain,
489+
),
490+
(
491+
LocaleFallbackPriority::Region,
492+
prefs.to_data_locale_region_priority(),
493+
cas.expected_region_chain,
494+
),
482495
] {
483496
let mut config = LocaleFallbackConfig::default();
484497
config.priority = priority;
@@ -487,9 +500,7 @@ mod tests {
487500
} else {
488501
fallbacker_no_data
489502
};
490-
let mut it = fallbacker
491-
.for_config(config)
492-
.fallback_for(cas.input.parse().unwrap());
503+
let mut it = fallbacker.for_config(config).fallback_for(data_locale);
493504
let mut actual_chain = Vec::new();
494505
for i in 0..20 {
495506
if i == 19 {

components/locale_core/src/data.rs

Lines changed: 49 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ use core::str::FromStr;
4949
/// DataLocale::from(locale!("hi-IN-u-sd-inas"))
5050
/// );
5151
/// ```
52-
#[derive(Clone, Copy, PartialEq, Hash, Eq)]
52+
#[derive(Clone, Copy)]
5353
#[non_exhaustive]
5454
pub struct DataLocale {
5555
/// Language subtag
@@ -61,9 +61,24 @@ pub struct DataLocale {
6161
/// Variant subtag
6262
pub variant: Option<Variant>,
6363
/// Subivision (-u-sd-) subtag
64+
// TODO(3.0): Use `SubdivisionSuffix` type
6465
pub subdivision: Option<Subtag>,
6566
}
6667

68+
impl PartialEq for DataLocale {
69+
fn eq(&self, other: &Self) -> bool {
70+
self.as_tuple() == other.as_tuple()
71+
}
72+
}
73+
74+
impl Eq for DataLocale {}
75+
76+
impl Hash for DataLocale {
77+
fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
78+
self.as_tuple().hash(state);
79+
}
80+
}
81+
6782
impl Default for DataLocale {
6883
fn default() -> Self {
6984
Self {
@@ -205,24 +220,46 @@ impl DataLocale {
205220
Ok(())
206221
}
207222

223+
fn region_and_subdivision(&self) -> Option<unicode_ext::SubdivisionId> {
224+
self.subdivision
225+
.and_then(|s| unicode_ext::SubdivisionId::try_from_str(s.as_str()).ok())
226+
.or(self.region.map(|region| unicode_ext::SubdivisionId {
227+
region,
228+
suffix: unicode_ext::SubdivisionSuffix::UNKNOWN,
229+
}))
230+
}
231+
208232
fn as_tuple(
209233
&self,
210234
) -> (
211235
Language,
212236
Option<Script>,
213-
Option<Region>,
237+
Option<unicode_ext::SubdivisionId>,
214238
Option<Variant>,
215-
Option<Subtag>,
216239
) {
217240
(
218241
self.language,
219242
self.script,
220-
self.region,
243+
self.region_and_subdivision(),
221244
self.variant,
222-
self.subdivision,
223245
)
224246
}
225247

248+
pub(crate) fn from_parts(
249+
language: Language,
250+
script: Option<Script>,
251+
region: Option<unicode_ext::SubdivisionId>,
252+
variant: Option<Variant>,
253+
) -> Self {
254+
Self {
255+
language,
256+
script,
257+
region: region.map(|sd| sd.region),
258+
variant,
259+
subdivision: region.map(|sd| sd.into_subtag()),
260+
}
261+
}
262+
226263
/// Returns an ordering suitable for use in [`BTreeSet`].
227264
///
228265
/// [`BTreeSet`]: alloc::collections::BTreeSet
@@ -360,8 +397,8 @@ impl DataLocale {
360397
keywords: unicode_ext::Keywords::new_single(
361398
RegionalSubdivision::UNICODE_EXTENSION_KEY,
362399
RegionalSubdivision(
363-
unicode_ext::SubdivisionId::try_from_str(self.subdivision?.as_str())
364-
.ok()?,
400+
self.region_and_subdivision()
401+
.filter(|sd| !sd.suffix.is_unknown())?,
365402
)
366403
.into(),
367404
),
@@ -386,12 +423,16 @@ fn test_data_locale_to_string() {
386423
},
387424
TestCase {
388425
locale: "und-u-sd-sdd",
389-
expected: "und-u-sd-sdd",
426+
expected: "und-SD-u-sd-sdd",
390427
},
391428
TestCase {
392429
locale: "en-ZA-u-sd-zaa",
393430
expected: "en-ZA-u-sd-zaa",
394431
},
432+
TestCase {
433+
locale: "en-ZA-u-sd-sdd",
434+
expected: "en-ZA",
435+
},
395436
] {
396437
let locale = cas.locale.parse::<DataLocale>().unwrap();
397438
writeable::assert_writeable_eq!(locale, cas.expected);

components/locale_core/src/extensions/unicode/subdivision.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,14 @@ impl_tinystr_subtag!(
4646
["toolooong"],
4747
);
4848

49+
impl SubdivisionSuffix {
50+
pub(crate) const UNKNOWN: Self = subdivision_suffix!("zzzz");
51+
52+
pub(crate) fn is_unknown(self) -> bool {
53+
self == Self::UNKNOWN
54+
}
55+
}
56+
4957
/// A Subivision Id as defined in [`Unicode Locale Identifier`].
5058
///
5159
/// Subdivision Id is used in [`Unicode`] extensions:

components/locale_core/src/preferences/locale.rs

Lines changed: 45 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -26,31 +26,46 @@ pub struct LocalePreferences {
2626
}
2727

2828
impl LocalePreferences {
29-
fn to_data_locale_maybe_region_priority(self, region_priority: bool) -> DataLocale {
30-
DataLocale {
31-
language: self.language,
32-
script: self.script,
33-
region: match (self.region, self.region_override) {
34-
(_, Some(r)) if region_priority => Some(r.region),
35-
(r, _) => r,
36-
},
37-
variant: self.variant,
38-
subdivision: self.subdivision.map(|r| r.into_subtag()),
39-
}
40-
}
41-
4229
/// Convert to a DataLocale, with region-based fallback priority
4330
///
4431
/// Most users should use `icu_provider::marker::make_locale()` instead.
4532
pub fn to_data_locale_region_priority(self) -> DataLocale {
46-
self.to_data_locale_maybe_region_priority(true)
33+
if let Some(ro) = self.region_override {
34+
return DataLocale::from_parts(self.language, self.script, Some(*ro), self.variant);
35+
}
36+
37+
self.to_data_locale_language_priority()
4738
}
4839

4940
/// Convert to a DataLocale, with language-based fallback priority
5041
///
5142
/// Most users should use `icu_provider::marker::make_locale()` instead.
5243
pub fn to_data_locale_language_priority(self) -> DataLocale {
53-
self.to_data_locale_maybe_region_priority(false)
44+
use crate::extensions::unicode::{SubdivisionId, SubdivisionSuffix};
45+
46+
let region = if let Some(sd) = self.subdivision {
47+
if let Some(region) = self.region {
48+
// Discard the subdivison if it doesn't match the region
49+
Some(SubdivisionId {
50+
region,
51+
suffix: if sd.region == region {
52+
sd.suffix
53+
} else {
54+
SubdivisionSuffix::UNKNOWN
55+
},
56+
})
57+
} else {
58+
// Use the subdivision's region if there's no region
59+
Some(*sd)
60+
}
61+
} else {
62+
self.region.map(|region| SubdivisionId {
63+
region,
64+
suffix: SubdivisionSuffix::UNKNOWN,
65+
})
66+
};
67+
68+
DataLocale::from_parts(self.language, self.script, region, self.variant)
5469
}
5570
}
5671
impl Default for LocalePreferences {
@@ -201,8 +216,8 @@ mod tests {
201216
},
202217
TestCase {
203218
input: "en-u-sd-ustx",
204-
language_priority: "en-u-sd-ustx",
205-
region_priority: "en-u-sd-ustx",
219+
language_priority: "en-US-u-sd-ustx",
220+
region_priority: "en-US-u-sd-ustx",
206221
},
207222
TestCase {
208223
input: "en-US-u-sd-ustx",
@@ -219,15 +234,25 @@ mod tests {
219234
language_priority: "en-US",
220235
region_priority: "en-GB",
221236
},
237+
TestCase {
238+
input: "en-US-u-sd-gbzzzz",
239+
language_priority: "en-US",
240+
region_priority: "en-US",
241+
},
222242
TestCase {
223243
input: "en-u-rg-gbzzzz-sd-ustx",
224-
language_priority: "en-u-sd-ustx",
225-
region_priority: "en-GB-u-sd-ustx", // does this make sense?
244+
language_priority: "en-US-u-sd-ustx",
245+
region_priority: "en-GB",
226246
},
227247
TestCase {
228248
input: "en-US-u-rg-gbzzzz-sd-ustx",
229249
language_priority: "en-US-u-sd-ustx",
230-
region_priority: "en-GB-u-sd-ustx", // does this make sense?
250+
region_priority: "en-GB",
251+
},
252+
TestCase {
253+
input: "en-US-u-rg-gbeng-sd-ustx",
254+
language_priority: "en-US-u-sd-ustx",
255+
region_priority: "en-GB-u-sd-gbeng",
231256
},
232257
];
233258
for test_case in test_cases.iter() {

provider/baked/tests/data/hello_world_v1.rs.data

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)