Skip to content

Commit dcdab07

Browse files
committed
Test
1 parent 20fb625 commit dcdab07

File tree

3 files changed

+95
-22
lines changed

3 files changed

+95
-22
lines changed
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
2-
"release": "0.0.0",
2+
"release": "43.0.0",
33
"generatedAt": "",
44
"locales": []
55
}

scripts/ingest/build-cldr-locales.ts

Lines changed: 85 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,14 @@
1818

1919
import fs from 'node:fs/promises';
2020
import path from 'node:path';
21-
import territoryInfo from 'cldr-core/supplemental/territoryInfo.json';
21+
22+
const CLDR_RELEASE = '43.0.0';
23+
const CLDR_CORE_BASE = `https://cdn.jsdelivr.net/npm/cldr-core@${CLDR_RELEASE}`;
24+
const CHARTS_TSV_BASE = `https://raw.githubusercontent.com/unicode-org/cldr-staging/main/docs/charts/${CLDR_RELEASE.replace(
25+
/\.0\.0$/,
26+
'',
27+
)}/tsv`;
28+
const CLDR_REPO_RAW_BASE = 'https://raw.githubusercontent.com/unicode-org/cldr/main/common/main';
2229

2330
const OUTPUT_FILE = path.join(process.cwd(), 'public/data/unicode/cldrLocales.json');
2431

@@ -71,10 +78,17 @@ interface RawMissingCountsRow {
7178
}
7279

7380
interface AvailableLocales {
74-
core: string[];
75-
modern: string[];
76-
full: string[];
77-
defaultContent: string[];
81+
/**
82+
* Locales in the “core” coverage tier. Not all releases publish a core
83+
* list, so this property is optional.
84+
*/
85+
core?: string[];
86+
/** Locales in the “modern” coverage tier. */
87+
modern?: string[];
88+
/** Locales in the “full” coverage tier. */
89+
full?: string[];
90+
/** List of default content locales, from defaultContent.json. */
91+
defaultContent?: string[];
7892
}
7993

8094
/** Main routine */
@@ -91,6 +105,9 @@ async function buildCldrLocales(): Promise<void> {
91105
missingCountsTsvPromise,
92106
]);
93107

108+
// Cast to our interface. Some keys (e.g. core) may be missing from
109+
// availableLocales.json depending on the CLDR release, so we treat
110+
// absent properties as empty arrays below.
94111
const available: AvailableLocales = availableLocales as AvailableLocales;
95112
const coverageRows = parseTsv(coverageTsv) as unknown as RawCoverageRow[];
96113
const missingRows = parseTsv(missingTsv) as unknown as RawMissingCountsRow[];
@@ -106,7 +123,12 @@ async function buildCldrLocales(): Promise<void> {
106123
if (id && id.includes('_')) missingMap[id] = row;
107124
}
108125

109-
const localeList = new Set<string>([...available.core, ...available.modern, ...available.full]);
126+
// Normalise the lists. If a tier list is missing or not an array in this release we
127+
// substitute an empty array so that spread operations don’t blow up.
128+
const coreList: string[] = Array.isArray(available.core) ? available.core : [];
129+
const modernList: string[] = Array.isArray(available.modern) ? available.modern : [];
130+
const fullList: string[] = Array.isArray(available.full) ? available.full : [];
131+
const localeList = new Set<string>([...coreList, ...modernList, ...fullList]);
110132
const output: any[] = [];
111133

112134
const pct = (value: string): number | undefined => {
@@ -116,27 +138,68 @@ async function buildCldrLocales(): Promise<void> {
116138
};
117139

118140
for (const loc of localeList) {
119-
const tier = available.core.includes(loc)
120-
? 'core'
121-
: available.modern.includes(loc)
122-
? 'modern'
123-
: 'full';
124-
const isDefault = available.defaultContent?.includes(loc) ?? false;
141+
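// Look up coverage and missing-count rows using the raw locale ID
142+
// (which uses hyphens). NOTE(review): missingMap is only populated for IDs containing '_' — confirm the key formats match.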
125143
const coverage = coverageMap[loc];
126144
const missing = missingMap[loc];
127145

128-
// Queue XML existence check; we’ll resolve after building objects
146+
// Determine tier based on the target coverage level reported in the
147+
// locale-coverage.tsv file. Coverage levels use the same naming as
148+
// our tier type (core, basic, moderate, modern). Some rows prefix
149+
// the level with an asterisk to indicate a computed value; strip any
150+
// leading non‑letters before comparison. If no coverage information
151+
// exists for this locale, or the level is not one of those four, fall back to the modern tier. We do not
152+
// expose a separate “full” tier; locales in the full list will be
153+
// classified according to their coverage level.
154+
let tier: 'core' | 'basic' | 'moderate' | 'modern' = 'modern';
155+
if (coverage && coverage['Target Level']) {
156+
const rawLevel = coverage['Target Level'].replace(/^[^A-Za-z]*/, '').toLowerCase();
157+
if (rawLevel === 'core' || rawLevel === 'basic' || rawLevel === 'moderate' || rawLevel === 'modern') {
158+
tier = rawLevel as typeof tier;
159+
}
160+
}
161+
162+
// Flag default-content locales. If the defaultContent list is absent,
163+
// treat all locales as non-default.
164+
const isDefault = Array.isArray(available.defaultContent)
165+
? available.defaultContent.includes(loc)
166+
: false;
167+
168+
// Queue XML existence check; resolve after building objects
129169
const xmlPromise = xmlExists(loc);
130170

171+
// Prepare a normalised version of the locale for downstream lookup. The
172+
// UI expects underscores as separators.
173+
const subtags = loc.split('-');
174+
const normalizedLocale = subtags.join('_');
175+
const language = subtags[0];
176+
let script: string | undefined;
177+
let region: string | undefined;
178+
if (subtags.length === 2) {
179+
const second = subtags[1];
180+
if (/^[A-Z][a-z]{3}$/.test(second)) {
181+
script = second;
182+
} else {
183+
region = second;
184+
}
185+
} else if (subtags.length >= 3) {
186+
const second = subtags[1];
187+
const third = subtags[2];
188+
if (/^[A-Z][a-z]{3}$/.test(second)) {
189+
script = second;
190+
region = third;
191+
} else {
192+
region = second;
193+
}
194+
}
195+
131196
output.push({
132-
locale: loc,
133-
language: loc.split(/[_-]/)[0],
134-
region: loc.split(/[_-]/)[1] ?? undefined,
135-
script: loc.split(/[_-]/)[2] ?? undefined,
197+
locale: normalizedLocale,
198+
language,
199+
region,
200+
script,
136201
tier,
137-
// use new property name expected by UI
138202
localeIsDefaultForLanguage: isDefault,
139-
// Coverage fields
140203
targetLevel: coverage?.['Target Level'] || undefined,
141204
computedLevel: coverage?.['Computed Level'] || undefined,
142205
confirmedPct: pct(coverage?.['%'] || ''),
@@ -147,7 +210,9 @@ async function buildCldrLocales(): Promise<void> {
147210
icuIncluded: coverage?.ICU?.toLowerCase().includes('icu') ?? false,
148211
defaultRegion: coverage?.['Default Region'] || undefined,
149212
notes:
150-
coverage && coverage['Missing Features'] ? coverage['Missing Features'].split(/,\s*/) : [],
213+
coverage && coverage['Missing Features']
214+
? coverage['Missing Features'].split(/,\s*/)
215+
: [],
151216
missingCounts: missing
152217
? {
153218
found: Number.parseInt(missing.Found || '0', 10),

src/entities/types/CLDRLocaleTypes.tsx

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,15 @@ export interface CLDRLocaleSupport {
1313
/** Script code, e.g. "Latn" (optional) */
1414
script?: string;
1515

16-
/** Tier of locale support: core, basic, moderate or modern */
16+
/**
17+
* Tier of locale support. The Unicode CLDR defines four coverage levels
18+
* for locales: core, basic, moderate and modern. These indicate the
19+
* amount of locale data available and correspond to increasing levels of
20+
* completeness in CLDR. We intentionally do not expose the “full” list
21+
* from availableLocales.json as an explicit tier; instead, locales in
22+
* that list are classified using their target coverage level from the
23+
* CLDR charts (or default to modern if no chart data is present).
24+
*/
1725
tier: 'core' | 'basic' | 'moderate' | 'modern';
1826

1927
/**

0 commit comments

Comments
 (0)