1818
1919import fs from 'node:fs/promises' ;
2020import path from 'node:path' ;
21- import territoryInfo from 'cldr-core/supplemental/territoryInfo.json' ;
21+
22+ const CLDR_RELEASE = '43.0.0' ;
23+ const CLDR_CORE_BASE = `https://cdn.jsdelivr.net/npm/cldr-core@${ CLDR_RELEASE } ` ;
24+ const CHARTS_TSV_BASE = `https://raw.githubusercontent.com/unicode-org/cldr-staging/main/docs/charts/${ CLDR_RELEASE . replace (
25+ / \. 0 \. 0 $ / ,
26+ '' ,
27+ ) } /tsv`;
28+ const CLDR_REPO_RAW_BASE = 'https://raw.githubusercontent.com/unicode-org/cldr/main/common/main' ;
2229
2330const OUTPUT_FILE = path . join ( process . cwd ( ) , 'public/data/unicode/cldrLocales.json' ) ;
2431
@@ -71,10 +78,17 @@ interface RawMissingCountsRow {
7178}
7279
7380interface AvailableLocales {
74- core : string [ ] ;
75- modern : string [ ] ;
76- full : string [ ] ;
77- defaultContent : string [ ] ;
81+ /**
82+ * Locales in the “core” coverage tier. Not all releases publish a core
83+ * list, so this property is optional.
84+ */
85+ core ?: string [ ] ;
86+ /** Locales in the “modern” coverage tier. */
87+ modern ?: string [ ] ;
88+ /** Locales in the “full” coverage tier. */
89+ full ?: string [ ] ;
90+ /** List of default content locales, from defaultContent.json. */
91+ defaultContent ?: string [ ] ;
7892}
7993
8094/** Main routine */
@@ -91,6 +105,9 @@ async function buildCldrLocales(): Promise<void> {
91105 missingCountsTsvPromise ,
92106 ] ) ;
93107
108+ // Cast to our interface. Some keys (e.g. core) may be missing from
109+ // availableLocales.json depending on the CLDR release, so we treat
110+ // absent properties as empty arrays below.
94111 const available : AvailableLocales = availableLocales as AvailableLocales ;
95112 const coverageRows = parseTsv ( coverageTsv ) as unknown as RawCoverageRow [ ] ;
96113 const missingRows = parseTsv ( missingTsv ) as unknown as RawMissingCountsRow [ ] ;
@@ -106,7 +123,12 @@ async function buildCldrLocales(): Promise<void> {
106123 if ( id && id . includes ( '_' ) ) missingMap [ id ] = row ;
107124 }
108125
109- const localeList = new Set < string > ( [ ...available . core , ...available . modern , ...available . full ] ) ;
126+ // Normalise the lists. If a tier list is undefined in this release we
127+ // substitute an empty array so that spread operations don’t blow up.
128+ const coreList : string [ ] = Array . isArray ( available . core ) ? available . core : [ ] ;
129+ const modernList : string [ ] = Array . isArray ( available . modern ) ? available . modern : [ ] ;
130+ const fullList : string [ ] = Array . isArray ( available . full ) ? available . full : [ ] ;
131+ const localeList = new Set < string > ( [ ...coreList , ...modernList , ...fullList ] ) ;
110132 const output : any [ ] = [ ] ;
111133
112134 const pct = ( value : string ) : number | undefined => {
@@ -116,27 +138,68 @@ async function buildCldrLocales(): Promise<void> {
116138 } ;
117139
118140 for ( const loc of localeList ) {
119- const tier = available . core . includes ( loc )
120- ? 'core'
121- : available . modern . includes ( loc )
122- ? 'modern'
123- : 'full' ;
124- const isDefault = available . defaultContent ?. includes ( loc ) ?? false ;
141+ // Look up coverage and missing-count rows using the raw locale ID
142+ // (which uses hyphens).
125143 const coverage = coverageMap [ loc ] ;
126144 const missing = missingMap [ loc ] ;
127145
128- // Queue XML existence check; we’ll resolve after building objects
146+ // Determine tier based on the target coverage level reported in the
147+ // locale-coverage.tsv file. Coverage levels use the same naming as
148+ // our tier type (core, basic, moderate, modern). Some rows prefix
149+ // the level with an asterisk to indicate a computed value; strip any
150+ // leading non‑letters before comparison. If no coverage information
151+ // exists for this locale, fall back to the modern tier. We do not
152+ // expose a separate “full” tier; locales in the full list will be
153+ // classified according to their coverage level.
154+ let tier : 'core' | 'basic' | 'moderate' | 'modern' = 'modern' ;
155+ if ( coverage && coverage [ 'Target Level' ] ) {
156+ const rawLevel = coverage [ 'Target Level' ] . replace ( / ^ [ ^ A - Z a - z ] * / , '' ) . toLowerCase ( ) ;
157+ if ( rawLevel === 'core' || rawLevel === 'basic' || rawLevel === 'moderate' || rawLevel === 'modern' ) {
158+ tier = rawLevel as typeof tier ;
159+ }
160+ }
161+
162+ // Flag default-content locales. If the defaultContent list is absent,
163+ // treat all locales as non-default.
164+ const isDefault = Array . isArray ( available . defaultContent )
165+ ? available . defaultContent . includes ( loc )
166+ : false ;
167+
168+ // Queue XML existence check; resolve after building objects
129169 const xmlPromise = xmlExists ( loc ) ;
130170
171+ // Prepare a normalised version of the locale for downstream lookup. The
172+ // UI expects underscores as separators.
173+ const subtags = loc . split ( '-' ) ;
174+ const normalizedLocale = subtags . join ( '_' ) ;
175+ const language = subtags [ 0 ] ;
176+ let script : string | undefined ;
177+ let region : string | undefined ;
178+ if ( subtags . length === 2 ) {
179+ const second = subtags [ 1 ] ;
180+ if ( / ^ [ A - Z ] [ a - z ] { 3 } $ / . test ( second ) ) {
181+ script = second ;
182+ } else {
183+ region = second ;
184+ }
185+ } else if ( subtags . length >= 3 ) {
186+ const second = subtags [ 1 ] ;
187+ const third = subtags [ 2 ] ;
188+ if ( / ^ [ A - Z ] [ a - z ] { 3 } $ / . test ( second ) ) {
189+ script = second ;
190+ region = third ;
191+ } else {
192+ region = second ;
193+ }
194+ }
195+
131196 output . push ( {
132- locale : loc ,
133- language : loc . split ( / [ _ - ] / ) [ 0 ] ,
134- region : loc . split ( / [ _ - ] / ) [ 1 ] ?? undefined ,
135- script : loc . split ( / [ _ - ] / ) [ 2 ] ?? undefined ,
197+ locale : normalizedLocale ,
198+ language,
199+ region,
200+ script,
136201 tier,
137- // use new property name expected by UI
138202 localeIsDefaultForLanguage : isDefault ,
139- // Coverage fields
140203 targetLevel : coverage ?. [ 'Target Level' ] || undefined ,
141204 computedLevel : coverage ?. [ 'Computed Level' ] || undefined ,
142205 confirmedPct : pct ( coverage ?. [ '%' ] || '' ) ,
@@ -147,7 +210,9 @@ async function buildCldrLocales(): Promise<void> {
147210 icuIncluded : coverage ?. ICU ?. toLowerCase ( ) . includes ( 'icu' ) ?? false ,
148211 defaultRegion : coverage ?. [ 'Default Region' ] || undefined ,
149212 notes :
150- coverage && coverage [ 'Missing Features' ] ? coverage [ 'Missing Features' ] . split ( / , \s * / ) : [ ] ,
213+ coverage && coverage [ 'Missing Features' ]
214+ ? coverage [ 'Missing Features' ] . split ( / , \s * / )
215+ : [ ] ,
151216 missingCounts : missing
152217 ? {
153218 found : Number . parseInt ( missing . Found || '0' , 10 ) ,
0 commit comments