diff --git a/package.json b/package.json index 474c8c02..ed2f1a63 100644 --- a/package.json +++ b/package.json @@ -1,4 +1,4 @@ -{ + { "name": "lang-nav", "private": true, "version": "0.0.0", @@ -10,6 +10,7 @@ "preview": "vite preview", "predeploy": "npm run build", "deploy": "gh-pages -d dist", + "build:cldr-locales": "ts-node scripts/ingest/build-cldr-locales.ts", "test": "vitest", "test:watch": "vitest --watch", "test:ui": "vitest --ui", diff --git a/public/data/unicode/cldrLocales.json b/public/data/unicode/cldrLocales.json new file mode 100644 index 00000000..73c39a41 --- /dev/null +++ b/public/data/unicode/cldrLocales.json @@ -0,0 +1,5 @@ +{ + "release": "43.0.0", + "generatedAt": "", + "locales": [] +} diff --git a/scripts/ingest/build-cldr-locales.ts b/scripts/ingest/build-cldr-locales.ts new file mode 100644 index 00000000..8c55c20e --- /dev/null +++ b/scripts/ingest/build-cldr-locales.ts @@ -0,0 +1,250 @@ +/** + * Build script to generate a list of CLDR locale support objects. + * + * This script fetches data from the Unicode CLDR project and combines + * multiple data sources to produce a JSON file consumed by the UI. The + * output file is stored in `public/data/unicode/cldrLocales.json` and + * includes one entry per locale with a variety of support metrics. These + * metrics include whether the locale has an XML file in the CLDR + * repository, the tier of support (core, basic, moderate, modern), coverage + * levels and percentages, ICU inclusion, default-content flags and more. + * + * This script is not executed at runtime by the browser. Instead it is + * intended to be run manually or as part of a build pipeline. Because + * network access may not be available when run in some environments, the + * script is defensive: network calls are isolated and the data sources + * are configurable via constants at the top of the file. 
+ */
+
+import fs from 'node:fs/promises';
+import path from 'node:path';
+
+const CLDR_RELEASE = '43.0.0';
+const CLDR_CORE_BASE = `https://cdn.jsdelivr.net/npm/cldr-core@${CLDR_RELEASE}`;
+const CHARTS_TSV_BASE = `https://raw.githubusercontent.com/unicode-org/cldr-staging/main/docs/charts/${CLDR_RELEASE.replace(
+  /\.0\.0$/,
+  '',
+)}/tsv`;
+// NOTE(review): this URL points at the `main` branch rather than the
+// CLDR_RELEASE tag, so the XML presence check can drift from the release the
+// other data sources are pinned to.
+const CLDR_REPO_RAW_BASE = 'https://raw.githubusercontent.com/unicode-org/cldr/main/common/main';
+
+const OUTPUT_FILE = path.join(process.cwd(), 'public/data/unicode/cldrLocales.json');
+
+/**
+ * Parse tab-separated text into one record per data row, keyed by the cells
+ * of the first non-blank line (the header).
+ *
+ * Quoted fields are not supported. Blank lines are skipped, header cells are
+ * trimmed, and cells missing from a short row are filled with the empty
+ * string. Only line terminators are stripped from the input, so a leading or
+ * trailing empty cell keeps its column position.
+ *
+ * @param tsv Raw TSV text; may be empty.
+ * @returns One record per data row; an empty array when there is no header.
+ */
+function parseTsv(tsv: string): Record<string, string>[] {
+  const [headerLine, ...dataLines] = tsv.split(/\r?\n/).filter((line) => line.trim() !== '');
+  if (headerLine === undefined) return [];
+  const header = headerLine.split('\t').map((cell) => cell.trim());
+  return dataLines.map((line) => {
+    const values = line.split('\t');
+    return Object.fromEntries(header.map((key, idx) => [key, values[idx] ?? '']));
+  });
+}
+
+/**
+ * Check whether `common/main/<locale>.xml` exists in the CLDR repository.
+ *
+ * CLDR XML files are named with underscore separators (`en_US.xml`), so a
+ * hyphenated ID (`en-US`) is converted before the URL is built.
+ *
+ * @param locale Locale ID in either hyphen or underscore form.
+ * @returns `true` when the HEAD request succeeds; `false` on an HTTP error
+ *   status or a network failure (the failure is logged, not thrown).
+ */
+async function xmlExists(locale: string): Promise<boolean> {
+  const url = `${CLDR_REPO_RAW_BASE}/${locale.replace(/-/g, '_')}.xml`;
+  try {
+    const response = await fetch(url, { method: 'HEAD' });
+    return response.ok;
+  } catch (err: unknown) {
+    // Best effort: report the failure and record the locale as absent
+    // instead of aborting the whole build.
+    console.warn(`Could not check ${url}:`, err instanceof Error ? err.message : err);
+    return false;
+  }
+}
+
+/** One row of locale-coverage.tsv, keyed by the chart's column headers. */
+interface RawCoverageRow {
+  'Language/Locale': string;
+  'Target Level': string;
+  '≟': string;
+  'Computed Level': string;
+  ICU: string;
+  '%': string;
+  'ⓜ%': string;
+  'ⓑ%': string;
+  'ⓒ%': string;
+  'Missing Features': string;
+  'Default Region': string;
+}
+
+/** One row of locale-missing-counts.tsv, keyed by the chart's column headers. */
+interface RawMissingCountsRow {
+  'Language/Locale': string;
+  Found: string;
+  Unconfirmed: string;
+  Missing: string;
+}
+
+interface AvailableLocales {
+  /**
+   * Locales in the “core” coverage tier. Not all releases publish a core
+   * list, so this property is optional.
+   */
+  core?: string[];
+  /** Locales in the “modern” coverage tier. */
+  modern?: string[];
+  /** Locales in the “full” coverage tier.
*/
+  full?: string[];
+  /** List of default content locales, from defaultContent.json. */
+  defaultContent?: string[];
+}
+
+/** Maximum number of XML presence checks (HEAD requests) in flight at once. */
+const XML_CHECK_BATCH_SIZE = 20;
+
+/** Coverage tiers exposed to the UI. */
+type Tier = 'core' | 'basic' | 'moderate' | 'modern';
+const TIERS: readonly Tier[] = ['core', 'basic', 'moderate', 'modern'];
+
+/** Convert a locale ID to its underscore-separated form (`en-US` -> `en_US`). */
+function toUnderscoreId(id: string): string {
+  return id.trim().replace(/-/g, '_');
+}
+
+/** Narrow an unknown JSON value to a plain (non-array) object. */
+function isObject(value: unknown): value is Record<string, unknown> {
+  return typeof value === 'object' && value !== null && !Array.isArray(value);
+}
+
+/** Return the string members of `value` when it is an array, otherwise an empty list. */
+function stringList(value: unknown): string[] {
+  return Array.isArray(value)
+    ? value.filter((item): item is string => typeof item === 'string')
+    : [];
+}
+
+/** Fetch `url`, throwing on an HTTP error status instead of handing back an error page. */
+async function fetchOk(url: string): Promise<Response> {
+  const response = await fetch(url);
+  if (!response.ok) {
+    throw new Error(`Failed to fetch ${url}: ${response.status} ${response.statusText}`);
+  }
+  return response;
+}
+
+/** Fetch `url` and return its body as text. */
+async function fetchText(url: string): Promise<string> {
+  const response = await fetchOk(url);
+  return response.text();
+}
+
+/** Fetch `url` and return its body as parsed, still unvalidated, JSON. */
+async function fetchJson(url: string): Promise<unknown> {
+  const response = await fetchOk(url);
+  return response.json();
+}
+
+/**
+ * Extract the tier lists from availableLocales.json. cldr-core nests them
+ * under a top-level `availableLocales` key; a bare object is accepted too.
+ */
+function readAvailableLocales(json: unknown): AvailableLocales {
+  if (!isObject(json)) return {};
+  const lists = isObject(json.availableLocales) ? json.availableLocales : json;
+  return {
+    core: stringList(lists.core),
+    modern: stringList(lists.modern),
+    full: stringList(lists.full),
+    defaultContent: stringList(lists.defaultContent),
+  };
+}
+
+/** Index chart rows by their underscore-normalised `Language/Locale` ID. */
+function indexRows<T extends { 'Language/Locale': string }>(rows: T[]): Map<string, T> {
+  const index = new Map<string, T>();
+  for (const row of rows) {
+    const id = row['Language/Locale'];
+    if (id) index.set(toUnderscoreId(id), row);
+  }
+  return index;
+}
+
+/** Parse a percentage cell; blank cells, the “—” placeholder and non-numbers give undefined. */
+function pct(value: string | undefined): number | undefined {
+  const text = value?.trim() ?? '';
+  if (text === '' || text === '—') return undefined;
+  const num = Number.parseFloat(text);
+  return Number.isFinite(num) ? num : undefined;
+}
+
+/**
+ * Map a `Target Level` cell to a tier. Leading non-letters (such as an
+ * asterisk prefix) are stripped before comparison; an unknown or missing
+ * level falls back to `modern`.
+ */
+function tierFromTargetLevel(targetLevel: string | undefined): Tier {
+  const level = (targetLevel ?? '').replace(/^[^A-Za-z]*/, '').trim().toLowerCase();
+  return TIERS.find((tier) => tier === level) ?? 'modern';
+}
+
+/**
+ * Split a locale ID into language, script and region. The second subtag is
+ * a script when it is title-cased with four letters; otherwise it is taken
+ * as the region. Subtags after the region are ignored.
+ */
+function splitSubtags(loc: string): { language: string; script?: string; region?: string } {
+  const [language = '', second, third] = loc.split(/[-_]/);
+  if (second === undefined) return { language };
+  if (/^[A-Z][a-z]{3}$/.test(second)) return { language, script: second, region: third };
+  return { language, region: second };
+}
+
+/** Assemble the output record for one locale from its (optional) chart rows. */
+function buildRecord(
+  loc: string,
+  coverage: RawCoverageRow | undefined,
+  missing: RawMissingCountsRow | undefined,
+  isDefault: boolean,
+) {
+  const { language, script, region } = splitSubtags(loc);
+  const missingFeatures = coverage?.['Missing Features'] ?? '';
+  return {
+    // The UI expects underscores as separators.
+    locale: toUnderscoreId(loc),
+    language,
+    region,
+    script,
+    // There is no separate “full” tier: locales from the full list are
+    // classified by their target coverage level.
+    tier: tierFromTargetLevel(coverage?.['Target Level']),
+    localeIsDefaultForLanguage: isDefault,
+    targetLevel: coverage?.['Target Level'] || undefined,
+    computedLevel: coverage?.['Computed Level'] || undefined,
+    confirmedPct: pct(coverage?.['%']),
+    // NOTE(review): pctModern reads the same “%” column as confirmedPct —
+    // confirm that is the intended source for the modern-level percentage.
+    pctModern: pct(coverage?.['%']),
+    pctModerate: pct(coverage?.['ⓜ%']),
+    pctBasic: pct(coverage?.['ⓑ%']),
+    pctCore: pct(coverage?.['ⓒ%']),
+    icuIncluded: coverage?.ICU?.toLowerCase().includes('icu') ?? false,
+    defaultRegion: coverage?.['Default Region'] || undefined,
+    notes: missingFeatures ? missingFeatures.split(/,\s*/) : [],
+    missingCounts: missing
+      ? {
+          found: Number.parseInt(missing.Found || '0', 10),
+          unconfirmed: Number.parseInt(missing.Unconfirmed || '0', 10),
+          missing: Number.parseInt(missing.Missing || '0', 10),
+        }
+      : undefined,
+    // Filled in once the XML presence checks have run.
+    presentInCLDRDatabase: false,
+  };
+}
+
+/**
+ * Main routine: download the CLDR sources, merge them into one record per
+ * locale and write the result to OUTPUT_FILE.
+ *
+ * Chart rows use underscore-separated IDs while availableLocales.json uses
+ * hyphens, so every ID is normalised to the underscore form before rows are
+ * indexed, looked up, or checked against the XML repository.
+ *
+ * @throws Error when a source cannot be downloaded or no locale IDs are
+ *   found, so that an existing output file is not overwritten with empty data.
+ */
+async function buildCldrLocales(): Promise<void> {
+  const [availableJson, defaultContentJson, coverageTsv, missingTsv] = await Promise.all([
+    fetchJson(`${CLDR_CORE_BASE}/availableLocales.json`),
+    fetchJson(`${CLDR_CORE_BASE}/defaultContent.json`),
+    fetchText(`${CHARTS_TSV_BASE}/locale-coverage.tsv`),
+    fetchText(`${CHARTS_TSV_BASE}/locale-missing-counts.tsv`),
+  ]);
+
+  const available = readAvailableLocales(availableJson);
+  const coverageByLocale = indexRows(parseTsv(coverageTsv) as unknown as RawCoverageRow[]);
+  const missingByLocale = indexRows(parseTsv(missingTsv) as unknown as RawMissingCountsRow[]);
+
+  // Default-content locales are published in their own file; a list embedded
+  // in availableLocales.json is honoured as well if a release provides one.
+  const defaultContent = new Set(
+    [
+      ...(available.defaultContent ?? []),
+      ...stringList(isObject(defaultContentJson) ? defaultContentJson.defaultContent : undefined),
+    ].map(toUnderscoreId),
+  );
+
+  // A Set removes locales that appear in more than one tier list.
+  const localeIds = new Set<string>([
+    ...(available.core ?? []),
+    ...(available.modern ?? []),
+    ...(available.full ?? []),
+  ]);
+  if (localeIds.size === 0) {
+    throw new Error('No locale IDs found in availableLocales.json; refusing to write empty output');
+  }
+
+  const records = [...localeIds].map((loc) => {
+    const id = toUnderscoreId(loc);
+    return buildRecord(loc, coverageByLocale.get(id), missingByLocale.get(id), defaultContent.has(id));
+  });
+
+  // Resolve XML presence flags in bounded batches rather than issuing one
+  // request per locale all at once.
+  for (let start = 0; start < records.length; start += XML_CHECK_BATCH_SIZE) {
+    const batch = records.slice(start, start + XML_CHECK_BATCH_SIZE);
+    await Promise.all(
+      batch.map(async (record) => {
+        record.presentInCLDRDatabase = await xmlExists(record.locale);
+      }),
+    );
+  }
+
+  const finalOutput = {
+    release: CLDR_RELEASE,
+    generatedAt: new Date().toISOString(),
+    locales: records,
+  };
+
+  await fs.mkdir(path.dirname(OUTPUT_FILE), { recursive: true });
+  await fs.writeFile(OUTPUT_FILE, JSON.stringify(finalOutput, null, 2));
+  console.log(`Wrote ${records.length} locale records to ${OUTPUT_FILE}`);
+}
+
+buildCldrLocales().catch((err: unknown) => {
+  console.error(err);
+  process.exit(1);
+});
diff --git a/src/entities/types/CLDRLocaleTypes.tsx b/src/entities/types/CLDRLocaleTypes.tsx
new file mode 100644
index 00000000..fdd49171
--- /dev/null
+++ b/src/entities/types/CLDRLocaleTypes.tsx
@@ -0,0 +1,71 @@
+/**
+ * Types for CLDR locale support. These types mirror the structure of the
+ * objects generated by the build script in scripts/ingest/build-cldr-locales.ts.
+ */
+
+export interface CLDRLocaleSupport {
+  /** Locale ID with underscore separators, e.g. "en_US" (the BCP-47 form would be "en-US") */
+  locale: string;
+  /** Language code, e.g. "en" */
+  language: string;
+  /** Region code, e.g. "US" (optional) */
+  region?: string;
+  /** Script code, e.g. "Latn" (optional) */
+  script?: string;
+
+  /**
+   * Tier of locale support. The Unicode CLDR defines four coverage levels
+   * for locales: core, basic, moderate and modern. These indicate the
+   * amount of locale data available and correspond to increasing levels of
+   * completeness in CLDR.
We intentionally do not expose the “full” list
+   * from availableLocales.json as an explicit tier; instead, locales in
+   * that list are classified using their target coverage level from the
+   * CLDR charts (or default to modern if no chart data is present).
+   */
+  tier: 'core' | 'basic' | 'moderate' | 'modern';
+
+  /**
+   * True if this locale is listed in defaultContent.json. Indicates that the
+   * locale is the default content locale for its language.
+   */
+  localeIsDefaultForLanguage?: boolean;
+
+  /** Whether a corresponding XML exists in CLDR repo */
+  presentInCLDRDatabase?: boolean;
+
+  /** Coverage target (modern/moderate/basic/core) */
+  targetLevel?: string;
+  /** Coverage computed level */
+  computedLevel?: string;
+
+  /** Aggregate % confirmed */
+  confirmedPct?: number;
+
+  /** Breakdown percentages (optional) */
+  pctModern?: number;
+  pctModerate?: number;
+  pctBasic?: number;
+  pctCore?: number;
+
+  /** True if the locale is included in ICU datasets (per charts) */
+  icuIncluded?: boolean;
+
+  /** Charts’ default region column (if present) */
+  defaultRegion?: string;
+
+  /** Missing feature tokens from charts */
+  notes?: string[];
+
+  /** Counts at target level */
+  missingCounts?: {
+    found: number;
+    unconfirmed: number;
+    missing: number;
+  };
+}
+
+/** Shape of the generated cldrLocales.json file. */
+export interface CLDRLocaleIndex {
+  release: string;
+  generatedAt: string;
+  locales: CLDRLocaleSupport[];
+}
diff --git a/src/features/data-loading/cldrLocales.ts b/src/features/data-loading/cldrLocales.ts
new file mode 100644
index 00000000..ee6d6019
--- /dev/null
+++ b/src/features/data-loading/cldrLocales.ts
@@ -0,0 +1,18 @@
+import cldrLocalesData from '../../../public/data/unicode/cldrLocales.json';
+import type { CLDRLocaleIndex, CLDRLocaleSupport } from '@entities/types/CLDRLocaleTypes';
+
+// Import the generated JSON instead of requiring it.
+
+// Cast the imported JSON to the index type; TypeScript cannot infer literal
+// unions such as `tier` from a JSON module, hence the cast through `unknown`.
+const rawData = cldrLocalesData as unknown as CLDRLocaleIndex;
+
+/** Normalise a locale ID for lookup: trimmed, lower-cased, hyphens treated as underscores. */
+function normalizeLocaleId(localeId: string): string {
+  return localeId.trim().toLowerCase().replace(/-/g, '_');
+}
+
+/** Lookup table from normalised locale ID to its entry; built on first use. */
+let localeIndex: Map<string, CLDRLocaleSupport> | undefined;
+
+/** Return all CLDR locale support entries. */
+export function getAllCldrLocales(): CLDRLocaleSupport[] {
+  return rawData.locales;
+}
+
+/**
+ * Retrieve the CLDR support entry for a given locale code.
+ *
+ * Matching ignores case and treats `-` and `_` as the same separator, so
+ * both "en-US" and "en_us" find the "en_US" entry.
+ *
+ * @param localeId Locale ID to look up.
+ * @returns The matching entry, or undefined when the locale is not listed.
+ */
+export function getCldrLocale(localeId: string): CLDRLocaleSupport | undefined {
+  if (localeIndex === undefined) {
+    localeIndex = new Map();
+    for (const entry of rawData.locales) {
+      const key = normalizeLocaleId(entry.locale);
+      // Keep the first entry for a key, as a front-to-back array search would.
+      if (!localeIndex.has(key)) localeIndex.set(key, entry);
+    }
+  }
+  return localeIndex.get(normalizeLocaleId(localeId));
+}
diff --git a/src/widgets/details/LocaleDetails.tsx b/src/widgets/details/LocaleDetails.tsx
index 985b237f..7d29e523 100644
--- a/src/widgets/details/LocaleDetails.tsx
+++ b/src/widgets/details/LocaleDetails.tsx
@@ -1,7 +1,8 @@
 import React from 'react';
 
+import { getCldrLocale } from '@features/data-loading/cldrLocales';
 import Hoverable from '@features/hovercard/Hoverable';
 import HoverableObjectName from '@features/hovercard/HoverableObjectName';
 
 import LocaleCensusCitation from '@entities/locale/LocaleCensusCitation';
 import { LocalePopulationAdjusted } from '@entities/locale/LocalePopulationAdjusted';
@@ -17,9 +18,7 @@ import Deemphasized from '@shared/ui/Deemphasized';
 import { PercentageDifference } from '@shared/ui/PercentageDifference';
import Pill from '@shared/ui/Pill';
 
-type Props = {
-  locale: LocaleData;
-};
+type Props = { locale: LocaleData };
 
 const LocaleDetails: React.FC<Props> = ({ locale }) => {
   return (
@@ -213,6 +224,47 @@ const LocalePopulationSection: React.FC<{ locale: LocaleData }> = ({ locale }) =
   );
 };
 
+/**
+ * CLDR Support section: shows the CLDR entry found for `locale.ID`, or a
+ * short "not supported" message when there is none.
+ *
+ * NOTE(review): the wrapper elements and their `title` props below were
+ * reconstructed — confirm them against the DetailsSection / DetailsField
+ * components. Nothing in the visible hunks renders this section; make sure
+ * LocaleDetails includes it.
+ */
+const LocaleCLDRSupportSection: React.FC<{ locale: LocaleData }> = ({ locale }) => {
+  const cldr = getCldrLocale(locale.ID);
+  if (!cldr) {
+    return (
+      <DetailsSection title="CLDR Support">
+        <Deemphasized>Not supported by CLDR.</Deemphasized>
+      </DetailsSection>
+    );
+  }
+  return (
+    <DetailsSection title="CLDR Support">
+      <DetailsField title="Tier:">{cldr.tier}</DetailsField>
+      <DetailsField title="Present in CLDR:">
+        {cldr.presentInCLDRDatabase ? 'Yes' : 'No'}
+      </DetailsField>
+      <DetailsField title="Default for language:">
+        {cldr.localeIsDefaultForLanguage ? 'Yes' : 'No'}
+      </DetailsField>
+      <DetailsField title="Target / computed level:">
+        {cldr.targetLevel ?? '—'} / {cldr.computedLevel ?? '—'}
+      </DetailsField>
+      {cldr.confirmedPct != null && (
+        <DetailsField title="Confirmed:">{cldr.confirmedPct.toFixed(1)}%</DetailsField>
+      )}
+      {cldr.icuIncluded != null && (
+        <DetailsField title="Included in ICU:">{cldr.icuIncluded ? 'Yes' : 'No'}</DetailsField>
+      )}
+      {cldr.missingCounts && (
+        <DetailsField title="Counts:">
+          {cldr.missingCounts.found} found / {cldr.missingCounts.unconfirmed} unconfirmed /{' '}
+          {cldr.missingCounts.missing} missing
+        </DetailsField>
+      )}
+      {cldr.notes && cldr.notes.length > 0 && (
+        <DetailsField title="Missing features:">{cldr.notes.join(', ')}</DetailsField>
+      )}
+    </DetailsSection>
+  );
+};
+
 const LocaleOtherSection: React.FC<{ locale: LocaleData }> = ({ locale }) => {
   const { officialStatus, wikipedia, localeSource, containedLocales } = locale;
   return (
diff --git a/tsconfig.app.json b/tsconfig.app.json
index 281459cf..d5356011 100644
--- a/tsconfig.app.json
+++ b/tsconfig.app.json
@@ -6,6 +6,8 @@
     "lib": ["ES2020", "DOM", "DOM.Iterable"],
     "module": "ESNext",
     "skipLibCheck": true,
+    "resolveJsonModule": true,
+    "types": ["node"],
 
     /* Bundler mode */
     "moduleResolution": "bundler",