WeblateOrg · nijel · Apr 1, 2025 · Apr 1, 2025
diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml
@@ -38,5 +38,5 @@ jobs:
     - name: Run mypy
       run: |
         echo "::add-matcher::.github/matchers/mypy.json"
-        mypy --show-column-numbers weblate_language_data
+        mypy --show-column-numbers weblate_language_data scripts/*.py
         echo "::remove-matcher owner=mypy::"
diff --git a/scripts/add-iso.py b/scripts/add-iso.py
@@ -39,6 +39,8 @@ def convert_name(name: str) -> str:
                     item.get("inverted_name", item["name"])
                 )
 
+lines: list[list[str]]
+
 with open("languages.csv", newline="") as handle:
     reader = csv.reader(handle, delimiter=",")
     header = next(reader)
@@ -70,12 +72,17 @@ def convert_name(name: str) -> str:
         [
             code,
             names[code].split(";")[-1].strip(),
-            plurals,
+            str(plurals),
             formula,
         ],
     )
 
 with open("languages.csv", "w", newline="") as handle:
     writer = csv.writer(handle, delimiter=",", lineterminator="\n")
     writer.writerow(header)
-    writer.writerows(sorted(lines, key=itemgetter(0)))
+    writer.writerows(
+        sorted(
+            lines,
+            key=itemgetter(0),  # type: ignore[arg-type]
+        )
+    )
diff --git a/scripts/export-cldr-case.py b/scripts/export-cldr-case.py
@@ -4,15 +4,16 @@
 #
 # SPDX-License-Identifier: MIT
 
+from __future__ import annotations
+
 import csv
 import json
 
 # Read languages
 with open("languages.csv") as csvfile:
     reader = csv.reader(csvfile, delimiter=",")
     next(reader)
-    LANGUAGES = list(reader)
-    LANGUAGE_CODES = {lang[0] for lang in LANGUAGES}
+    LANGUAGE_CODES = {lang[0] for lang in reader}
 
 # Read
 with open("case-insensitive.csv") as csvfile:
@@ -27,8 +28,12 @@
 with open(
     "modules/cldr-json/cldr-json/cldr-core/supplemental/languageData.json",
 ) as handle:
-    LANGUAGES = json.load(handle)["supplemental"]["languageData"]
+    LANGUAGES: dict[str, dict[str, str]] = json.load(handle)["supplemental"][
+        "languageData"
+    ]
 
+base: str
+script: str | None
 for code in LANGUAGE_CODES:
     if "_" in code:
         base, script = code.split("_", 1)

diff --git a/scripts/export-cldr-population.py b/scripts/export-cldr-population.py
@@ -14,7 +14,7 @@
 with open(
     "modules/cldr-json/cldr-json/cldr-core/supplemental/territoryInfo.json",
 ) as handle:
-    languages = defaultdict(int)
+    languages: dict[str, float] = defaultdict(float)
     for code, territory in json.load(handle)["supplemental"]["territoryInfo"].items():
         population = int(territory["_population"])
         if "languagePopulation" not in territory:

diff --git a/scripts/export-cldr.py b/scripts/export-cldr.py
@@ -8,6 +8,7 @@
 
 import json
 import re
+from typing import Literal, TypedDict
 
 MAPPINGS = {
     "ar_001": "ar",
@@ -112,7 +113,7 @@ def convert_atom(atom: str) -> str | bool:
     )
 
 
-def convert_formula(cldr_formula_and_examples: str) -> str:
+def convert_formula(cldr_formula_and_examples: str) -> str | bool:
     # Skip formulas which do not trigger integer
     if "@integer" not in cldr_formula_and_examples:
         return False
@@ -169,7 +170,9 @@ def convert_formula(cldr_formula_and_examples: str) -> str:
     return " || ".join(chunks)
 
 
-def reverse_formula(formula: str) -> str:
+def reverse_formula(formula: str | bool) -> str:
+    if isinstance(formula, bool):
+        raise TypeError(f"Unable to reverse the formula {formula!r}")
     if re.match(r"^n( % \d+)? == \d+(\.\.\d+|,\d+)*?$", formula):
         return formula.replace(" == ", " != ")
     if re.match(r"^n( % \d+)? != \d+(\.\.\d+|,\d+)*?$", formula):
@@ -193,14 +196,16 @@ def reverse_formula(formula: str) -> str:
     if formula == "(n == 0 || n == 1) || n >= 11 && n <= 99":
         return "n >= 2 && (n < 11 || n > 99)"
 
-    raise ValueError(f"Unable to reverse the formula '{formula}'")
+    raise ValueError(f"Unable to reverse the formula {formula!r}")
 
 
-def merge_formulas(formulas: list[str]) -> str:
+def merge_formulas(formulas: list[str | Literal[True]]) -> str:
     max_n = len(formulas) - 1
     formula = f"{max_n}"
     for n in range(max_n - 1, -1, -1):
         part = formulas[n]
+        if isinstance(part, bool):
+            raise TypeError(f"Not supported part {part!r}")
 
         if not re.match(r"^\([^()]+\)$", part):
             part = f"({part})"
@@ -211,6 +216,14 @@ def merge_formulas(formulas: list[str]) -> str:
     return formula
 
 
+class LanguageDict(TypedDict, total=False):  # total=False: every key is optional
+    code: str
+    name: str
+    plurals: int  # NOTE(review): presumably the plural form count - confirm
+    formula: str  # NOTE(review): presumably the plural formula - confirm
+
+
+LANGUAGES: dict[str, LanguageDict]
 # Load language names
 with open(
     "modules/cldr-json/cldr-json/cldr-localenames-full/main/en/languages.json",

diff --git a/scripts/export-plural-tags.py b/scripts/export-plural-tags.py
@@ -9,8 +9,8 @@
 import pprint
 import subprocess
 
-BASE = "modules/cldr-json/cldr-json/cldr-core/supplemental/plurals.json"
-ALIASES = "modules/cldr-json/cldr-json/cldr-core/supplemental/aliases.json"
+BASE_FILE = "modules/cldr-json/cldr-json/cldr-core/supplemental/plurals.json"
+ALIASES_FILE = "modules/cldr-json/cldr-json/cldr-core/supplemental/aliases.json"
 
 HEADER = '''# Copyright © Michal Čihař <michal@weblate.org>
 #
@@ -93,7 +93,7 @@
 result = {}
 decimals = {}
 
-with open(BASE) as handle:
+with open(BASE_FILE) as handle:
     data = json.load(handle)
 
 for locale, rules in data["supplemental"]["plurals-type-cardinal"].items():
@@ -105,7 +105,7 @@
     decimals[locale] = [name.replace("pluralRule-count-", "") for name in rules]
 
 # Process CLDR
-with open(ALIASES) as handle:
+with open(ALIASES_FILE) as handle:
     aliases = json.load(handle)
 
 for code, alias in aliases["supplemental"]["metadata"]["alias"][

diff --git a/scripts/generate-language-data.py b/scripts/generate-language-data.py
@@ -135,7 +135,7 @@ def get_population(code):
         except KeyError:
             continue
         if existing != number:
-            CLDRPLURALS.append((code, LANGUAGE_NAMES[code], number, equation))
+            CLDRPLURALS.append([code, LANGUAGE_NAMES[code], number, equation])
 
 # Read default countries
 with open("default_countries.csv") as csvfile: