From ad7684f2ed91adbd711db5cdbe1a3edb46abe1db Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michal=20=C4=8Ciha=C5=99?= <michal@cihar.com>
Date: Tue, 1 Apr 2025 10:06:20 +0200
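Subject: [PATCH] feat(ci): run mypy over scripts

Extend the mypy workflow step to check scripts/*.py in addition to the
weblate_language_data package, and adjust the scripts to accommodate
type checking:

- add variable and return type annotations, including a LanguageDict
  TypedDict in export-cldr.py
- handle boolean formula values explicitly in reverse_formula() and
  merge_formulas() by raising TypeError
- convert the plurals value to str before writing it in add-iso.py
- rename BASE/ALIASES to BASE_FILE/ALIASES_FILE in
  export-plural-tags.py
- append lists instead of tuples to CLDRPLURALS in
  generate-language-data.py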
---
 .github/workflows/mypy.yml        |  2 +-
 scripts/add-iso.py                | 11 +++++++++--
 scripts/export-cldr-case.py       | 11 ++++++++---
 scripts/export-cldr-population.py |  2 +-
 scripts/export-cldr.py            | 21 +++++++++++++++++----
 scripts/export-plural-tags.py     |  8 ++++----
 scripts/generate-language-data.py |  2 +-
 7 files changed, 41 insertions(+), 16 deletions(-)

diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml
index def091f98..35597117b 100644
--- a/.github/workflows/mypy.yml
+++ b/.github/workflows/mypy.yml
@@ -38,5 +38,5 @@ jobs:
     - name: Run mypy
       run: |
         echo "::add-matcher::.github/matchers/mypy.json"
-        mypy --show-column-numbers weblate_language_data
+        mypy --show-column-numbers weblate_language_data scripts/*.py
         echo "::remove-matcher owner=mypy::"
diff --git a/scripts/add-iso.py b/scripts/add-iso.py
index 700114278..186795a2b 100755
--- a/scripts/add-iso.py
+++ b/scripts/add-iso.py
@@ -39,6 +39,8 @@ def convert_name(name: str) -> str:
                     item.get("inverted_name", item["name"])
                 )
 
+lines: list[list[str]]
+
 with open("languages.csv", newline="") as handle:
     reader = csv.reader(handle, delimiter=",")
     header = next(reader)
@@ -70,7 +72,7 @@ def convert_name(name: str) -> str:
         [
             code,
             names[code].split(";")[-1].strip(),
-            plurals,
+            str(plurals),
             formula,
         ],
     )
@@ -78,4 +80,9 @@ def convert_name(name: str) -> str:
 with open("languages.csv", "w", newline="") as handle:
     writer = csv.writer(handle, delimiter=",", lineterminator="\n")
     writer.writerow(header)
-    writer.writerows(sorted(lines, key=itemgetter(0)))
+    writer.writerows(
+        sorted(
+            lines,
+            key=itemgetter(0),  # type: ignore[arg-type]
+        )
+    )
diff --git a/scripts/export-cldr-case.py b/scripts/export-cldr-case.py
index 5bae2d9c3..d81ea1255 100755
--- a/scripts/export-cldr-case.py
+++ b/scripts/export-cldr-case.py
@@ -4,6 +4,8 @@
 #
 # SPDX-License-Identifier: MIT
 
+from __future__ import annotations
+
 import csv
 import json
 
@@ -11,8 +13,7 @@
 with open("languages.csv") as csvfile:
     reader = csv.reader(csvfile, delimiter=",")
     next(reader)
-    LANGUAGES = list(reader)
-    LANGUAGE_CODES = {lang[0] for lang in LANGUAGES}
+    LANGUAGE_CODES = {lang[0] for lang in reader}
 
 # Read
 with open("case-insensitive.csv") as csvfile:
@@ -27,8 +28,12 @@
 with open(
     "modules/cldr-json/cldr-json/cldr-core/supplemental/languageData.json",
 ) as handle:
-    LANGUAGES = json.load(handle)["supplemental"]["languageData"]
+    LANGUAGES: dict[str, dict[str, str]] = json.load(handle)["supplemental"][
+        "languageData"
+    ]
 
+base: str
+script: str | None
 for code in LANGUAGE_CODES:
     if "_" in code:
         base, script = code.split("_", 1)
diff --git a/scripts/export-cldr-population.py b/scripts/export-cldr-population.py
index 1f9db7606..21c820e71 100755
--- a/scripts/export-cldr-population.py
+++ b/scripts/export-cldr-population.py
@@ -14,7 +14,7 @@
 with open(
     "modules/cldr-json/cldr-json/cldr-core/supplemental/territoryInfo.json",
 ) as handle:
-    languages = defaultdict(int)
+    languages: dict[str, float] = defaultdict(float)
     for code, territory in json.load(handle)["supplemental"]["territoryInfo"].items():
         population = int(territory["_population"])
         if "languagePopulation" not in territory:
diff --git a/scripts/export-cldr.py b/scripts/export-cldr.py
index b918af786..4d3e62b12 100755
--- a/scripts/export-cldr.py
+++ b/scripts/export-cldr.py
@@ -8,6 +8,7 @@
 
 import json
 import re
+from typing import Literal, TypedDict
 
 MAPPINGS = {
     "ar_001": "ar",
@@ -112,7 +113,7 @@ def convert_atom(atom: str) -> str | bool:
     )
 
 
-def convert_formula(cldr_formula_and_examples: str) -> str:
+def convert_formula(cldr_formula_and_examples: str) -> str | bool:
     # Skip formulas which do not trigger integer
     if "@integer" not in cldr_formula_and_examples:
         return False
@@ -169,7 +170,9 @@ def convert_formula(cldr_formula_and_examples: str) -> str:
     return " || ".join(chunks)
 
 
-def reverse_formula(formula: str) -> str:
+def reverse_formula(formula: str | bool) -> str:
+    if isinstance(formula, bool):
+        raise TypeError(f"Unable to reverse the formula {formula!r}")
     if re.match(r"^n( % \d+)? == \d+(\.\.\d+|,\d+)*?$", formula):
         return formula.replace(" == ", " != ")
     if re.match(r"^n( % \d+)? != \d+(\.\.\d+|,\d+)*?$", formula):
@@ -193,14 +196,16 @@ def reverse_formula(formula: str) -> str:
     if formula == "(n == 0 || n == 1) || n >= 11 && n <= 99":
         return "n >= 2 && (n < 11 || n > 99)"
 
-    raise ValueError(f"Unable to reverse the formula '{formula}'")
+    raise ValueError(f"Unable to reverse the formula {formula!r}")
 
 
-def merge_formulas(formulas: list[str]) -> str:
+def merge_formulas(formulas: list[str | Literal[True]]) -> str:
     max_n = len(formulas) - 1
     formula = f"{max_n}"
     for n in range(max_n - 1, -1, -1):
         part = formulas[n]
+        if isinstance(part, bool):
+            raise TypeError(f"Not supported part {part!r}")
 
         if not re.match(r"^\([^()]+\)$", part):
             part = f"({part})"
@@ -211,6 +216,14 @@ def merge_formulas(formulas: list[str]) -> str:
     return formula
 
 
+class LanguageDict(TypedDict, total=False):
+    code: str
+    name: str
+    plurals: int
+    formula: str
+
+
+LANGUAGES: dict[str, LanguageDict]
 # Load language names
 with open(
     "modules/cldr-json/cldr-json/cldr-localenames-full/main/en/languages.json",
diff --git a/scripts/export-plural-tags.py b/scripts/export-plural-tags.py
index c3e3a9759..d2d73fc5a 100755
--- a/scripts/export-plural-tags.py
+++ b/scripts/export-plural-tags.py
@@ -9,8 +9,8 @@
 import pprint
 import subprocess
 
-BASE = "modules/cldr-json/cldr-json/cldr-core/supplemental/plurals.json"
-ALIASES = "modules/cldr-json/cldr-json/cldr-core/supplemental/aliases.json"
+BASE_FILE = "modules/cldr-json/cldr-json/cldr-core/supplemental/plurals.json"
+ALIASES_FILE = "modules/cldr-json/cldr-json/cldr-core/supplemental/aliases.json"
 
 HEADER = '''# Copyright © Michal Čihař <michal@weblate.org>
 #
@@ -93,7 +93,7 @@
 result = {}
 decimals = {}
 
-with open(BASE) as handle:
+with open(BASE_FILE) as handle:
     data = json.load(handle)
 
 for locale, rules in data["supplemental"]["plurals-type-cardinal"].items():
@@ -105,7 +105,7 @@
     decimals[locale] = [name.replace("pluralRule-count-", "") for name in rules]
 
 # Process CLDR
-with open(ALIASES) as handle:
+with open(ALIASES_FILE) as handle:
     aliases = json.load(handle)
 
 for code, alias in aliases["supplemental"]["metadata"]["alias"][
diff --git a/scripts/generate-language-data.py b/scripts/generate-language-data.py
index 0a3b93d5e..f0ea1335b 100755
--- a/scripts/generate-language-data.py
+++ b/scripts/generate-language-data.py
@@ -135,7 +135,7 @@ def get_population(code):
         except KeyError:
             continue
         if existing != number:
-            CLDRPLURALS.append((code, LANGUAGE_NAMES[code], number, equation))
+            CLDRPLURALS.append([code, LANGUAGE_NAMES[code], number, equation])
 
 # Read default countries
 with open("default_countries.csv") as csvfile: