build: upgrade python+dependency versions (#211)

bdewilde · web-flow · commit 17d2dd3e5664 · 2026-03-21T16:05:41.000-04:00
* build: bump min python to v3.12

* ci: run on python 3.12 and 3.13

* build: bump lingua from v2.1 to v2.2

* fix: preload lingua models

* build: bump ty to latest version

* types: ignore a bunch of incorrect ty violations

note: the newer ty release reports many false positives, hence the ignores above

* ci: bump setup-uv to v7.6

* build: bump thinc
diff --git a/.github/actions/setup-python-env/action.yml b/.github/actions/setup-python-env/action.yml
@@ -14,7 +14,7 @@ runs:
       with:
         python-version: ${{ inputs.python-version }}
     - name: Install uv
-      uses: astral-sh/setup-uv@v7.3.0
+      uses: astral-sh/setup-uv@v7.6.0
       with:
         python-version: ${{ inputs.python-version }}
         version-file: "pyproject.toml"
diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml
@@ -30,7 +30,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.11", "3.12"]
+        python-version: ["3.12", "3.13"]
         # TODO: also run on macos-latest pending docker/colima issue
         os: [ubuntu-latest]
     services:
@@ -84,7 +84,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.11", "3.12"]
+        python-version: ["3.12", "3.13"]
         # TODO: also run on macos-latest pending docker/colima issue
         os: [ubuntu-latest]
     steps:
@@ -105,7 +105,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.11", "3.12"]
+        python-version: ["3.12", "3.13"]
         # TODO: also run on macos-latest pending docker/colima issue
         os: [ubuntu-latest]
     steps:
diff --git a/colandr/lib/fileio/ris.py b/colandr/lib/fileio/ris.py
@@ -104,7 +104,7 @@ def sanitize(references: list[dict]) -> list[dict]:
     return references
 
 
-def _sanitize_reference(reference: dict) -> dict:
+def _sanitize_reference(reference: dict[str, t.Any]) -> dict:
     # rename certain tags with their type-specific names
     if reference["type_of_reference"] in REF_TYPE_TAG_OVERRIDES:
         tag_overrides = REF_TYPE_TAG_OVERRIDES[reference["type_of_reference"]]
@@ -132,19 +132,19 @@ def _sanitize_reference(reference: dict) -> dict:
                     break
     # handle authors specified all together on one line
     if "authors" in reference:
-        reference["authors"] = _split_up_authors(reference["authors"])
+        reference["authors"] = _split_up_authors(reference["authors"])  # type: ignore[invalid-argument-type]
     # clean notes text, which may contain html tags and markup
     if "notes" in reference:
-        reference["notes"] = _strip_tags_from_notes(reference["notes"])
+        reference["notes"] = _strip_tags_from_notes(reference["notes"])  # type: ignore[invalid-argument-type]
     # split date key into year (if needed) and month
     if reference.get("date"):
-        reference["pub_month"] = reference["date"].month
+        reference["pub_month"] = reference["date"].month  # type: ignore[unresolved-attribute]
         if "pub_year" not in reference:
-            reference["pub_year"] = reference["date"].year
+            reference["pub_year"] = reference["date"].year  # type: ignore[unresolved-attribute]
     # HACK: cast dttms to dt strings to avoid json encoding error
     reference.update(
         {
-            key: reference[key].strftime("%Y-%m-%d")
+            key: reference[key].strftime("%Y-%m-%d")  # type: ignore[unresolved-attribute]
             for key in DTTM_KEYS
             if reference.get(key)
         }
@@ -154,7 +154,7 @@ def _sanitize_reference(reference: dict) -> dict:
         if "start_page" in reference and "end_page" in reference:
             try:
                 reference["number_of_pages"] = (
-                    reference["end_page"] - reference["start_page"]
+                    reference["end_page"] - reference["start_page"]  # type: ignore[unsupported-operator]
                 )
             except TypeError:
                 pass
diff --git a/colandr/lib/models/deduper_v2.py b/colandr/lib/models/deduper_v2.py
@@ -332,7 +332,7 @@ def _estimate_prob_two_records_match(self) -> None:
                 splink.block_on("title", "author"),
             ]
             self.model.training.estimate_probability_two_random_records_match(
-                deterministic_rules,
+                deterministic_rules,  # type: ignore[invalid-argument-type]
                 recall=0.7,
             )
 
diff --git a/colandr/lib/nlp/utils.py b/colandr/lib/nlp/utils.py
@@ -21,6 +21,7 @@
     # and run that in "high-accuracy" mode; unclear if this is worth the trade-off
     # .from_languages(Language.CHINESE, Language.ENGLISH, Language.FRENCH, Language.JAPANESE, Language.SPANISH)
     .with_minimum_relative_distance(0.8)
+    .with_preloaded_language_models()
     .build()
 )
 
diff --git a/pyproject.toml b/pyproject.toml
@@ -9,10 +9,10 @@ classifiers = [
   "Intended Audience :: Science/Research",
   "Programming Language :: Python",
   "Programming Language :: Python :: 3",
-  "Programming Language :: Python :: 3.11",
   "Programming Language :: Python :: 3.12",
+  "Programming Language :: Python :: 3.13",
 ]
-requires-python = ">= 3.11"
+requires-python = ">= 3.12"
 dependencies = [
   "alembic~=1.18.0",
   "apiflask~=3.0.0",
@@ -32,7 +32,7 @@ dependencies = [
   "gunicorn~=23.0",
   "jinja2~=3.1.0",
   "joblib~=1.5.0",
-  "lingua-language-detector~=2.1.0",
+  "lingua-language-detector~=2.2.0",
   "markupsafe~=3.0.0",
   "marshmallow~=4.1.0",
   "numpy~=2.4.1",
@@ -66,7 +66,7 @@ dev = [
   "pytest-postgresql~=7.0",
   "SQLAlchemy-Utils~=0.42.0",
   # TODO: update ty once officially out of beta
-  "ty~=0.0.17",
+  "ty~=0.0.24",
   "ruff~=0.15.0",
 ]
 
diff --git a/uv.lock b/uv.lock

Original file line number	Diff line number	Diff line change
`@@ -332,7 +332,7 @@ def _estimate_prob_two_records_match(self) -> None:`
`332`	`332`	`splink.block_on("title", "author"),`
`333`	`333`	`]`
`334`	`334`	`self.model.training.estimate_probability_two_random_records_match(`
`335`		`- deterministic_rules,`
	`335`	`+ deterministic_rules, # type: ignore[invalid-argument-type]`
`336`	`336`	`recall=0.7,`
`337`	`337`	`)`
`338`	`338`
Original file line number	Diff line number	Diff line change
`@@ -21,6 +21,7 @@`
`21`	`21`	`# and run that in "high-accuracy" mode; unclear if this is worth the trade-off`
`22`	`22`	`# .from_languages(Language.CHINESE, Language.ENGLISH, Language.FRENCH, Language.JAPANESE, Language.SPANISH)`
`23`	`23`	`.with_minimum_relative_distance(0.8)`
	`24`	`+ .with_preloaded_language_models()`
`24`	`25`	`.build()`
`25`	`26`	`)`
`26`	`27`