Skip to content

Commit 17d2dd3

Browse files
authored
build: upgrade python+dependency versions (#211)
* build: bump min python to v3.12
* ci: run on pythons 3.12 and 3.13
* build: bump lingua from v2.1 to v2.2
* fix: preload lingua models
* build: bump ty to latest version
* types: ignore a bunch of incorrect ty violations
  wow, when did ty get so fucking stupid
* ci: bump setup-uv to v7.6
* build: bump thinc
1 parent 6d47451 commit 17d2dd3

File tree

7 files changed: +117 -415 lines changed

.github/actions/setup-python-env/action.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ runs:
1414
with:
1515
python-version: ${{ inputs.python-version }}
1616
- name: Install uv
17-
uses: astral-sh/setup-uv@v7.3.0
17+
uses: astral-sh/setup-uv@v7.6.0
1818
with:
1919
python-version: ${{ inputs.python-version }}
2020
version-file: "pyproject.toml"

.github/workflows/checks.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ jobs:
3030
strategy:
3131
fail-fast: false
3232
matrix:
33-
python-version: ["3.11", "3.12"]
33+
python-version: ["3.12", "3.13"]
3434
# TODO: also run on macos-latest pending docker/colima issue
3535
os: [ubuntu-latest]
3636
services:
@@ -84,7 +84,7 @@ jobs:
8484
strategy:
8585
fail-fast: false
8686
matrix:
87-
python-version: ["3.11", "3.12"]
87+
python-version: ["3.12", "3.13"]
8888
# TODO: also run on macos-latest pending docker/colima issue
8989
os: [ubuntu-latest]
9090
steps:
@@ -105,7 +105,7 @@ jobs:
105105
strategy:
106106
fail-fast: false
107107
matrix:
108-
python-version: ["3.11", "3.12"]
108+
python-version: ["3.12", "3.13"]
109109
# TODO: also run on macos-latest pending docker/colima issue
110110
os: [ubuntu-latest]
111111
steps:

colandr/lib/fileio/ris.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ def sanitize(references: list[dict]) -> list[dict]:
104104
return references
105105

106106

107-
def _sanitize_reference(reference: dict) -> dict:
107+
def _sanitize_reference(reference: dict[str, t.Any]) -> dict:
108108
# rename certain tags with their type-specific names
109109
if reference["type_of_reference"] in REF_TYPE_TAG_OVERRIDES:
110110
tag_overrides = REF_TYPE_TAG_OVERRIDES[reference["type_of_reference"]]
@@ -132,19 +132,19 @@ def _sanitize_reference(reference: dict) -> dict:
132132
break
133133
# handle authors specified all together on one line
134134
if "authors" in reference:
135-
reference["authors"] = _split_up_authors(reference["authors"])
135+
reference["authors"] = _split_up_authors(reference["authors"]) # type: ignore[invalid-argument-type]
136136
# clean notes text, which may contain html tags and markup
137137
if "notes" in reference:
138-
reference["notes"] = _strip_tags_from_notes(reference["notes"])
138+
reference["notes"] = _strip_tags_from_notes(reference["notes"]) # type: ignore[invalid-argument-type]
139139
# split date key into year (if needed) and month
140140
if reference.get("date"):
141-
reference["pub_month"] = reference["date"].month
141+
reference["pub_month"] = reference["date"].month # type: ignore[unresolved-attribute]
142142
if "pub_year" not in reference:
143-
reference["pub_year"] = reference["date"].year
143+
reference["pub_year"] = reference["date"].year # type: ignore[unresolved-attribute]
144144
# HACK: cast dttms to dt strings to avoid json encoding error
145145
reference.update(
146146
{
147-
key: reference[key].strftime("%Y-%m-%d")
147+
key: reference[key].strftime("%Y-%m-%d") # type: ignore[unresolved-attribute]
148148
for key in DTTM_KEYS
149149
if reference.get(key)
150150
}
@@ -154,7 +154,7 @@ def _sanitize_reference(reference: dict) -> dict:
154154
if "start_page" in reference and "end_page" in reference:
155155
try:
156156
reference["number_of_pages"] = (
157-
reference["end_page"] - reference["start_page"]
157+
reference["end_page"] - reference["start_page"] # type: ignore[unsupported-operator]
158158
)
159159
except TypeError:
160160
pass

colandr/lib/models/deduper_v2.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -332,7 +332,7 @@ def _estimate_prob_two_records_match(self) -> None:
332332
splink.block_on("title", "author"),
333333
]
334334
self.model.training.estimate_probability_two_random_records_match(
335-
deterministic_rules,
335+
deterministic_rules, # type: ignore[invalid-argument-type]
336336
recall=0.7,
337337
)
338338

colandr/lib/nlp/utils.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
# and run that in "high-accuracy" mode; unclear if this is worth the trade-off
2222
# .from_languages(Language.CHINESE, Language.ENGLISH, Language.FRENCH, Language.JAPANESE, Language.SPANISH)
2323
.with_minimum_relative_distance(0.8)
24+
.with_preloaded_language_models()
2425
.build()
2526
)
2627

pyproject.toml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,10 @@ classifiers = [
99
"Intended Audience :: Science/Research",
1010
"Programming Language :: Python",
1111
"Programming Language :: Python :: 3",
12-
"Programming Language :: Python :: 3.11",
1312
"Programming Language :: Python :: 3.12",
13+
"Programming Language :: Python :: 3.13",
1414
]
15-
requires-python = ">= 3.11"
15+
requires-python = ">= 3.12"
1616
dependencies = [
1717
"alembic~=1.18.0",
1818
"apiflask~=3.0.0",
@@ -32,7 +32,7 @@ dependencies = [
3232
"gunicorn~=23.0",
3333
"jinja2~=3.1.0",
3434
"joblib~=1.5.0",
35-
"lingua-language-detector~=2.1.0",
35+
"lingua-language-detector~=2.2.0",
3636
"markupsafe~=3.0.0",
3737
"marshmallow~=4.1.0",
3838
"numpy~=2.4.1",
@@ -66,7 +66,7 @@ dev = [
6666
"pytest-postgresql~=7.0",
6767
"SQLAlchemy-Utils~=0.42.0",
6868
# TODO: update ty once officially out of beta
69-
"ty~=0.0.17",
69+
"ty~=0.0.24",
7070
"ruff~=0.15.0",
7171
]
7272

0 commit comments

Comments
 (0)