Skip to content

Commit a252e34

Browse files
committed
Deprecated Python 3.9
1 parent 32b0375 commit a252e34

File tree

9 files changed

+60
-489
lines changed

9 files changed

+60
-489
lines changed

.github/workflows/ci.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ jobs:
1010
strategy:
1111
matrix:
1212
os: ["ubuntu-latest"]
13-
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
13+
python-version: ["3.10", "3.11", "3.12", "3.13"]
1414
fail-fast: false
1515

1616
steps:

model2vec/distill/distillation.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import logging
44
import os
55
import re
6-
from typing import Optional, cast
6+
from typing import cast
77

88
import numpy as np
99
from huggingface_hub.hf_api import model_info
@@ -87,8 +87,8 @@ def distill_from_model(
8787
if not all_tokens:
8888
raise ValueError("The vocabulary is empty after preprocessing. Please check your token_remove_pattern.")
8989

90-
unk_token = cast(Optional[str], tokenizer.special_tokens_map.get("unk_token"))
91-
pad_token = cast(Optional[str], tokenizer.special_tokens_map.get("pad_token"))
90+
unk_token = cast(str | None, tokenizer.special_tokens_map.get("unk_token"))
91+
pad_token = cast(str | None, tokenizer.special_tokens_map.get("pad_token"))
9292

9393
# Weird if to satisfy mypy
9494
if pad_token is None:

model2vec/distill/inference.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,10 @@
1-
# -*- coding: utf-8 -*-
21
from __future__ import annotations
32

43
import inspect
54
import logging
65
from enum import Enum
76
from pathlib import Path
8-
from typing import Literal, Union
7+
from typing import Literal
98

109
import numpy as np
1110
import torch
@@ -17,8 +16,8 @@
1716

1817
logger = logging.getLogger(__name__)
1918

20-
PathLike = Union[Path, str]
21-
PCADimType = Union[int, None, float, Literal["auto"]]
19+
PathLike = Path | str
20+
PCADimType = int | None | float | Literal["auto"]
2221

2322
_DEFAULT_BATCH_SIZE = 256
2423

model2vec/inference/model.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
from __future__ import annotations
22

33
import re
4+
from collections.abc import Sequence
45
from pathlib import Path
56
from tempfile import TemporaryDirectory
6-
from typing import Sequence, TypeVar, Union, cast
7+
from typing import TypeVar, cast
78

89
import huggingface_hub
910
import numpy as np
@@ -293,14 +294,14 @@ def evaluate_single_or_multi_label(
293294
"""
294295
if _is_multi_label_shaped(y):
295296
# Cast because the type checker doesn't understand that y is a list of lists.
296-
y = cast(Union[list[list[str]], list[list[int]]], y)
297+
y = cast(list[list[str]] | list[list[int]], y)
297298
classes = sorted(set([label for labels in y for label in labels]))
298299
mlb = MultiLabelBinarizer(classes=classes)
299300
y_transformed = mlb.fit_transform(y)
300301
predictions_transformed = mlb.transform(predictions)
301302
else:
302303
if all(isinstance(label, (str, int)) for label in y):
303-
y = cast(Union[list[str], list[int]], y)
304+
y = cast(list[str] | list[int], y)
304305
classes = sorted(set(y))
305306
y_transformed = np.array(y)
306307
predictions_transformed = np.array(predictions)

model2vec/model.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,11 @@
22

33
import math
44
import os
5+
from collections.abc import Iterator, Sequence
56
from logging import getLogger
67
from pathlib import Path
78
from tempfile import TemporaryDirectory
8-
from typing import Any, Iterator, Sequence, Union, overload
9+
from typing import Any, overload
910

1011
import numpy as np
1112
from joblib import delayed
@@ -15,7 +16,7 @@
1516
from model2vec.quantization import DType, quantize_and_reduce_dim
1617
from model2vec.utils import ProgressParallel
1718

18-
PathLike = Union[Path, str]
19+
PathLike = Path | str
1920

2021
logger = getLogger(__name__)
2122

model2vec/tokenizer/tokenizer.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import json
44
import logging
55
import re
6-
from typing import Any, Optional, cast
6+
from typing import Any, cast
77

88
from tokenizers import Tokenizer
99
from tokenizers.normalizers import Normalizer
@@ -387,8 +387,8 @@ def create_tokenizer(
387387
:param token_remove_regex: The regex to use to remove tokens from the vocabulary.
388388
:return: The created tokenizer.
389389
"""
390-
unk_token = cast(Optional[str], tokenizer.special_tokens_map.get("unk_token"))
391-
pad_token = cast(Optional[str], tokenizer.special_tokens_map.get("pad_token"))
390+
unk_token = cast(str | None, tokenizer.special_tokens_map.get("unk_token"))
391+
pad_token = cast(str | None, tokenizer.special_tokens_map.get("pad_token"))
392392
cleaned_vocabulary, backend_tokenizer = clean_and_create_vocabulary(tokenizer, vocabulary, token_remove_regex)
393393
new_tokenizer = replace_vocabulary(backend_tokenizer, cleaned_vocabulary, unk_token, pad_token)
394394

model2vec/utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
1-
# -*- coding: utf-8 -*-
21
from __future__ import annotations
32

43
import logging
54
import re
5+
from collections.abc import Iterator
66
from importlib import import_module
77
from importlib.metadata import metadata
8-
from typing import Any, Iterator, Protocol
8+
from typing import Any, Protocol
99

1010
import numpy as np
1111
from joblib import Parallel

pyproject.toml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ name = "model2vec"
33
description = "Fast State-of-the-Art Static Embeddings"
44
readme = { file = "README.md", content-type = "text/markdown" }
55
license = { file = "LICENSE" }
6-
requires-python = ">=3.9"
6+
requires-python = ">=3.10"
77
authors = [{ name = "Stéphan Tulkens", email = "[email protected]"}, {name = "Thomas van Dongen", email = "[email protected]"}]
88
dynamic = ["version"]
99

@@ -15,7 +15,6 @@ classifiers = [
1515
"Topic :: Software Development :: Libraries",
1616
"License :: OSI Approved :: MIT License",
1717
"Programming Language :: Python :: 3 :: Only",
18-
"Programming Language :: Python :: 3.9",
1918
"Programming Language :: Python :: 3.10",
2019
"Programming Language :: Python :: 3.11",
2120
"Programming Language :: Python :: 3.12",

0 commit comments

Comments
 (0)