Commit b304a2a

new: drop python3.9, replace optional and union with | (#574)
* new: drop python3.9, replace optional and union with |
* new: remove python 3.9 from pyproject
* refactor: replace remaining union and optional with |
* new: remove optional and union in dataclasses
* fix: add typealias to numpy type
* new: replace union with | in token count
1 parent c715416 commit b304a2a

47 files changed: +528 -1324 lines
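In practice, the change throughout these files is the same PEP 604 rewrite: `Optional[X]` becomes `X | None` and `Union[X, Y]` becomes `X | Y`, which only evaluates at runtime on Python 3.10+ and is why the minimum supported version moves up. A minimal before/after sketch with a hypothetical function, not taken from the diff:

# Before (Python 3.9 style):
#     from typing import Optional, Union
#     def lookup(key: str, default: Optional[str] = None) -> Union[str, int]: ...
#
# After (PEP 604, requires Python 3.10+ when evaluated at runtime):
def lookup(key: str, default: str | None = None) -> str | int:
    """Hypothetical helper; returns the default if given, else the key's length."""
    return default if default is not None else len(key)

print(lookup("abc"))              # 3
print(lookup("abc", "fallback"))  # fallback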

.github/workflows/python-publish.yml

Lines changed: 1 addition & 1 deletion
@@ -25,7 +25,7 @@ jobs:
       - name: Set up Python
         uses: actions/setup-python@v2
         with:
-          python-version: '3.9.x'
+          python-version: '3.10.x'
       - name: Install dependencies
         run: |
           python -m pip install poetry

.github/workflows/python-tests.yml

Lines changed: 0 additions & 1 deletion
@@ -15,7 +15,6 @@ jobs:
     strategy:
       matrix:
         python-version:
-          - '3.9.x'
           - '3.10.x'
           - '3.11.x'
           - '3.12.x'

.github/workflows/type-checkers.yml

Lines changed: 1 addition & 1 deletion
@@ -8,7 +8,7 @@ jobs:
     strategy:
       fail-fast: true
       matrix:
-        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
+        python-version: ["3.10", "3.11", "3.12", "3.13"]
         os: [ubuntu-latest]
 
     name: Python ${{ matrix.python-version }} test

fastembed/common/model_description.py

Lines changed: 7 additions & 7 deletions
@@ -1,12 +1,12 @@
 from dataclasses import dataclass, field
 from enum import Enum
-from typing import Optional, Any
+from typing import Any
 
 
 @dataclass(frozen=True)
 class ModelSource:
-    hf: Optional[str] = None
-    url: Optional[str] = None
+    hf: str | None = None
+    url: str | None = None
     _deprecated_tar_struct: bool = False
 
     @property
@@ -33,17 +33,17 @@ class BaseModelDescription:
 
 @dataclass(frozen=True)
 class DenseModelDescription(BaseModelDescription):
-    dim: Optional[int] = None
-    tasks: Optional[dict[str, Any]] = field(default_factory=dict)
+    dim: int | None = None
+    tasks: dict[str, Any] | None = field(default_factory=dict)
 
     def __post_init__(self) -> None:
         assert self.dim is not None, "dim is required for dense model description"
 
 
 @dataclass(frozen=True)
 class SparseModelDescription(BaseModelDescription):
-    requires_idf: Optional[bool] = None
-    vocab_size: Optional[int] = None
+    requires_idf: bool | None = None
+    vocab_size: int | None = None
 
 
 class PoolingType(str, Enum):
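Note that dataclass field annotations like `dim: int | None = None` are evaluated when the class body runs, so writing them with `|` (without a `from __future__ import annotations` escape hatch) is itself a hard 3.10+ requirement. A small sketch with a hypothetical dataclass, not fastembed code:

from dataclasses import dataclass, field
from typing import Any

@dataclass(frozen=True)
class ExampleDescription:
    # The `int | None` union here is built at class-definition time,
    # which raises TypeError on Python 3.9 but works on 3.10+.
    dim: int | None = None
    tasks: dict[str, Any] | None = field(default_factory=dict)

desc = ExampleDescription(dim=384)
assert desc.dim == 384 and desc.tasks == {}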

fastembed/common/model_management.py

Lines changed: 5 additions & 7 deletions
@@ -5,7 +5,7 @@
 import tarfile
 from copy import deepcopy
 from pathlib import Path
-from typing import Any, Optional, Union, TypeVar, Generic
+from typing import Any, TypeVar, Generic
 
 import requests
 from huggingface_hub import snapshot_download, model_info, list_repo_tree
@@ -180,8 +180,8 @@ def _verify_files_from_metadata(
 
         def _collect_file_metadata(
             model_dir: Path, repo_files: list[RepoFile]
-        ) -> dict[str, dict[str, Union[int, str]]]:
-            meta: dict[str, dict[str, Union[int, str]]] = {}
+        ) -> dict[str, dict[str, int | str]]:
+            meta: dict[str, dict[str, int | str]] = {}
             file_info_map = {f.path: f for f in repo_files}
             for file_path in model_dir.rglob("*"):
                 if file_path.is_file() and file_path.name != cls.METADATA_FILE:
@@ -193,9 +193,7 @@ def _collect_file_metadata(
                 }
             return meta
 
-        def _save_file_metadata(
-            model_dir: Path, meta: dict[str, dict[str, Union[int, str]]]
-        ) -> None:
+        def _save_file_metadata(model_dir: Path, meta: dict[str, dict[str, int | str]]) -> None:
             try:
                 if not model_dir.exists():
                     model_dir.mkdir(parents=True, exist_ok=True)
@@ -397,7 +395,7 @@ def download_model(cls, model: T, cache_dir: str, retries: int = 3, **kwargs: An
             Path: The path to the downloaded model directory.
         """
         local_files_only = kwargs.get("local_files_only", False)
-        specific_model_path: Optional[str] = kwargs.pop("specific_model_path", None)
+        specific_model_path: str | None = kwargs.pop("specific_model_path", None)
         if specific_model_path:
             return Path(specific_model_path)
         retries = 1 if local_files_only else retries

fastembed/common/onnx_model.py

Lines changed: 9 additions & 9 deletions
@@ -1,7 +1,7 @@
 import warnings
 from dataclasses import dataclass
 from pathlib import Path
-from typing import Any, Generic, Iterable, Optional, Sequence, Type, TypeVar
+from typing import Any, Generic, Iterable, Sequence, Type, TypeVar
 
 import numpy as np
 import onnxruntime as ort
@@ -19,8 +19,8 @@
 @dataclass
 class OnnxOutputContext:
     model_output: NumpyArray
-    attention_mask: Optional[NDArray[np.int64]] = None
-    input_ids: Optional[NDArray[np.int64]] = None
+    attention_mask: NDArray[np.int64] | None = None
+    input_ids: NDArray[np.int64] | None = None
 
 
 class OnnxModel(Generic[T]):
@@ -43,8 +43,8 @@ def _post_process_onnx_output(self, output: OnnxOutputContext, **kwargs: Any) ->
         raise NotImplementedError("Subclasses must implement this method")
 
     def __init__(self) -> None:
-        self.model: Optional[ort.InferenceSession] = None
-        self.tokenizer: Optional[Tokenizer] = None
+        self.model: ort.InferenceSession | None = None
+        self.tokenizer: Tokenizer | None = None
 
     def _preprocess_onnx_input(
         self, onnx_input: dict[str, NumpyArray], **kwargs: Any
@@ -58,11 +58,11 @@ def _load_onnx_model(
         self,
         model_dir: Path,
         model_file: str,
-        threads: Optional[int],
-        providers: Optional[Sequence[OnnxProvider]] = None,
+        threads: int | None,
+        providers: Sequence[OnnxProvider] | None = None,
         cuda: bool = False,
-        device_id: Optional[int] = None,
-        extra_session_options: Optional[dict[str, Any]] = None,
+        device_id: int | None = None,
+        extra_session_options: dict[str, Any] | None = None,
     ) -> None:
         model_path = model_dir / model_file
         # List of Execution Providers: https://onnxruntime.ai/docs/execution-providers

fastembed/common/types.py

Lines changed: 14 additions & 19 deletions
@@ -1,25 +1,20 @@
 from pathlib import Path
-import sys
-from PIL import Image
-from typing import Any, Union
+
+from typing import Any, TypeAlias
 import numpy as np
 from numpy.typing import NDArray
-
-if sys.version_info >= (3, 10):
-    from typing import TypeAlias
-else:
-    from typing_extensions import TypeAlias
+from PIL import Image
 
 
-PathInput: TypeAlias = Union[str, Path]
-ImageInput: TypeAlias = Union[PathInput, Image.Image]
+PathInput: TypeAlias = str | Path
+ImageInput: TypeAlias = PathInput | Image.Image
 
-OnnxProvider: TypeAlias = Union[str, tuple[str, dict[Any, Any]]]
-NumpyArray = Union[
-    NDArray[np.float64],
-    NDArray[np.float32],
-    NDArray[np.float16],
-    NDArray[np.int8],
-    NDArray[np.int64],
-    NDArray[np.int32],
-]
+OnnxProvider: TypeAlias = str | tuple[str, dict[Any, Any]]
+NumpyArray: TypeAlias = (
+    NDArray[np.float64]
+    | NDArray[np.float32]
+    | NDArray[np.float16]
+    | NDArray[np.int8]
+    | NDArray[np.int64]
+    | NDArray[np.int32]
+)
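With 3.10 as the floor, `TypeAlias` comes straight from `typing` (no `typing_extensions` fallback), and annotating `NumpyArray` with it tells type checkers the union is an alias intended for annotations rather than an ordinary module-level value. A hedged sketch with hypothetical names, separate from fastembed's own aliases:

import numpy as np
from numpy.typing import NDArray
from typing import TypeAlias

# Explicit TypeAlias: checkers treat FloatArray as an alias usable in signatures.
FloatArray: TypeAlias = NDArray[np.float32] | NDArray[np.float64]

def l2_norm(x: FloatArray) -> float:
    """Hypothetical helper showing the alias in an annotation."""
    return float(np.linalg.norm(x))

print(l2_norm(np.asarray([3.0, 4.0], dtype=np.float32)))  # 5.0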

fastembed/common/utils.py

Lines changed: 2 additions & 2 deletions
@@ -5,7 +5,7 @@
 import unicodedata
 from pathlib import Path
 from itertools import islice
-from typing import Iterable, Optional, TypeVar
+from typing import Iterable, TypeVar
 
 import numpy as np
 from numpy.typing import NDArray
@@ -45,7 +45,7 @@ def iter_batch(iterable: Iterable[T], size: int) -> Iterable[list[T]]:
         yield b
 
 
-def define_cache_dir(cache_dir: Optional[str] = None) -> Path:
+def define_cache_dir(cache_dir: str | None = None) -> Path:
     """
     Define the cache directory for fastembed
     """

fastembed/embedding.py

Lines changed: 3 additions & 3 deletions
@@ -1,4 +1,4 @@
-from typing import Optional, Any
+from typing import Any
 
 from loguru import logger
 
@@ -17,8 +17,8 @@ class JinaEmbedding(TextEmbedding):
     def __init__(
         self,
        model_name: str = "jinaai/jina-embeddings-v2-base-en",
-        cache_dir: Optional[str] = None,
-        threads: Optional[int] = None,
+        cache_dir: str | None = None,
+        threads: int | None = None,
         **kwargs: Any,
     ):
         super().__init__(model_name, cache_dir, threads, **kwargs)

fastembed/image/image_embedding.py

Lines changed: 8 additions & 8 deletions
@@ -1,4 +1,4 @@
-from typing import Any, Iterable, Optional, Sequence, Type, Union
+from typing import Any, Iterable, Sequence, Type
 from dataclasses import asdict
 
 from fastembed.common.types import NumpyArray
@@ -48,11 +48,11 @@ def _list_supported_models(cls) -> list[DenseModelDescription]:
     def __init__(
         self,
         model_name: str,
-        cache_dir: Optional[str] = None,
-        threads: Optional[int] = None,
-        providers: Optional[Sequence[OnnxProvider]] = None,
+        cache_dir: str | None = None,
+        threads: int | None = None,
+        providers: Sequence[OnnxProvider] | None = None,
         cuda: bool = False,
-        device_ids: Optional[list[int]] = None,
+        device_ids: list[int] | None = None,
         lazy_load: bool = False,
         **kwargs: Any,
     ):
@@ -98,7 +98,7 @@ def get_embedding_size(cls, model_name: str) -> int:
             ValueError: If the model name is not found in the supported models.
         """
         descriptions = cls._list_supported_models()
-        embedding_size: Optional[int] = None
+        embedding_size: int | None = None
         for description in descriptions:
             if description.model.lower() == model_name.lower():
                 embedding_size = description.dim
@@ -113,9 +113,9 @@ def get_embedding_size(cls, model_name: str) -> int:
 
     def embed(
         self,
-        images: Union[ImageInput, Iterable[ImageInput]],
+        images: ImageInput | Iterable[ImageInput],
         batch_size: int = 16,
-        parallel: Optional[int] = None,
+        parallel: int | None = None,
         **kwargs: Any,
     ) -> Iterable[NumpyArray]:
         """
