
Commit 98546da (parent: 02f5591)

fix: 3.9 error

2 files changed: +6 −6 lines changed


model2vec/distill/distillation.py (3 additions, 3 deletions)

@@ -3,7 +3,7 @@
 import logging
 import os
 import re
-from typing import cast
+from typing import Optional, cast
 
 import numpy as np
 from huggingface_hub import model_info
@@ -85,8 +85,8 @@ def distill_from_model(
     if not all_tokens:
         raise ValueError("The vocabulary is empty after preprocessing. Please check your token_remove_pattern.")
 
-    unk_token = cast(str | None, tokenizer.special_tokens_map.get("unk_token"))
-    pad_token = cast(str | None, tokenizer.special_tokens_map.get("pad_token"))
+    unk_token = cast(Optional[str], tokenizer.special_tokens_map.get("unk_token"))
+    pad_token = cast(Optional[str], tokenizer.special_tokens_map.get("pad_token"))
 
     # Weird if to satsify mypy
     if pad_token is None:

model2vec/tokenizer/tokenizer.py (3 additions, 3 deletions)

@@ -3,7 +3,7 @@
 import json
 import logging
 import re
-from typing import Any, cast
+from typing import Any, Optional, cast
 
 from tokenizers import Tokenizer
 from tokenizers.normalizers import Normalizer
@@ -374,8 +374,8 @@ def create_tokenizer(
     :param token_remove_regex: The regex to use to remove tokens from the vocabulary.
     :return: The created tokenizer.
     """
-    unk_token = cast(str | None, tokenizer.special_tokens_map.get("unk_token"))
-    pad_token = cast(str | None, tokenizer.special_tokens_map.get("pad_token"))
+    unk_token = cast(Optional[str], tokenizer.special_tokens_map.get("unk_token"))
+    pad_token = cast(Optional[str], tokenizer.special_tokens_map.get("pad_token"))
     cleaned_vocabulary, backend_tokenizer = clean_and_create_vocabulary(tokenizer, vocabulary, token_remove_regex)
     new_tokenizer = replace_vocabulary(backend_tokenizer, cleaned_vocabulary, unk_token, pad_token)

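Why this fixes the 3.9 error: PEP 604 union syntax (`str | None`) only became valid at runtime in Python 3.10. The first argument to `typing.cast(...)` is evaluated eagerly as an ordinary expression (a `from __future__ import annotations` import does not help here), so on Python 3.9 it raises a `TypeError` before the cast runs. `Optional[str]` builds the same str-or-None union through `typing`'s subscription API, which 3.9 supports. A minimal illustrative sketch, not part of this commit; the dict and variable names are invented for the repro:

from typing import Optional, cast

# Simulate looking up a special token that may be missing.
special_tokens_map = {"pad_token": None}
raw = special_tokens_map.get("pad_token")

# On Python 3.9 the next (commented-out) line fails before cast() is even called:
#   TypeError: unsupported operand type(s) for |
# pad_token = cast(str | None, raw)

# Optional[str] expresses the same union via typing subscription, which works
# on Python 3.9; cast() is a no-op at runtime either way.
pad_token = cast(Optional[str], raw)
print(pad_token)  # -> None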