|
7 | 7 | import warnings
|
8 | 8 | from functools import lru_cache
|
9 | 9 | from pathlib import Path
|
10 |
| -from types import MethodType |
11 | 10 | from typing import TYPE_CHECKING, Any, Optional, Union
|
12 | 11 |
|
13 | 12 | import huggingface_hub
|
@@ -144,26 +143,6 @@ def __reduce__(self):
|
144 | 143 | return cached_tokenizer
|
145 | 144 |
|
146 | 145 |
|
147 |
| -def patch_padding_side(tokenizer: PreTrainedTokenizer) -> None: |
148 |
| - """Patch _pad method to accept `padding_side` for older tokenizers.""" |
149 |
| - orig_pad = tokenizer._pad |
150 |
| - |
151 |
| - def _pad( |
152 |
| - self: PreTrainedTokenizer, |
153 |
| - *args, |
154 |
| - padding_side: Optional[str] = None, |
155 |
| - **kwargs, |
156 |
| - ): |
157 |
| - if padding_side is not None and padding_side != self.padding_side: |
158 |
| - msg = ("`padding_side` argument is not supported by " |
159 |
| - f"{type(tokenizer).__name__} and will be ignored.") |
160 |
| - warnings.warn(msg, stacklevel=2) |
161 |
| - |
162 |
| - return orig_pad(*args, **kwargs) |
163 |
| - |
164 |
| - tokenizer._pad = MethodType(_pad, tokenizer) |
165 |
| - |
166 |
| - |
167 | 146 | def get_tokenizer(
|
168 | 147 | tokenizer_name: Union[str, Path],
|
169 | 148 | *args,
|
@@ -271,12 +250,6 @@ def get_tokenizer(
|
271 | 250 | }
|
272 | 251 | tokenizer.add_special_tokens(special_tokens_map)
|
273 | 252 |
|
274 |
| - # NOTE: We can remove this after https://github.com/zai-org/ChatGLM3/issues/1324 |
275 |
| - if type(tokenizer).__name__ in ("ChatGLMTokenizer", |
276 |
| - "ChatGLM4Tokenizer"): |
277 |
| - assert isinstance(tokenizer, PreTrainedTokenizer) |
278 |
| - patch_padding_side(tokenizer) |
279 |
| - |
280 | 253 | if not isinstance(tokenizer, PreTrainedTokenizerFast):
|
281 | 254 | logger.warning(
|
282 | 255 | "Using a slow tokenizer. This might cause a significant "
|
|
0 commit comments