|
13 | 13 | # See the License for the specific language governing permissions and
|
14 | 14 | # limitations under the License.
|
15 | 15 | import json
|
16 |
| -import logging |
17 | 16 | import os
|
18 | 17 | import warnings
|
19 | 18 | from typing import Dict, Optional, Union
|
20 | 19 |
|
21 | 20 | import transformers as hf
|
22 |
| -from transformers import PretrainedConfig |
| 21 | +from transformers import AutoConfig, PretrainedConfig |
23 | 22 | from transformers.dynamic_module_utils import (
|
24 | 23 | get_class_from_dynamic_module,
|
25 | 24 | resolve_trust_remote_code,
|
|
40 | 39 | from transformers.utils import cached_file
|
41 | 40 |
|
42 | 41 | from ...utils.download import DownloadSource, resolve_file_path
|
| 42 | +from ...utils.log import logger |
43 | 43 | from ..tokenizer_utils import PaddleTokenizerMixin
|
44 |
| -from .configuration import AutoConfig |
45 |
| - |
46 |
| -logger = logging.getLogger(__name__) |
47 | 44 |
|
48 | 45 |
|
49 | 46 | def get_paddleformers_tokenizer_config(
|
@@ -158,7 +155,6 @@ def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs):
|
158 | 155 | download_hub = kwargs.get("download_hub", None)
|
159 | 156 | if download_hub is None:
|
160 | 157 | download_hub = os.environ.get("DOWNLOAD_SOURCE", "huggingface")
|
161 |
| - logger.info(f"Using download source: {download_hub}") |
162 | 158 | use_auth_token = kwargs.pop("use_auth_token", None)
|
163 | 159 | if use_auth_token is not None:
|
164 | 160 | warnings.warn(
|
@@ -216,7 +212,29 @@ def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs):
|
216 | 212 | kwargs["_commit_hash"] = tokenizer_config["_commit_hash"]
|
217 | 213 | config_tokenizer_class = tokenizer_config.get("tokenizer_class")
|
218 | 214 | else:
|
219 |
| - tokenizer_config = get_paddleformers_tokenizer_config(pretrained_model_name_or_path, **kwargs) |
| 215 | + try: |
| 216 | + tokenizer_config = get_paddleformers_tokenizer_config(pretrained_model_name_or_path, **kwargs) |
| 217 | + except Exception as e: |
| 218 | + if any( |
| 219 | + keyword in str(e).lower() |
| 220 | + for keyword in ["not exist", "not found", "entrynotfound", "notexist", "does not appear"] |
| 221 | + ): |
| 222 | + hf_link = f"https://huggingface.co/{pretrained_model_name_or_path}" |
| 223 | + modelscope_link = f"https://modelscope.cn/models/{pretrained_model_name_or_path}" |
| 224 | + encoded_model_name = pretrained_model_name_or_path.replace("/", "%2F") |
| 225 | + aistudio_link = f"https://aistudio.baidu.com/modelsoverview?sortBy=weight&q={encoded_model_name}" |
| 226 | + |
| 227 | + raise ValueError( |
| 228 | + f"Unable to find {TOKENIZER_CONFIG_FILE} in the model repository '{pretrained_model_name_or_path}'. Please check:\n" |
| 229 | + f"The model repository ID is correct for your chosen source:\n" |
| 230 | + f" - Hugging Face Hub: {hf_link}\n" |
| 231 | + f" - ModelScope: {modelscope_link}\n" |
| 232 | + f" - AI Studio: {aistudio_link}\n" |
| 233 | + f"Note: The repository ID may differ between ModelScope, AI Studio, and Hugging Face Hub.\n" |
| 234 | + f"You are currently using the download source: {download_hub}. Please check the repository ID on the official website." |
| 235 | + ) from None |
| 236 | + else: |
| 237 | + raise |
220 | 238 | config_tokenizer_class = tokenizer_config.get("tokenizer_class")
|
221 | 239 |
|
222 | 240 | tokenizer_auto_map = None
|
|
0 commit comments