Skip to content

Commit ed778dc

Browse files
authored
Add library name and version to hf_hub_download for better analytics (#3881)
* add lib name and version for hf hub download * add hf hub version
1 parent 14e6813 commit ed778dc

File tree

6 files changed

+29
-13
lines changed

6 files changed

+29
-13
lines changed

paddlenlp/transformers/auto/modeling.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
from huggingface_hub import hf_hub_download
2222

23+
from paddlenlp import __version__
2324
from paddlenlp.transformers import *
2425
from paddlenlp.utils.downloader import (COMMUNITY_MODEL_PREFIX,
2526
get_path_from_url)
@@ -237,7 +238,9 @@ def _from_pretrained(cls,
237238
if from_hf_hub:
238239
config_file = hf_hub_download(repo_id=pretrained_model_name_or_path,
239240
filename=cls.model_config_file,
240-
cache_dir=MODEL_HOME)
241+
cache_dir=MODEL_HOME,
242+
library_name="PaddleNLP",
243+
library_version=__version__)
241244
if os.path.exists(config_file):
242245
model_class = cls._get_model_class_from_config(
243246
pretrained_model_name_or_path, config_file)

paddlenlp/transformers/auto/tokenizer.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
from huggingface_hub import hf_hub_download
2222

23+
from paddlenlp import __version__
2324
from paddlenlp.transformers import *
2425
from paddlenlp.utils.downloader import (COMMUNITY_MODEL_PREFIX,
2526
get_path_from_url)
@@ -242,7 +243,9 @@ def from_pretrained(cls,
242243
if from_hf_hub:
243244
config_file = hf_hub_download(repo_id=pretrained_model_name_or_path,
244245
filename=cls.tokenizer_config_file,
245-
cache_dir=MODEL_HOME)
246+
cache_dir=MODEL_HOME,
247+
library_name="PaddleNLP",
248+
library_version=__version__)
246249
if os.path.exists(config_file):
247250
tokenizer_class = cls._get_tokenizer_class_from_config(
248251
pretrained_model_name_or_path, config_file, use_fast)

paddlenlp/transformers/configuration_utils.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -748,7 +748,9 @@ def _get_config_dict(cls, pretrained_model_name_or_path: Union[str,
748748
resolved_config_file = hf_hub_download(
749749
repo_id=pretrained_model_name_or_path,
750750
filename=CONFIG_NAME,
751-
cache_dir=MODEL_HOME)
751+
cache_dir=MODEL_HOME,
752+
library_name="PaddleNLP",
753+
library_version=__version__)
752754

753755
# 3. get the configuration file from a URL, e.g. https://ip/path/to/model_config.json
754756
elif is_url(pretrained_model_name_or_path):

paddlenlp/transformers/model_utils.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,12 +31,13 @@
3131
from paddle.nn import Embedding, Layer
3232
# TODO(fangzeyang) Temporary fix and replace by paddle framework downloader later
3333
from paddle.utils.download import is_url
34-
from paddlenlp.utils.downloader import (download_check, COMMUNITY_MODEL_PREFIX)
35-
from paddlenlp.utils.downloader import get_path_from_url_with_filelock
36-
from paddlenlp.utils.env import MODEL_HOME, LOCK_FILE_HOME
3734

38-
from paddlenlp.utils.log import logger
35+
from paddlenlp import __version__
36+
from paddlenlp.utils.downloader import (COMMUNITY_MODEL_PREFIX, download_check,
37+
get_path_from_url_with_filelock)
38+
from paddlenlp.utils.env import LOCK_FILE_HOME, MODEL_HOME
3939
from paddlenlp.utils.file_lock import FileLock
40+
from paddlenlp.utils.log import logger
4041

4142
from .configuration_utils import PretrainedConfig
4243
from .generation_utils import GenerationMixin
@@ -439,7 +440,9 @@ def from_pretrained(cls,
439440
resolved_resource_files[file_id] = hf_hub_download(
440441
repo_id=pretrained_model_name_or_path,
441442
filename=file_path,
442-
cache_dir=MODEL_HOME)
443+
cache_dir=MODEL_HOME,
444+
library_name="PaddleNLP",
445+
library_version=__version__)
443446
else:
444447
path = os.path.join(default_root, file_path.split('/')[-1])
445448
if os.path.exists(path):
@@ -848,7 +851,9 @@ def _resolve_model_file_path(cls: Type[PretrainedModel],
848851
return hf_hub_download(
849852
repo_id=pretrained_model_name_or_path,
850853
filename=cls.resource_files_names['model_state'],
851-
cache_dir=MODEL_HOME)
854+
cache_dir=MODEL_HOME,
855+
library_name="PaddleNLP",
856+
library_version=__version__)
852857

853858
# 2. when it is model-name
854859
if pretrained_model_name_or_path in cls.pretrained_init_configuration:

paddlenlp/transformers/tokenizer_utils_base.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,9 @@
3232
import paddle
3333
from huggingface_hub import hf_hub_download
3434

35-
from paddlenlp.utils.downloader import COMMUNITY_MODEL_PREFIX
36-
from paddlenlp.utils.downloader import get_path_from_url_with_filelock
35+
from paddlenlp import __version__
36+
from paddlenlp.utils.downloader import (COMMUNITY_MODEL_PREFIX,
37+
get_path_from_url_with_filelock)
3738
from paddlenlp.utils.env import MODEL_HOME
3839
from paddlenlp.utils.log import logger
3940

@@ -1567,7 +1568,9 @@ def from_pretrained(cls,
15671568
resolved_vocab_files[file_id] = hf_hub_download(
15681569
repo_id=pretrained_model_name_or_path,
15691570
filename=file_path,
1570-
cache_dir=MODEL_HOME)
1571+
cache_dir=MODEL_HOME,
1572+
library_name="PaddleNLP",
1573+
library_version=__version__)
15711574
else:
15721575
path = os.path.join(default_root, file_path.split('/')[-1])
15731576
if os.path.exists(path):

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ datasets >= 2.0.0
88
tqdm
99
paddlefsl
1010
sentencepiece
11-
huggingface_hub
11+
huggingface_hub>=0.10.1
1212
paddle2onnx
1313
protobuf>=3.1.0, <=3.20.0
1414
visualdl

0 commit comments

Comments
 (0)