Skip to content

Commit 56e2654

Browse files
fjjF77, lugimzzz, Liujie0926, PaddleCI
authored
support multi download source (#2427)
Co-authored-by: lugimzzz <[email protected]> Co-authored-by: liujie44 <[email protected]> Co-authored-by: PaddleCI <[email protected]> Co-authored-by: Liujie0926 <[email protected]>
1 parent 49e5f31 commit 56e2654

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

67 files changed

+1561
-2645
lines changed

.github/workflows/unittest-cpu.yml

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,6 @@ env:
1111
COMMIT_ID: ${{ github.event.pull_request.head.sha }}
1212
BRANCH: ${{ github.event.pull_request.base.ref }}
1313
TASK: PaddleFormers-CI-${{ github.event.pull_request.number }}-unittest-cpu
14-
HF_ENDPOINT: https://hf-mirror.com
15-
STUDIO_GIT_HOST: http://git.prod.idc-to-cloud.aistudio.baidu-int.com
1614

1715
jobs:
1816
unittest-cpu-ci:
@@ -41,8 +39,6 @@ jobs:
4139
-e work_dir \
4240
-e no_proxy \
4341
-e python_version \
44-
-e HF_ENDPOINT \
45-
-e STUDIO_GIT_HOST \
4642
-w /workspace ${docker_image}
4743
4844
- name: Download Code
@@ -92,10 +88,12 @@ jobs:
9288
- name: Test
9389
run: |
9490
docker exec -t $container_name /bin/bash -c '
95-
source $work_dir/../../../proxy
91+
source $work_dir/../../../proxy_aistudio
92+
rm -rf /root/.cache/aistudio/
9693
cd /workspace/PaddleFormers
9794
set -e
9895
make test
96+
echo "finished"
9997
'
10098
10199
# - name: Upload Coverage To Codecov

.github/workflows/unittest-gpu.yml

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,6 @@ env:
2828
AGILE_COMPILE_BRANCH: ${{ github.event.pull_request.base.ref }}
2929
CI_name: unittest-gpu-ci
3030
no_proxy: "localhost,bj.bcebos.com,su.bcebos.com,bcebos.com,apiin.im.baidu.com,gitee.com,aliyun.com,.baidu.com,.tuna.tsinghua.edu.cn"
31-
HF_ENDPOINT: https://hf-mirror.com
32-
STUDIO_GIT_HOST: http://git.prod.idc-to-cloud.aistudio.baidu-int.com
33-
PPNLP_HOME: /ssd1/PaddleNLP
34-
HF_DATASETS_CACHE: /ssd1/PaddleNLP/huggingface/datasets
35-
TRANSFORMERS_CACHE: /ssd1/PaddleNLP/huggingface
3631
RUN_DOWNSTREAM: ${{ inputs.run_downstream }}
3732

3833
defaults:
@@ -84,7 +79,6 @@ jobs:
8479
-v $work_dir/../../..:$work_dir/../../.. \
8580
-v $work_dir:/workspace \
8681
-v /home/.cache/pip:/home/.cache/pip \
87-
-v /ssd1/PaddleNLP:/ssd1/PaddleNLP \
8882
-e BRANCH \
8983
-e AGILE_COMPILE_BRANCH \
9084
-e PR_ID \
@@ -96,7 +90,6 @@ jobs:
9690
-e paddle_whl \
9791
-e FLAGS_dynamic_static_unified_comm \
9892
-e python_version \
99-
-e CUDA_VISIBLE_DEVICES=$cudaid \
10093
-w /workspace --runtime=nvidia $IMAGE_NAME
10194
fi
10295
@@ -142,7 +135,9 @@ jobs:
142135
ln -sf $(which python${python_version}) /usr/bin/python3
143136
pip config set global.cache-dir "/home/.cache/pip"
144137
set -e
138+
rm -rf /root/.cache/aistudio/
145139
cd /workspace/PaddleFormers && git config --global --add safe.directory $PWD
140+
source $work_dir/../../../proxy
146141
timeout 50m bash scripts/unit_test/ci_unit.sh ${paddle_whl}
147142
'
148143
fi

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@ lint:
3535
test: unit-test
3636

3737
unit-test:
38+
DOWNLOAD_SOURCE=aistudio \
3839
PYTHONPATH=$(shell pwd) pytest -v \
39-
-n auto \
4040
--retries 1 --retry-delay 1 \
4141
--durations 20 \
4242
--cov paddleformers \

paddleformers/generation/configuration_utils.py

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626

2727
from ..transformers.configuration_utils import PretrainedConfig
2828
from ..utils import GENERATION_CONFIG_NAME
29-
from ..utils.download import resolve_file_path
29+
from ..utils.download import DownloadSource, resolve_file_path
3030
from ..utils.downloader import hf_file_exists
3131
from ..utils.log import logger
3232

@@ -337,9 +337,7 @@ def save_pretrained(
337337
def from_pretrained(
338338
cls,
339339
pretrained_model_name_or_path: Union[str, os.PathLike],
340-
from_hf_hub: bool = False,
341-
from_aistudio: bool = False,
342-
from_modelscope: bool = False,
340+
download_hub: DownloadSource = None,
343341
config_file_name: Optional[Union[str, os.PathLike]] = None,
344342
cache_dir: Optional[Union[str, os.PathLike]] = None,
345343
force_download: bool = False,
@@ -358,8 +356,8 @@ def from_pretrained(
358356
- a path to a *directory* containing a configuration file saved using the
359357
[`~PretrainedConfig.save_pretrained`] method, e.g., `./my_model_directory/`.
360358
- a path or url to a saved configuration JSON *file*, e.g., `./my_model_directory/configuration.json`.
361-
from_hf_hub (bool, *optional*):
362-
load config from huggingface hub: https://huggingface.co/models
359+
download_hub (DownloadSource, *optional*):
360+
The source for model downloading, options include `huggingface`, `aistudio`, `modelscope`, default `aistudio`.
363361
cache_dir (`str` or `os.PathLike`, *optional*):
364362
Path to a directory in which a downloaded pretrained model configuration should be cached if the
365363
standard cache should not be used.
@@ -418,9 +416,7 @@ def from_pretrained(
418416
subfolder,
419417
cache_dir=cache_dir,
420418
force_download=force_download,
421-
from_aistudio=from_aistudio,
422-
from_hf_hub=from_hf_hub,
423-
from_modelscope=from_modelscope,
419+
download_hub=download_hub,
424420
)
425421
assert (
426422
resolved_config_file is not None

paddleformers/transformers/auto/configuration.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -294,19 +294,15 @@ def from_pretrained(cls, pretrained_model_name_or_path: str, *model_args, **kwar
294294
subfolder = kwargs.get("subfolder", "")
295295
if subfolder is None:
296296
subfolder = ""
297-
from_hf_hub = kwargs.get("from_hf_hub", False)
298-
from_aistudio = kwargs.get("from_aistudio", False)
299-
from_modelscope = kwargs.get("from_modelscope", False)
300297
cache_dir = kwargs.pop("cache_dir", None)
298+
download_hub = kwargs.get("download_hub", None)
301299

302300
config_file = resolve_file_path(
303301
pretrained_model_name_or_path,
304302
[cls.config_file, cls.legacy_config_file],
305303
subfolder,
306304
cache_dir=cache_dir,
307-
from_hf_hub=from_hf_hub,
308-
from_aistudio=from_aistudio,
309-
from_modelscope=from_modelscope,
305+
download_hub=download_hub,
310306
)
311307
config_dict, unused_kwargs = PretrainedConfig.get_config_dict(pretrained_model_name_or_path, **kwargs)
312308
if "model_type" in config_dict:

paddleformers/transformers/auto/image_processing.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -128,9 +128,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
128128
subfolder = kwargs.get("subfolder", "")
129129
if subfolder is None:
130130
subfolder = ""
131-
from_hf_hub = kwargs.get("from_hf_hub", False)
132-
from_aistudio = kwargs.get("from_aistudio", False)
133-
from_modelscope = kwargs.get("from_modelscope", False)
131+
download_hub = kwargs.get("download_hub", None)
134132
kwargs["subfolder"] = subfolder
135133
kwargs["cache_dir"] = cache_dir
136134

@@ -157,9 +155,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
157155
[cls.image_processor_config_file],
158156
subfolder,
159157
cache_dir=cache_dir,
160-
from_hf_hub=from_hf_hub,
161-
from_aistudio=from_aistudio,
162-
from_modelscope=from_modelscope,
158+
download_hub=download_hub,
163159
)
164160
if config_file is not None and os.path.exists(config_file):
165161
processor_class = cls._get_image_processor_class_from_config(

paddleformers/transformers/auto/modeling.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -223,9 +223,7 @@ def _from_pretrained(cls, pretrained_model_name_or_path, task=None, *model_args,
223223
else:
224224
print("We only support task choice for AutoModel.")
225225
cache_dir = kwargs.get("cache_dir", None)
226-
from_hf_hub = kwargs.get("from_hf_hub", False)
227-
from_aistudio = kwargs.get("from_aistudio", False)
228-
from_modelscope = kwargs.get("from_modelscope", False)
226+
download_hub = kwargs.get("download_hub", None)
229227
subfolder = kwargs.get("subfolder", "")
230228
if subfolder is None:
231229
subfolder = ""
@@ -273,9 +271,7 @@ def _from_pretrained(cls, pretrained_model_name_or_path, task=None, *model_args,
273271
[cls.model_config_file, cls.legacy_model_config_file],
274272
subfolder,
275273
cache_dir=cache_dir,
276-
from_hf_hub=from_hf_hub,
277-
from_aistudio=from_aistudio,
278-
from_modelscope=from_modelscope,
274+
download_hub=download_hub,
279275
)
280276
if config_file is not None and os.path.exists(config_file):
281277
model_class = cls._get_model_class_from_config(pretrained_model_name_or_path, config_file)

paddleformers/transformers/auto/processing.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -136,9 +136,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
136136
subfolder = kwargs.get("subfolder", "")
137137
if subfolder is None:
138138
subfolder = ""
139-
from_hf_hub = kwargs.get("from_hf_hub", False)
140-
from_aistudio = kwargs.get("from_aistudio", False)
141-
from_modelscope = kwargs.get("from_modelscope", False)
139+
download_hub = kwargs.get("download_hub", None)
142140
kwargs["subfolder"] = subfolder
143141
kwargs["cache_dir"] = cache_dir
144142

@@ -165,9 +163,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
165163
[cls.processor_config_file],
166164
subfolder,
167165
cache_dir=cache_dir,
168-
from_hf_hub=from_hf_hub,
169-
from_aistudio=from_aistudio,
170-
from_modelscope=from_modelscope,
166+
download_hub=download_hub,
171167
)
172168
if config_file is not None and os.path.exists(config_file):
173169
processor_class = cls._get_processor_class_from_config(

paddleformers/transformers/auto/tokenizer.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,7 @@ def get_tokenizer_config(
226226
tokenizer.save_pretrained("tokenizer-test")
227227
tokenizer_config = get_tokenizer_config("tokenizer-test")
228228
```"""
229+
download_hub = kwargs.get("download_hub", None)
229230

230231
resolved_config_file = resolve_file_path(
231232
pretrained_model_name_or_path,
@@ -238,6 +239,7 @@ def get_tokenizer_config(
238239
revision=revision,
239240
local_files_only=local_files_only,
240241
subfolder=subfolder,
242+
download_hub=download_hub,
241243
)
242244
if resolved_config_file is None:
243245
logger.info("Could not locate the tokenizer configuration file, will try to use the model config instead.")

paddleformers/transformers/configuration_utils.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -837,9 +837,7 @@ def _get_config_dict(
837837
cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs
838838
) -> Tuple[Dict[str, Any], Dict[str, Any]]:
839839
cache_dir = kwargs.pop("cache_dir", None)
840-
from_hf_hub = kwargs.pop("from_hf_hub", False)
841-
from_aistudio = kwargs.pop("from_aistudio", False)
842-
from_modelscope = kwargs.pop("from_modelscope", False)
840+
download_hub = kwargs.pop("download_hub", None)
843841
subfolder = kwargs.pop("subfolder", "")
844842
if subfolder is None:
845843
subfolder = ""
@@ -879,9 +877,7 @@ def _get_config_dict(
879877
subfolder,
880878
cache_dir=cache_dir,
881879
force_download=force_download,
882-
from_aistudio=from_aistudio,
883-
from_hf_hub=from_hf_hub,
884-
from_modelscope=from_modelscope,
880+
download_hub=download_hub,
885881
)
886882
if resolved_config_file is None:
887883
return None, kwargs

0 commit comments

Comments (0)