Skip to content

Commit 58b6b55

Browse files
sijunhe and wj-Mcat authored
Fix taskflow tests and the FileLock import bug (#3898)
* fix taskflow tests * add copyright * pre-commit * use md5 for lock-file name * fix imports * fix gpu test * remove print statement Co-authored-by: wj-Mcat <[email protected]>
1 parent ed778dc commit 58b6b55

File tree

4 files changed

+42
-20
lines changed

4 files changed

+42
-20
lines changed

paddlenlp/utils/downloader.py

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -11,21 +11,24 @@
1111
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
14+
import hashlib
15+
import json
1416
import os
15-
import sys
1617
import os.path as osp
17-
from typing import Optional
1818
import shutil
19-
import json
20-
import requests
21-
import hashlib
19+
import sys
2220
import tarfile
23-
import zipfile
21+
import threading
2422
import time
2523
import uuid
26-
import threading
24+
import zipfile
2725
from collections import OrderedDict
28-
from .env import DOWNLOAD_SERVER, SUCCESS_STATUS, FAILED_STATUS
26+
from typing import Optional
27+
28+
import requests
29+
30+
from .env import DOWNLOAD_SERVER, FAILED_STATUS, LOCK_FILE_HOME, SUCCESS_STATUS
31+
from .file_lock import FileLock
2932

3033
try:
3134
from tqdm import tqdm
@@ -117,7 +120,7 @@ def get_weights_path_from_url(url, md5sum=None):
117120
Args:
118121
url (str): download url
119122
md5sum (str): md5 sum of download package
120-
123+
121124
Returns:
122125
str: a local path to save downloaded weights.
123126
Examples:
@@ -147,7 +150,7 @@ def get_path_from_url(url, root_dir, md5sum=None, check_exist=True):
147150
root_dir (str): root dir for downloading, it should be
148151
WEIGHTS_HOME or DATASET_HOME
149152
md5sum (str): md5 sum of download package
150-
153+
151154
Returns:
152155
str: a local path to save downloaded models & weights & datasets.
153156
"""
@@ -192,11 +195,13 @@ def get_path_from_url_with_filelock(url: str,
192195
os.makedirs(root_dir, exist_ok=True)
193196

194197
# create lock file, which is empty, under the `LOCK_FILE_HOME` directory.
195-
lock_file_path = os.path.join(LOCK_FILE_HOME,
196-
f"{str(hash(url + root_dir))}")
198+
lock_file_name = hashlib.md5((url + root_dir).encode("utf-8")).hexdigest()
199+
lock_file_path = os.path.join(LOCK_FILE_HOME, lock_file_name)
200+
197201
with FileLock(lock_file_path):
198202
# import get_path_from_url from paddle framework
199-
from paddle.utils.download import get_path_from_url as _get_path_from_url
203+
from paddle.utils.download import \
204+
get_path_from_url as _get_path_from_url
200205
result = _get_path_from_url(url=url,
201206
root_dir=root_dir,
202207
md5sum=md5sum,
@@ -435,6 +440,7 @@ def request_check(self, task, command, addition):
435440
extra.update({"addition": addition})
436441
try:
437442
import paddle
443+
438444
import paddlenlp
439445
payload['hub_version'] = " "
440446
payload['ppnlp_version'] = paddlenlp.__version__

tests/taskflow/__init__.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.

tests/taskflow/test_text_classification.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ def tearDown(self):
5252
self.temp_dir.cleanup()
5353

5454
@parameterized.expand([(1, ), (2, )])
55-
def test_text_classification_taskf(self, batch_size):
55+
def test_text_classification_task(self, batch_size):
5656
# input_text is a tuple to simulate the args passed from Taskflow to TextClassificationTask
5757
input_text = (["百度", "深度学习框架", "飞桨", "PaddleNLP"], )
5858
id2label = {
@@ -64,7 +64,8 @@ def test_text_classification_taskf(self, batch_size):
6464
task="text_classification",
6565
task_path=self.dygraph_model_path,
6666
id2label=id2label,
67-
batch_size=batch_size)
67+
batch_size=batch_size,
68+
device_id=0)
6869

6970
dygraph_results = dygraph_taskflow(input_text)
7071
self.assertEqual(len(dygraph_results), len(input_text[0]))
@@ -75,7 +76,8 @@ def test_text_classification_taskf(self, batch_size):
7576
is_static_model=True,
7677
task_path=self.static_model_path,
7778
id2label=id2label,
78-
batch_size=batch_size)
79+
batch_size=batch_size,
80+
device_id=0)
7981

8082
static_results = static_taskflow(input_text)
8183
self.assertEqual(len(static_results), len(input_text[0]))

tests/transformers/test_modeling_utils.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -55,14 +55,15 @@ def test_multiprocess_downloading(self):
5555
num_process_in_pool, num_jobs = 10, 20
5656
small_model_path = "https://paddlenlp.bj.bcebos.com/models/community/__internal_testing__/bert/model_state.pdparams"
5757

58-
from paddlenlp.transformers.model_utils import get_path_from_url
58+
from paddlenlp.transformers.model_utils import get_path_from_url_with_filelock
5959
with TemporaryDirectory() as tempdir:
6060

6161
with Pool(num_process_in_pool) as pool:
62-
pool.starmap(get_path_from_url, [(small_model_path, tempdir)
63-
for _ in range(num_jobs)])
62+
pool.starmap(get_path_from_url_with_filelock,
63+
[(small_model_path, tempdir)
64+
for _ in range(num_jobs)])
6465

65-
# @slow
66+
@slow
6667
def test_model_from_pretrained_with_multiprocessing(self):
6768
"""
6869
this test can not init tooooo many models which will occupy CPU/GPU memorys.

0 commit comments

Comments
 (0)