Skip to content

Commit ed05711

Browse files
authored
fix sentencepiece.bpe.model download (#2454)
1 parent f8c39d8 commit ed05711

File tree

4 files changed

+5
-14
lines changed

4 files changed

+5
-14
lines changed

.github/workflows/unittest-gpu.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -138,7 +138,7 @@ jobs:
138138
rm -rf /root/.cache/aistudio/
139139
cd /workspace/PaddleFormers && git config --global --add safe.directory $PWD
140140
source $work_dir/../../../proxy
141-
timeout 50m bash scripts/unit_test/ci_unit.sh ${paddle_whl}
141+
timeout 100m bash scripts/unit_test/ci_unit.sh ${paddle_whl}
142142
'
143143
fi
144144

paddleformers/transformers/llama/tokenizer.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,7 @@
3030
class LlamaTokenizer(PretrainedTokenizer):
3131
model_input_names = ["input_ids", "attention_mask", "position_ids"]
3232
resource_files_names = {
33-
"vocab_file": "sentencepiece.bpe.model",
33+
"vocab_file": "tokenizer.model",
3434
}
3535
pretrained_resource_files_map = {
3636
"vocab_file": {

paddleformers/utils/download/download.py

Lines changed: 0 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -230,13 +230,6 @@ def resolve_file_path(
230230
endpoint=endpoint,
231231
)
232232
if is_available:
233-
import json
234-
235-
with open(
236-
"/root/paddlejob/workspace/env_run/fujinji/erniekit/mine_test/download_source/download_kwargs.json",
237-
"w",
238-
) as f:
239-
json.dump(download_kwargs, f)
240233
cached_file = hf_hub_download(
241234
**download_kwargs,
242235
)

tests/utils/test_aistudio_download.py

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -24,20 +24,19 @@
2424
# repo_id = "PaddleNLP/DeepSeek-R1-Distill-Qwen-1.5B"
2525
# filename = "model.safetensors"
2626
# revision = "master"
27-
# local_dir = "./local/model"
27+
# cache_dir = "./local/model"
2828

2929
# # 调用待测试的函数
3030
# result = resolve_file_path(
3131
# repo_id=repo_id,
3232
# filenames=filename,
3333
# revision=revision,
3434
# download_hub="aistudio",
35-
# local_dir=local_dir,
35+
# cache_dir=cache_dir,
3636
# )
3737

3838
# # 验证结果
39-
# print(result)
40-
# self.assertEqual(result, f"{local_dir}/{filename}")
39+
# self.assertEqual(result, f"{cache_dir}/{repo_id}/{filename}")
4140

4241
# def test_aistudio_download_transformer(self):
4342
# # 设置测试数据
@@ -55,7 +54,6 @@
5554
# )
5655

5756
# # 验证结果
58-
# print(result)
5957
# self.assertEqual(result, f"{cache_dir}/{filename}")
6058

6159

0 commit comments

Comments
 (0)