Commit 2a6b6c6

wj-Mcat authored (with ZeyuChen and guoshengCS)
fix issue: #2387 to make tokenizer more robust (#2395)
* fix issue: #2387
* format code

Co-authored-by: Zeyu Chen <[email protected]>
Co-authored-by: Guo Sheng <[email protected]>
1 parent 0432c12 commit 2a6b6c6

File tree

1 file changed (+2, -2 lines)


paddlenlp/transformers/tokenizer_utils_base.py

Lines changed: 2 additions & 2 deletions
@@ -1619,8 +1619,8 @@ def convert_added_tokens(obj):
             # does include a vocab file path in it. However, if the vocab file
             # path included in json does not exist, such as was deleted, to make
             # it still work, use the vocab file under this dir.
-            elif not os.path.isfile(init_kwargs[args_name]) and os.path.isfile(
-                    file_path):
+            elif not os.path.isfile(init_kwargs.get(
+                    args_name, '')) and os.path.isfile(file_path):
                 init_kwargs[args_name] = file_path
             # TODO(guosheng): avoid reduplication of position args and key word args
             tokenizer = cls(*init_args, **init_kwargs)
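For context, the change swaps a direct dict lookup for `dict.get` with an empty-string default, so a tokenizer config that lacks the vocab-file entry no longer raises `KeyError` and the branch can fall back to the file under the local directory. A minimal standalone sketch of the pattern (the `"vocab_file"` key name here is illustrative, not necessarily the exact `args_name` used upstream):

```python
import os

# A tokenizer config dict that is missing the vocab-file entry.
init_kwargs = {}

# Old lookup: direct indexing raises KeyError when the key is absent.
try:
    os.path.isfile(init_kwargs["vocab_file"])
    old_lookup_ok = True
except KeyError:
    old_lookup_ok = False

# New lookup: .get() falls back to '', and os.path.isfile('') is simply
# False, so the condition evaluates safely instead of crashing.
new_result = os.path.isfile(init_kwargs.get("vocab_file", ""))

print(old_lookup_ok, new_result)  # False False
```

This is the standard idiom for making a path check tolerant of missing config keys: `os.path.isfile` on a falsy default returns `False` rather than propagating an exception.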
