Skip to content

Commit 91ab453

Browse files
authored
Fix load config for UIE (#4271)
* fix load config
* update comment
1 parent 867afd6 commit 91ab453

File tree

1 file changed

+46
-38
lines changed

1 file changed

+46
-38
lines changed

paddlenlp/taskflow/information_extraction.py

Lines changed: 46 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@
2626
from ..layers import GlobalPointerForEntityExtraction, GPLinkerForRelationExtraction
2727
from ..transformers import UIE, UIEM, UIEX, AutoModel, AutoTokenizer
2828
from ..utils.doc_parser import DocParser
29-
from ..utils.env import CONFIG_NAME
29+
from ..utils.env import CONFIG_NAME, LEGACY_CONFIG_NAME
3030
from ..utils.ie_utils import map_offset, pad_image_data
3131
from ..utils.log import logger
3232
from ..utils.tools import get_bool_ids_greater_than, get_span
@@ -114,7 +114,7 @@ class UIETask(Task):
114114

115115
resource_files_names = {
116116
"model_state": "model_state.pdparams",
117-
"model_config": "model_config.json",
117+
"config": "config.json",
118118
"vocab_file": "vocab.txt",
119119
"special_tokens_map": "special_tokens_map.json",
120120
"tokenizer_config": "tokenizer_config.json",
@@ -126,9 +126,9 @@ class UIETask(Task):
126126
"https://bj.bcebos.com/paddlenlp/taskflow/information_extraction/uie_base_v1.1/model_state.pdparams",
127127
"47b93cf6a85688791699548210048085",
128128
],
129-
"model_config": [
130-
"https://bj.bcebos.com/paddlenlp/taskflow/information_extraction/uie_base/model_config.json",
131-
"a36c185bfc17a83b6cfef6f98b29c909",
129+
"config": [
130+
"https://bj.bcebos.com/paddlenlp/taskflow/information_extraction/uie_base/config.json",
131+
"ad8b5442c758fb2dc18ea53b61e867f7",
132132
],
133133
"vocab_file": [
134134
"https://bj.bcebos.com/paddlenlp/taskflow/information_extraction/uie_base/vocab.txt",
@@ -148,9 +148,9 @@ class UIETask(Task):
148148
"https://bj.bcebos.com/paddlenlp/taskflow/information_extraction/uie_medium_v1.1/model_state.pdparams",
149149
"c34475665eb05e25f3c9cd9b020b331a",
150150
],
151-
"model_config": [
152-
"https://bj.bcebos.com/paddlenlp/taskflow/information_extraction/uie_medium/model_config.json",
153-
"6f1ee399398d4f218450fbbf5f212b15",
151+
"config": [
152+
"https://bj.bcebos.com/paddlenlp/taskflow/information_extraction/uie_medium/config.json",
153+
"7fb22b3e07c5af76371c25ab814f06b8",
154154
],
155155
"vocab_file": [
156156
"https://bj.bcebos.com/paddlenlp/taskflow/information_extraction/uie_base/vocab.txt",
@@ -170,9 +170,9 @@ class UIETask(Task):
170170
"https://bj.bcebos.com/paddlenlp/taskflow/information_extraction/uie_mini_v1.1/model_state.pdparams",
171171
"9a0805762c41b104d590c15fbe9b19fd",
172172
],
173-
"model_config": [
174-
"https://bj.bcebos.com/paddlenlp/taskflow/information_extraction/uie_mini/model_config.json",
175-
"9229ce0a9d599de4602c97324747682f",
173+
"config": [
174+
"https://bj.bcebos.com/paddlenlp/taskflow/information_extraction/uie_mini/config.json",
175+
"8ddebbf64c3f32a49e6f9e1c220e7322",
176176
],
177177
"vocab_file": [
178178
"https://bj.bcebos.com/paddlenlp/taskflow/information_extraction/uie_base/vocab.txt",
@@ -192,9 +192,9 @@ class UIETask(Task):
192192
"https://bj.bcebos.com/paddlenlp/taskflow/information_extraction/uie_micro_v1.1/model_state.pdparams",
193193
"da67287bca2906864929e16493f748e4",
194194
],
195-
"model_config": [
196-
"https://bj.bcebos.com/paddlenlp/taskflow/information_extraction/uie_micro/model_config.json",
197-
"07ef444420c3ab474f9270a1027f6da5",
195+
"config": [
196+
"https://bj.bcebos.com/paddlenlp/taskflow/information_extraction/uie_micro/config.json",
197+
"544ddc65c758536cd3ba122f55b8709c",
198198
],
199199
"vocab_file": [
200200
"https://bj.bcebos.com/paddlenlp/taskflow/information_extraction/uie_base/vocab.txt",
@@ -214,9 +214,9 @@ class UIETask(Task):
214214
"https://bj.bcebos.com/paddlenlp/taskflow/information_extraction/uie_nano_v1.1/model_state.pdparams",
215215
"48db5206232e89ef16b66467562d90e5",
216216
],
217-
"model_config": [
218-
"https://bj.bcebos.com/paddlenlp/taskflow/information_extraction/uie_nano/model_config.json",
219-
"e3a9842edf8329ccdd0cf6039cf0a8f8",
217+
"config": [
218+
"https://bj.bcebos.com/paddlenlp/taskflow/information_extraction/uie_nano/config.json",
219+
"e0e0a2c0d9651ed1a8492be5507590a9",
220220
],
221221
"vocab_file": [
222222
"https://bj.bcebos.com/paddlenlp/taskflow/information_extraction/uie_base/vocab.txt",
@@ -237,9 +237,9 @@ class UIETask(Task):
237237
"https://bj.bcebos.com/paddlenlp/taskflow/information_extraction/uie_medium_v1.1/model_state.pdparams",
238238
"c34475665eb05e25f3c9cd9b020b331a",
239239
],
240-
"model_config": [
241-
"https://bj.bcebos.com/paddlenlp/taskflow/information_extraction/uie_medium/model_config.json",
242-
"6f1ee399398d4f218450fbbf5f212b15",
240+
"config": [
241+
"https://bj.bcebos.com/paddlenlp/taskflow/information_extraction/uie_medium/config.json",
242+
"7fb22b3e07c5af76371c25ab814f06b8",
243243
],
244244
"vocab_file": [
245245
"https://bj.bcebos.com/paddlenlp/taskflow/information_extraction/uie_base/vocab.txt",
@@ -259,9 +259,9 @@ class UIETask(Task):
259259
"https://bj.bcebos.com/paddlenlp/taskflow/information_extraction/uie_medical_base_v0.2/model_state.pdparams",
260260
"7582d3b01f6faf00b7000111ea853796",
261261
],
262-
"model_config": [
263-
"https://bj.bcebos.com/paddlenlp/taskflow/information_extraction/uie_base/model_config.json",
264-
"a36c185bfc17a83b6cfef6f98b29c909",
262+
"config": [
263+
"https://bj.bcebos.com/paddlenlp/taskflow/information_extraction/uie_base/config.json",
264+
"ad8b5442c758fb2dc18ea53b61e867f7",
265265
],
266266
"vocab_file": [
267267
"https://bj.bcebos.com/paddlenlp/taskflow/information_extraction/uie_base/vocab.txt",
@@ -281,9 +281,9 @@ class UIETask(Task):
281281
"https://bj.bcebos.com/paddlenlp/taskflow/information_extraction/uie_base_en_v1.2/model_state.pdparams",
282282
"8c5d5c8faa76681a0aad58f982cd6141",
283283
],
284-
"model_config": [
285-
"https://bj.bcebos.com/paddlenlp/taskflow/information_extraction/uie_base_en/model_config.json",
286-
"2ca9fe0eea8ff9418725d1a24fcf5c36",
284+
"config": [
285+
"https://bj.bcebos.com/paddlenlp/taskflow/information_extraction/uie_base_en/config.json",
286+
"257b80ea8b7889fd8b83a9ace7a8a220",
287287
],
288288
"vocab_file": [
289289
"https://bj.bcebos.com/paddlenlp/taskflow/information_extraction/uie_base_en/vocab.txt",
@@ -303,9 +303,9 @@ class UIETask(Task):
303303
"https://bj.bcebos.com/paddlenlp/taskflow/information_extraction/uie_m_base_v1.1/model_state.pdparams",
304304
"eb00c06bd7144e76343d750f5bf36ff6",
305305
],
306-
"model_config": [
307-
"https://bj.bcebos.com/paddlenlp/taskflow/information_extraction/uie_m_base/model_config.json",
308-
"05c4b9d050e1402a891b207e36d2e501",
306+
"config": [
307+
"https://bj.bcebos.com/paddlenlp/taskflow/information_extraction/uie_m_base/config.json",
308+
"f03de3ce1b83c13e7bee18e6f323d33f",
309309
],
310310
"vocab_file": [
311311
"https://bj.bcebos.com/paddlenlp/taskflow/information_extraction/uie_m_base/vocab.txt",
@@ -329,9 +329,9 @@ class UIETask(Task):
329329
"https://bj.bcebos.com/paddlenlp/taskflow/information_extraction/uie_m_large_v1.1/model_state.pdparams",
330330
"9db83a67f34a9c2483dbe57d2510b4c2",
331331
],
332-
"model_config": [
333-
"https://bj.bcebos.com/paddlenlp/taskflow/information_extraction/uie_m_large/model_config.json",
334-
"22ad69618dc3f4c3fe756e3044c3056e",
332+
"config": [
333+
"https://bj.bcebos.com/paddlenlp/taskflow/information_extraction/uie_m_large/config.json",
334+
"8f540de05de57ecc66336b41f3a7ffdb",
335335
],
336336
"vocab_file": [
337337
"https://bj.bcebos.com/paddlenlp/taskflow/information_extraction/uie_m_large/vocab.txt",
@@ -355,9 +355,9 @@ class UIETask(Task):
355355
"https://bj.bcebos.com/paddlenlp/taskflow/information_extraction/uie_x_base_v1.0/model_state.pdparams",
356356
"a953b55f7639ae73d1df6c2c5f7667dd",
357357
],
358-
"model_config": [
359-
"https://bj.bcebos.com/paddlenlp/taskflow/information_extraction/uie_x_base/model_config.json",
360-
"50be05e78aec34d37596513870fa050e",
358+
"config": [
359+
"https://bj.bcebos.com/paddlenlp/taskflow/information_extraction/uie_x_base/config.json",
360+
"6bcd7d4b119717121fa0276c20bd9224",
361361
],
362362
"vocab_file": [
363363
"https://bj.bcebos.com/paddlenlp/taskflow/information_extraction/uie_x_base/vocab.txt",
@@ -403,9 +403,17 @@ def __init__(self, task, model, schema=None, **kwargs):
403403
with open(config_file_path) as f:
404404
self._init_class = json.load(f)["architectures"].pop()
405405
else:
406-
self._check_task_files()
407-
with open(os.path.join(self._task_path, "model_config.json")) as f:
408-
self._init_class = json.load(f)["init_class"]
406+
# Compatible with the model fine-tuned without PretrainedConfig
407+
if os.path.exists(os.path.join(self._task_path, LEGACY_CONFIG_NAME)):
408+
if "config" in self.resource_files_names.keys():
409+
del self.resource_files_names["config"]
410+
with open(os.path.join(self._task_path, LEGACY_CONFIG_NAME)) as f:
411+
self._init_class = json.load(f)["init_class"]
412+
self._check_task_files()
413+
else:
414+
self._check_task_files()
415+
with open(os.path.join(self._task_path, CONFIG_NAME)) as f:
416+
self._init_class = json.load(f)["architectures"].pop()
409417

410418
if self._init_class not in ["UIEX", "UIEM"]:
411419
if "sentencepiece_model_file" in self.resource_files_names.keys():

0 commit comments

Comments
 (0)