Skip to content

Commit d6d77ec

Browse files
authored
Upload dual encoder params to bos (#1795)
* upload dual encoder params to bos
* update example code
* fix model name
* update rocketqa model name
* remove useless blank line
1 parent 87f342c commit d6d77ec

File tree

3 files changed

+28
-27
lines changed

3 files changed

+28
-27
lines changed

paddlenlp/transformers/ernie/modeling.py

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@ class ErniePretrainedModel(PretrainedModel):
186186
"vocab_size": 30522,
187187
"pad_token_id": 0,
188188
},
189-
"ernie-base-cn-query-encoder": {
189+
"rocketqa-zh-dureader-query-encoder": {
190190
"attention_probs_dropout_prob": 0.1,
191191
"hidden_act": "relu",
192192
"hidden_dropout_prob": 0.1,
@@ -199,7 +199,7 @@ class ErniePretrainedModel(PretrainedModel):
199199
"vocab_size": 18000,
200200
"pad_token_id": 0,
201201
},
202-
"ernie-base-cn-title-encoder": {
202+
"rocketqa-zh-dureader-para-encoder": {
203203
"attention_probs_dropout_prob": 0.1,
204204
"hidden_act": "relu",
205205
"hidden_dropout_prob": 0.1,
@@ -212,7 +212,7 @@ class ErniePretrainedModel(PretrainedModel):
212212
"vocab_size": 18000,
213213
"pad_token_id": 0,
214214
},
215-
"ernie-base-en-query-encoder": {
215+
"rocketqa-v1-marco-query-encoder": {
216216
"attention_probs_dropout_prob": 0.1,
217217
"hidden_act": "gelu",
218218
"hidden_dropout_prob": 0.1,
@@ -225,7 +225,7 @@ class ErniePretrainedModel(PretrainedModel):
225225
"vocab_size": 30522,
226226
"pad_token_id": 0,
227227
},
228-
"ernie-base-en-title-encoder": {
228+
"rocketqa-v1-marco-para-encoder": {
229229
"attention_probs_dropout_prob": 0.1,
230230
"hidden_act": "gelu",
231231
"hidden_dropout_prob": 0.1,
@@ -252,14 +252,14 @@ class ErniePretrainedModel(PretrainedModel):
252252
"https://bj.bcebos.com/paddlenlp/models/transformers/ernie_v2_base/ernie_v2_eng_base_finetuned_squad.pdparams",
253253
"ernie-2.0-large-en":
254254
"https://bj.bcebos.com/paddlenlp/models/transformers/ernie_v2_large/ernie_v2_eng_large.pdparams",
255-
"ernie-base-cn-query-encoder":
256-
"https://bj.bcebos.com/paddlenlp/models/transformers/semantic_indexing/ernie_base_cn_query_encoder.pdparams",
257-
"ernie-base-cn-title-encoder":
258-
"https://bj.bcebos.com/paddlenlp/models/transformers/semantic_indexing/ernie_base_cn_title_encoder.pdparams",
259-
"ernie-base-en-query-encoder":
260-
"https://bj.bcebos.com/paddlenlp/models/transformers/semantic_indexing/ernie_base_en_query_encoder.pdparams",
261-
"ernie-base-en-title-encoder":
262-
"https://bj.bcebos.com/paddlenlp/models/transformers/semantic_indexing/ernie_base_en_title_encoder.pdparams",
255+
"rocketqa-zh-dureader-query-encoder":
256+
"https://bj.bcebos.com/paddlenlp/models/transformers/rocketqa/rocketqa_zh_dureader_query_encoder.pdparams",
257+
"rocketqa-zh-dureader-para-encoder":
258+
"https://bj.bcebos.com/paddlenlp/models/transformers/rocketqa/rocketqa_zh_dureader_para_encoder.pdparams",
259+
"rocketqa-v1-marco-query-encoder":
260+
"https://bj.bcebos.com/paddlenlp/models/transformers/rocketqa/rocketqa_v1_marco_query_encoder.pdparams",
261+
"rocketqa-v1-marco-para-encoder":
262+
"https://bj.bcebos.com/paddlenlp/models/transformers/rocketqa/rocketqa_v1_marco_para_encoder.pdparams",
263263
}
264264
}
265265
base_model_prefix = "ernie"

paddlenlp/transformers/ernie/tokenizer.py

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -93,14 +93,14 @@ class ErnieTokenizer(PretrainedTokenizer):
9393
"https://bj.bcebos.com/paddlenlp/models/transformers/ernie-gen-large/vocab.txt",
9494
"ernie-gen-large-430g-en":
9595
"https://bj.bcebos.com/paddlenlp/models/transformers/ernie-gen-large-430g/vocab.txt",
96-
"ernie-base-cn-query-encoder":
97-
"https://bj.bcebos.com/paddlenlp/models/transformers/ernie/vocab.txt",
98-
"ernie-base-cn-title-encoder":
99-
"https://bj.bcebos.com/paddlenlp/models/transformers/ernie/vocab.txt",
100-
"ernie-base-en-query-encoder":
101-
"https://bj.bcebos.com/paddlenlp/models/transformers/ernie_v2_base/vocab.txt",
102-
"ernie-base-en-title-encoder":
103-
"https://bj.bcebos.com/paddlenlp/models/transformers/ernie_v2_base/vocab.txt",
96+
"rocketqa-zh-dureader-query-encoder":
97+
"https://bj.bcebos.com/paddlenlp/models/transformers/rocketqa/rocketqa-zh-dureader-vocab.txt",
98+
"rocketqa-zh-dureader-para-encoder":
99+
"https://bj.bcebos.com/paddlenlp/models/transformers/rocketqa/rocketqa-zh-dureader-vocab.txt",
100+
"rocketqa-v1-marco-query-encoder":
101+
"https://bj.bcebos.com/paddlenlp/models/transformers/rocketqa/rocketqa-v1-marco-vocab.txt",
102+
"rocketqa-v1-marco-para-encoder":
103+
"https://bj.bcebos.com/paddlenlp/models/transformers/rocketqa/rocketqa-v1-marco-vocab.txt",
104104
}
105105
}
106106
pretrained_init_configuration = {
@@ -131,16 +131,16 @@ class ErnieTokenizer(PretrainedTokenizer):
131131
"ppminilm-6l-768h": {
132132
"do_lower_case": True
133133
},
134-
"ernie-base-cn-query-encoder": {
134+
"rocketqa-zh-dureader-query-encoder": {
135135
"do_lower_case": True
136136
},
137-
"ernie-base-cn-title-encoder": {
137+
"rocketqa-zh-dureader-para-encoder": {
138138
"do_lower_case": True
139139
},
140-
"ernie-base-en-query-encoder": {
140+
"rocketqa-v1-marco-query-encoder": {
141141
"do_lower_case": True
142142
},
143-
"ernie-base-en-title-encoder": {
143+
"rocketqa-v1-marco-para-encoder": {
144144
"do_lower_case": True
145145
},
146146
}

paddlenlp/transformers/semantic_indexing/modeling.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,9 +58,11 @@ class allows two ErnieEncoder models to be trained at the same time.
5858
5959
.. code-block::
6060
61-
from paddlenlp.transformers import ErnieDualEncoder
61+
import paddle
62+
from paddlenlp.transformers import ErnieDualEncoder, ErnieTokenizer
6263
63-
model = ErnieDualEncoder("ernie-base-cn-query-encoder", "ernie-base-cn-title-encoder")
64+
model = ErnieDualEncoder("rocketqa-zh-dureader-query-encoder", "rocketqa-zh-dureader-para-encoder")
65+
tokenizer = ErnieTokenizer.from_pretrained("rocketqa-zh-dureader-query-encoder")
6466
6567
inputs = tokenizer("Welcome to use PaddlePaddle and PaddleNLP!")
6668
inputs = {k:paddle.to_tensor([v]) for (k, v) in inputs.items()}
@@ -130,7 +132,6 @@ def cosine_sim(self,
130132
title_token_type_ids=None,
131133
title_position_ids=None,
132134
title_attention_mask=None):
133-
134135
query_cls_embedding = self.get_pooled_embedding(
135136
query_input_ids, query_token_type_ids, query_position_ids,
136137
query_attention_mask)

0 commit comments

Comments (0)