
Commit 1382201 (parent f152050)

[Roformerv2 P0] Add PretrainedConfig and unit tests (#5131)

* Refactor RoFormerv2 usage
* Add configuration and modeling
* [RoFormerv2 P0] Refactor RoFormerv2 usage
* Fix issues raised in the PR

File tree: 7 files changed, +776 −147 lines

paddlenlp/transformers/__init__.py (1 addition, 0 deletions)

@@ -166,6 +166,7 @@
 from .gau_alpha.configuration import *
 from .roformerv2.modeling import *
 from .roformerv2.tokenizer import *
+from .roformerv2.configuration import *
 from .optimization import *
 from .opt.modeling import *
 from .auto.modeling import *
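
With this re-export, the new config class becomes importable from the top-level paddlenlp.transformers namespace. A minimal smoke test, assuming an installed PaddleNLP build that includes this commit:

from paddlenlp.transformers import RoFormerv2Config

# Constructor defaults match the roformer_v2_chinese_char_base preset
# for the keys that preset defines (see configuration.py below).
config = RoFormerv2Config()
print(config.hidden_size)       # 768
print(config.num_hidden_layers) # 12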
paddlenlp/transformers/roformerv2/configuration.py (new file, 122 additions, 0 deletions)

# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

""" RoFormerv2 model configuration """
from __future__ import annotations

from paddlenlp.transformers.configuration_utils import PretrainedConfig

__all__ = ["RoFormerv2Config", "ROFORMERV2_PRETRAINED_INIT_CONFIGURATION", "ROFORMERV2_PRETRAINED_RESOURCE_FILES_MAP"]

ROFORMERV2_PRETRAINED_INIT_CONFIGURATION = {
    "roformer_v2_chinese_char_small": {
        "vocab_size": 12000,
        "hidden_size": 384,
        "num_hidden_layers": 6,
        "num_attention_heads": 6,
        "intermediate_size": 1536,
        "hidden_act": "relu",
        "hidden_dropout_prob": 0.1,
        "attention_probs_dropout_prob": 0.1,
        "max_position_embeddings": 512,
        "type_vocab_size": 2,
        "pad_token_id": 0,
        "rotary_value": False,
        "use_bias": False,
    },
    "roformer_v2_chinese_char_base": {
        "vocab_size": 12000,
        "hidden_size": 768,
        "num_hidden_layers": 12,
        "num_attention_heads": 12,
        "intermediate_size": 3072,
        "hidden_act": "relu",
        "hidden_dropout_prob": 0.1,
        "attention_probs_dropout_prob": 0.1,
        "max_position_embeddings": 512,
        "type_vocab_size": 2,
        "pad_token_id": 0,
        "rotary_value": False,
        "use_bias": False,
    },
    "roformer_v2_chinese_char_large": {
        "vocab_size": 12000,
        "hidden_size": 1024,
        "num_hidden_layers": 24,
        "num_attention_heads": 16,
        "intermediate_size": 4096,
        "hidden_act": "relu",
        "hidden_dropout_prob": 0.1,
        "attention_probs_dropout_prob": 0.1,
        "max_position_embeddings": 512,
        "type_vocab_size": 2,
        "pad_token_id": 0,
        "rotary_value": False,
        "use_bias": False,
    },
}

ROFORMERV2_PRETRAINED_RESOURCE_FILES_MAP = {
    "model_state": {
        "roformer_v2_chinese_char_small": "https://bj.bcebos.com/paddlenlp/models/transformers/roformerv2/roformer_v2_chinese_char_small/model_state.pdparams",
        "roformer_v2_chinese_char_base": "https://bj.bcebos.com/paddlenlp/models/transformers/roformerv2/roformer_v2_chinese_char_base/model_state.pdparams",
        "roformer_v2_chinese_char_large": "https://bj.bcebos.com/paddlenlp/models/transformers/roformerv2/roformer_v2_chinese_char_large/model_state.pdparams",
    }
}


class RoFormerv2Config(PretrainedConfig):
    model_type = "roformerv2"
    pretrained_init_configuration = ROFORMERV2_PRETRAINED_INIT_CONFIGURATION

    def __init__(
        self,
        vocab_size: int = 12000,
        hidden_size: int = 768,
        num_hidden_layers: int = 12,
        num_attention_heads: int = 12,
        intermediate_size: int = 3072,
        hidden_act: str = "relu",
        hidden_dropout_prob: float = 0.1,
        attention_probs_dropout_prob: float = 0.1,
        act_dropout: float = 0,
        max_position_embeddings: int = 512,
        type_vocab_size: int = 2,
        pad_token_id: int = 0,
        rotary_value: bool = False,
        use_bias: bool = False,
        epsilon: float = 1e-12,
        normalize_before: bool = False,
        num_choices: int = 2,
        **kwargs
    ):
        super().__init__(**kwargs)

        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.intermediate_size = intermediate_size
        self.hidden_act = hidden_act
        self.hidden_dropout_prob = hidden_dropout_prob
        self.attention_probs_dropout_prob = attention_probs_dropout_prob
        self.act_dropout = act_dropout
        self.max_position_embeddings = max_position_embeddings
        self.type_vocab_size = type_vocab_size
        self.pad_token_id = pad_token_id
        self.rotary_value = rotary_value
        self.use_bias = use_bias
        self.epsilon = epsilon
        self.normalize_before = normalize_before
        self.num_choices = num_choices
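
A hedged usage sketch of the new class, using only names defined in this diff: build the small variant from its registered preset and check a few fields.

from paddlenlp.transformers.roformerv2.configuration import (
    ROFORMERV2_PRETRAINED_INIT_CONFIGURATION,
    RoFormerv2Config,
)

# Instantiate the small variant from its registered preset; unknown keys
# would pass through **kwargs to PretrainedConfig.
small = RoFormerv2Config(
    **ROFORMERV2_PRETRAINED_INIT_CONFIGURATION["roformer_v2_chinese_char_small"]
)
assert small.hidden_size == 384
assert small.num_attention_heads == 6
assert small.use_bias is False

Because pretrained_init_configuration is registered on the class, the usual RoFormerv2Config.from_pretrained("roformer_v2_chinese_char_small") route should resolve the same preset, though that relies on PretrainedConfig machinery outside this diff.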
