from paddlenlp.datasets import load_dataset
from paddlenlp.transformers import LinearDecayWithWarmup
from paddlenlp.utils.log import logger
- from paddlenlp.transformers import ErnieForSequenceClassification, ErnieTokenizer, ErnieModel
+ from paddlenlp.transformers import PPMiniLMModel

from paddleslim.nas.ofa import OFA, DistillConfig, utils
from paddleslim.nas.ofa.utils import nlp_utils
@@ -194,11 +194,11 @@ def evaluate(model, metric, data_loader, width_mult, student=False):


### monkey patch for bert forward to accept [attention_mask, head_mask] as attention_mask
- def ernie_forward(self,
-                    input_ids,
-                    token_type_ids=None,
-                    position_ids=None,
-                    attention_mask=[None, None]):
+ def ppminilm_forward(self,
+                       input_ids,
+                       token_type_ids=None,
+                       position_ids=None,
+                       attention_mask=[None, None]):
    wtype = self.pooler.dense.fn.weight.dtype if hasattr(
        self.pooler.dense, 'fn') else self.pooler.dense.weight.dtype
    if attention_mask[0] is None:
@@ -211,7 +211,7 @@ def ernie_forward(self,
    return encoded_layer, pooled_output


- ErnieModel.forward = ernie_forward
+ PPMiniLMModel.forward = ppminilm_forward

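`PPMiniLMModel.forward = ppminilm_forward` relies on plain Python method rebinding: assigning a function to the class attribute replaces `forward` for every instance, which is what lets the OFA wrapper pass `[attention_mask, head_mask]` as a single argument. A minimal standalone sketch of that mechanism (the `Toy*` names are illustrative only, not from this commit):

    class ToyModel:                        # hypothetical stand-in for PPMiniLMModel
        def forward(self, x, mask=None):
            return x

    def patched_forward(self, x, mask=[None, None]):
        # accept an [attention_mask, head_mask] pair instead of a single mask
        attention_mask, head_mask = mask
        return x

    ToyModel.forward = patched_forward     # same mechanism as the rebind above
    print(ToyModel().forward(1.0, mask=[None, None]))   # -> 1.0, dispatched to patched_forward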
### reorder weights according head importance and neuron importance
@@ -220,14 +220,15 @@ def reorder_neuron_head(model, head_importance, neuron_importance):
    for layer, current_importance in enumerate(neuron_importance):
        # reorder heads
        idx = paddle.argsort(head_importance[layer], descending=True)
-         nlp_utils.reorder_head(model.ernie.encoder.layers[layer].self_attn, idx)
+         nlp_utils.reorder_head(model.ppminilm.encoder.layers[layer].self_attn,
+                                idx)
        # reorder neurons
        idx = paddle.argsort(
            paddle.to_tensor(current_importance), descending=True)
        nlp_utils.reorder_neuron(
-             model.ernie.encoder.layers[layer].linear1.fn, idx, dim=1)
+             model.ppminilm.encoder.layers[layer].linear1.fn, idx, dim=1)
        nlp_utils.reorder_neuron(
-             model.ernie.encoder.layers[layer].linear2.fn, idx, dim=0)
+             model.ppminilm.encoder.layers[layer].linear2.fn, idx, dim=0)
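The two `reorder_neuron` calls permute the columns of `linear1` (dim=1) and the rows of `linear2` (dim=0) with the same index, so the FFN stays numerically consistent while the most important neurons move to the front; OFA sub-networks that keep only a prefix of the width then keep the most useful units. A rough toy sketch of the underlying permutation using only core Paddle ops (not the paddleslim `nlp_utils` implementation):

    import paddle

    importance = paddle.to_tensor([0.2, 0.9, 0.5])       # toy per-neuron importance scores
    w1 = paddle.rand([4, 3])                              # like linear1.weight: [hidden, ffn]
    w2 = paddle.rand([3, 4])                              # like linear2.weight: [ffn, hidden]
    idx = paddle.argsort(importance, descending=True)     # most important neuron first
    w1_reordered = paddle.index_select(w1, idx, axis=1)   # permute linear1 columns (dim=1)
    w2_reordered = paddle.index_select(w2, idx, axis=0)   # permute linear2 rows (dim=0)
    print(idx.numpy())                                    # [1 2 0]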


def soft_cross_entropy(inp, target):
@@ -305,9 +306,9 @@ def do_train(args):
        args.model_name_or_path, num_classes=num_labels)

    # Step4: Config about distillation.
-     mapping_layers = ['ernie.embeddings']
-     for idx in range(model.ernie.config['num_hidden_layers']):
-         mapping_layers.append('ernie.encoder.layers.{}'.format(idx))
+     mapping_layers = ['ppminilm.embeddings']
+     for idx in range(model.ppminilm.config['num_hidden_layers']):
+         mapping_layers.append('ppminilm.encoder.layers.{}'.format(idx))
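Renaming the prefix here matters because each entry in `mapping_layers` must match a sublayer path reachable from the student model (now `model.ppminilm....`), since these are the layers whose outputs feed the layer-wise distillation loss against the teacher. For a hypothetical 3-layer config, the loop above produces:

    num_hidden_layers = 3                  # stand-in for model.ppminilm.config['num_hidden_layers']
    mapping_layers = ['ppminilm.embeddings']
    for idx in range(num_hidden_layers):
        mapping_layers.append('ppminilm.encoder.layers.{}'.format(idx))
    print(mapping_layers)
    # ['ppminilm.embeddings', 'ppminilm.encoder.layers.0',
    #  'ppminilm.encoder.layers.1', 'ppminilm.encoder.layers.2']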

    default_distill_config = {
        'lambda_distill': 0.1,
@@ -333,8 +334,8 @@ def do_train(args):
        ofa_model.model,
        dev_data_loader,
        loss_fct=criterion,
-         num_layers=model.ernie.config['num_hidden_layers'],
-         num_heads=model.ernie.config['num_attention_heads'])
+         num_layers=model.ppminilm.config['num_hidden_layers'],
+         num_heads=model.ppminilm.config['num_attention_heads'])
    reorder_neuron_head(ofa_model.model, head_importance, neuron_importance)

    if paddle.distributed.get_world_size() > 1: