from paddlenlp.data import Stack, Tuple, Pad
from paddlenlp.datasets import load_dataset
from paddlenlp.transformers import LinearDecayWithWarmup
- from paddlenlp.metrics import MultiLabelsMetric
+ from paddlenlp.metrics import MultiLabelsMetric, AccuracyAndF1
from paddlenlp.ops.optimizer import ExponentialMovingAverage

from utils import convert_example

METRIC_CLASSES = {
    'KUAKE-QIC': Accuracy,
    'KUAKE-QQR': Accuracy,
    'KUAKE-QTR': Accuracy,
-     'CHIP-CTC': partial(
-         MultiLabelsMetric, name='macro'),
-     'CHIP-STS': partial(
-         MultiLabelsMetric, name='macro'),
-     'CHIP-CDN-2C': partial(
-         MultiLabelsMetric, name='micro')
+     'CHIP-CTC': MultiLabelsMetric,
+     'CHIP-STS': MultiLabelsMetric,
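+     # CHIP-CDN-2C is a two-class task, hence the switch to AccuracyAndF1 below;
+     # the multi-class CHIP tasks keep the per-label MultiLabelsMetric.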
+     'CHIP-CDN-2C': AccuracyAndF1
}

# yapf: disable
parser = argparse.ArgumentParser()
parser.add_argument('--dataset', choices=['KUAKE-QIC', 'KUAKE-QQR', 'KUAKE-QTR', 'CHIP-STS', 'CHIP-CTC', 'CHIP-CDN-2C'],
-                     default='KUAKE-QIC', type=str, help='Dataset for token classfication tasks.')
+                     default='KUAKE-QIC', type=str, help='Dataset for sequence classification tasks.')
parser.add_argument('--seed', default=1000, type=int, help='Random seed for initialization.')
parser.add_argument('--device', choices=['cpu', 'gpu', 'xpu', 'npu'], default='gpu', help='Select which device to train model, default to gpu.')
- parser.add_argument('--epochs', default=3, type=int, help='Total number of training epochs to perform.')
+ parser.add_argument('--epochs', default=3, type=int, help='Total number of training epochs.')
parser.add_argument('--batch_size', default=32, type=int, help='Batch size per GPU/CPU for training.')
parser.add_argument('--learning_rate', default=6e-5, type=float, help='Learning rate for fine-tuning sequence classification task.')
- parser.add_argument('--weight_decay', default=0.01, type=float, help="Weight decay if we apply some.")
- parser.add_argument('--warmup_proportion', default=0.1, type=float, help='Linear warmup proportion over the training process.')
+ parser.add_argument('--weight_decay', default=0.01, type=float, help="Weight decay applied by the optimizer, if any.")
+ parser.add_argument('--warmup_proportion', default=0.1, type=float, help='Linear warmup proportion of the learning rate over the training process.')
parser.add_argument('--max_seq_length', default=128, type=int, help='The maximum total input sequence length after tokenization.')
parser.add_argument('--init_from_ckpt', default=None, type=str, help='The path of checkpoint to be loaded.')
parser.add_argument('--logging_steps', default=10, type=int, help='The interval steps for logging.')
parser.add_argument('--save_dir', default='./checkpoint', type=str, help='The output directory where the model checkpoints will be written.')
- parser.add_argument('--save_steps', default=100, type=int, help='The interval steps to save checkppoints.')
+ parser.add_argument('--save_steps', default=100, type=int, help='The interval steps to save checkpoints.')
parser.add_argument('--valid_steps', default=100, type=int, help='The interval steps to evaluate model performance.')
parser.add_argument('--use_ema', default=False, type=distutils.util.strtobool, help='Use exponential moving average for evaluation.')
parser.add_argument('--use_amp', default=False, type=distutils.util.strtobool, help='Enable mixed precision training.')
@@ -100,9 +97,13 @@ def evaluate(model, criterion, metric, data_loader):
    if isinstance(metric, Accuracy):
        metric_name = 'accuracy'
        result = metric.accumulate()
+     elif isinstance(metric, MultiLabelsMetric):
+         metric_name = 'macro f1'
+         _, _, result = metric.accumulate('macro')
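+         # MultiLabelsMetric.accumulate('macro') yields precision, recall and
+         # F1 under the requested averaging; only the macro F1 is reported.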
    else:
-         metric_name = metric._name + ' f1'
-         _, _, result = metric.accumulate(metric._name)
+         metric_name = 'micro f1'
+         _, _, _, result, _ = metric.accumulate()
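+         # AccuracyAndF1.accumulate() returns a 5-tuple; the fourth element is
+         # the F1 score logged here as 'micro f1', the rest are discarded.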
+
    print('eval loss: %.5f, %s: %.5f' % (np.mean(losses), metric_name, result))
    model.train()
    metric.reset()
@@ -143,7 +144,10 @@ def do_train():
        'cblue', args.dataset, splits=['train', 'dev', 'test'])

    model = ppnlp.transformers.ElectraForSequenceClassification.from_pretrained(
-         'chinese-ehealth', num_classes=len(train_ds.label_list))
+         'chinese-ehealth',
+         num_classes=len(train_ds.label_list),
+         activation='tanh',
+         layer_norm_eps=1e-5)
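+         # Assumption: the tanh pooler activation and 1e-5 layer-norm epsilon
+         # are overridden to match the eHealth pretraining configuration.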
    tokenizer = ppnlp.transformers.ElectraTokenizer.from_pretrained(
        'chinese-ehealth')
@@ -152,9 +156,9 @@ def do_train():
        tokenizer=tokenizer,
        max_seq_length=args.max_seq_length)
    batchify_fn = lambda samples, fn=Tuple(
-         Pad(axis=0, pad_val=tokenizer.pad_token_id),  # input
-         Pad(axis=0, pad_val=tokenizer.pad_token_type_id),  # segment
-         Pad(axis=0, pad_val=args.max_seq_length - 1),  # position
+         Pad(axis=0, pad_val=tokenizer.pad_token_id, dtype='int64'),  # input
+         Pad(axis=0, pad_val=tokenizer.pad_token_type_id, dtype='int64'),  # segment
+         Pad(axis=0, pad_val=args.max_seq_length - 1, dtype='int64'),  # position
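+         # Pinning dtype='int64' likely guards against platform-dependent
+         # integer defaults, since Paddle's embedding lookup expects int64 ids.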
        Stack(dtype='int64')): [data for data in fn(samples)]
    train_data_loader = create_dataloader(
        train_ds,
@@ -172,7 +176,8 @@ def do_train():
    if args.init_from_ckpt and os.path.isfile(args.init_from_ckpt):
        state_dict = paddle.load(args.init_from_ckpt)
        model.set_dict(state_dict)
-     model = paddle.DataParallel(model)
+     if paddle.distributed.get_world_size() > 1:
+         model = paddle.DataParallel(model)
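+     # DataParallel is now applied only for multi-GPU runs, so single-card
+     # training skips the wrapper entirely.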

    num_training_steps = len(train_data_loader) * args.epochs
@@ -196,10 +201,13 @@ def do_train():
    if METRIC_CLASSES[args.dataset] is Accuracy:
        metric = METRIC_CLASSES[args.dataset]()
        metric_name = 'accuracy'
-     else:
+     elif METRIC_CLASSES[args.dataset] is MultiLabelsMetric:
        metric = METRIC_CLASSES[args.dataset](
            num_labels=len(train_ds.label_list))
-         metric_name = metric._name + ' f1'
+         metric_name = 'macro f1'
+     else:
+         metric = METRIC_CLASSES[args.dataset]()
+         metric_name = 'micro f1'
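+     # This branching mirrors evaluate(): macro F1 for the multi-class CHIP
+     # tasks, AccuracyAndF1's F1 (logged as 'micro f1') for CHIP-CDN-2C.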
    if args.use_amp:
        scaler = paddle.amp.GradScaler(init_loss_scaling=args.scale_loss)
    if args.use_ema and rank == 0:
@@ -222,8 +230,10 @@ def do_train():

            if isinstance(metric, Accuracy):
                result = metric.accumulate()
+             elif isinstance(metric, MultiLabelsMetric):
+                 _, _, result = metric.accumulate('macro')
            else:
-                 _, _, result = metric.accumulate(metric._name)
+                 _, _, _, result, _ = metric.accumulate()

            if args.use_amp:
                scaler.scale(loss).backward()
@@ -259,11 +269,14 @@ def do_train():
                save_dir = os.path.join(args.save_dir, 'model_%d' % global_step)
                if not os.path.exists(save_dir):
                    os.makedirs(save_dir)
-                 model._layers.save_pretrained(save_dir)
+                 if paddle.distributed.get_world_size() > 1:
+                     model._layers.save_pretrained(save_dir)
+                 else:
+                     model.save_pretrained(save_dir)
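+                 # DataParallel wraps the network, so the underlying model is
+                 # reached via model._layers; an unwrapped model saves directly.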
                tokenizer.save_pretrained(save_dir)
                tic_train = time.time()
-
-     print('Speed: %.2f steps/s' % (global_step / total_train_time))
+     if rank == 0:
+         print('Speed: %.2f steps/s' % (global_step / total_train_time))
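+     # Print the throughput summary once (from rank 0) in distributed runs.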


if __name__ == "__main__" :