@@ -29,8 +29,6 @@
 parser.add_argument('--warmup_proportion', default=0.1, type=float, help='Linear warmup proportion over the training process.')
 parser.add_argument('--use_amp', default=False, type=bool, help='Enable mixed precision training.')
 parser.add_argument('--epochs', default=1, type=int, help='Total number of training epochs.')
-parser.add_argument('--eval_mention', default=True, type=bool, help='.')
-parser.add_argument('--update_tokenizer', default=True, type=bool, help='Update the word tokenizer during training.')
 parser.add_argument('--seed', default=1000, type=int, help='Random seed.')
 parser.add_argument('--save_dir', default='./checkpoint', type=str, help='The output directory where the model checkpoints will be written.')
 
@@ -105,7 +103,7 @@ def do_train():
     'input_ids': Pad(axis=0, pad_val=tokenizer.pad_token_id, dtype='int64'),
     'token_type_ids': Pad(axis=0, pad_val=tokenizer.pad_token_type_id, dtype='int64'),
     'position_ids': Pad(axis=0, pad_val=tokenizer.pad_token_id, dtype='int64'),
-    'mask': Pad(axis=0, pad_val=0, dtype='float32'),
+    'attention_mask': Pad(axis=0, pad_val=0, dtype='float32'),
     'label_oth': Pad(axis=0, pad_val=pad_label_id[0], dtype='int64'),
     'label_sym': Pad(axis=0, pad_val=pad_label_id[1], dtype='int64')
 }): fn(samples)
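For context on the renamed field: the dict above is the per-field collation table of a PaddleNLP batchify function (the surrounding `lambda samples, fn=Dict({...}): fn(samples)` wrapper is only partly visible in the hunk). A minimal sketch, not taken from this PR, of how `paddlenlp.data.Dict`/`Pad` pad each field to the longest sequence in a batch; the sample values and `pad_token_id` below are made up:

```python
# Hypothetical, self-contained sketch; field names follow the hunk above.
from paddlenlp.data import Dict, Pad

pad_token_id = 0  # stand-in for tokenizer.pad_token_id

batchify_fn = lambda samples, fn=Dict({
    'input_ids': Pad(axis=0, pad_val=pad_token_id, dtype='int64'),
    'attention_mask': Pad(axis=0, pad_val=0, dtype='float32'),
}): fn(samples)

samples = [
    {'input_ids': [101, 2023, 102], 'attention_mask': [1, 1, 1]},
    {'input_ids': [101, 102], 'attention_mask': [1, 1]},
]
input_ids, attention_mask = batchify_fn(samples)
# input_ids      -> (2, 3) int64 array, second row padded with pad_token_id
# attention_mask -> (2, 3) float32 array, second row padded with 0
```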
@@ -164,8 +162,6 @@ def do_train():
         with paddle.amp.auto_cast(
                 args.use_amp,
                 custom_white_list=['layer_norm', 'softmax', 'gelu'], ):
-            att_mask = paddle.unsqueeze(masks, axis=2)
-            att_mask = paddle.matmul(att_mask, att_mask, transpose_y=True)
             logits = model(input_ids, token_type_ids, position_ids, masks)
 
             loss_oth = criterion(logits[0], paddle.unsqueeze(label_oth, 2))
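On the second change: the two deleted lines expanded the 1-D padding mask into a full attention matrix before the forward pass; after this diff the padded `attention_mask` produced by the collation above is handed to the model unchanged. A small sketch of what the removed expansion computed (the toy mask and shapes are assumptions, not taken from the PR):

```python
import paddle

# Toy padded mask: 1 marks real tokens, 0 marks padding -> shape [batch, seq_len]
masks = paddle.to_tensor([[1., 1., 1., 0.],
                          [1., 1., 0., 0.]])

att_mask = paddle.unsqueeze(masks, axis=2)                      # [batch, seq_len, 1]
att_mask = paddle.matmul(att_mask, att_mask, transpose_y=True)  # [batch, seq_len, seq_len]
# att_mask[b, i, j] == 1 only when positions i and j are both real tokens;
# the PR drops this outer product and lets the model consume `masks` directly.
```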