|
33 | 33 |
|
34 | 34 | # yapf: disable
|
35 | 35 | parser = argparse.ArgumentParser()
|
36 |
| -parser.add_argument("--margin", default=0.2, type=float, help="Margin for pos_score and neg_score") |
37 |
| -parser.add_argument("--eval_step", default=100, type=int, help="Steps interval for evaluation") |
| 36 | +parser.add_argument("--margin", default=0.2, type=float, help="Margin for pos_score and neg_score.") |
38 | 37 | parser.add_argument("--save_dir", default='./checkpoint', type=str, help="The output directory where the model checkpoints will be written.")
|
39 | 38 | parser.add_argument("--max_seq_length", default=128, type=int, help="The maximum total input sequence length after tokenization. "
|
40 | 39 | "Sequences longer than this will be truncated, sequences shorter will be padded.")
|
41 | 40 | parser.add_argument("--batch_size", default=32, type=int, help="Batch size per GPU/CPU for training.")
|
42 | 41 | parser.add_argument("--learning_rate", default=5e-5, type=float, help="The initial learning rate for Adam.")
|
43 | 42 | parser.add_argument("--weight_decay", default=0.0, type=float, help="Weight decay if we apply some.")
|
44 | 43 | parser.add_argument("--epochs", default=3, type=int, help="Total number of training epochs to perform.")
|
| 44 | +parser.add_argument("--eval_step", default=100, type=int, help="Step interval for evaluation.") |
| 45 | +parser.add_argument('--save_step', default=10000, type=int, help="Step interval for saving checkpoint.") |
45 | 46 | parser.add_argument("--warmup_proportion", default=0.0, type=float, help="Linear warmup proportion over the training process.")
|
46 | 47 | parser.add_argument("--init_from_ckpt", type=str, default=None, help="The path of checkpoint to be loaded.")
|
47 |
| -parser.add_argument("--seed", type=int, default=1000, help="random seed for initialization") |
| 48 | +parser.add_argument("--seed", type=int, default=1000, help="Random seed for initialization.") |
48 | 49 | parser.add_argument('--device', choices=['cpu', 'gpu'], default="gpu", help="Select which device to train model, defaults to gpu.")
|
49 | 50 | args = parser.parse_args()
|
50 | 51 | # yapf: enable
|
@@ -196,12 +197,12 @@ def do_train():
|
196 | 197 | optimizer.clear_grad()
|
197 | 198 |
|
198 | 199 | if global_step % args.eval_step == 0 and rank == 0:
|
| 200 | + evaluate(model, metric, dev_data_loader, "dev") |
| 201 | + |
| 202 | + if global_step % args.save_step == 0 and rank == 0: |
199 | 203 | save_dir = os.path.join(args.save_dir, "model_%d" % global_step)
|
200 | 204 | if not os.path.exists(save_dir):
|
201 | 205 | os.makedirs(save_dir)
|
202 |
| - |
203 |
| - evaluate(model, metric, dev_data_loader, "dev") |
204 |
| - |
205 | 206 | save_param_path = os.path.join(save_dir, 'model_state.pdparams')
|
206 | 207 | paddle.save(model.state_dict(), save_param_path)
|
207 | 208 | tokenizer.save_pretrained(save_dir)
|
|
0 commit comments