File tree Expand file tree Collapse file tree 2 files changed +10
-3
lines changed
Expand file tree Collapse file tree 2 files changed +10
-3
lines changed Original file line number Diff line number Diff line change 4646 "num_train_epochs" : 5 ,
4747 "per_device_train_batch_size" : 4 ,
4848 "per_device_eval_batch_size" : 4 ,
49- "gradient_accumulation_steps" : 4 ,
49+ "gradient_accumulation_steps" : 1 ,
5050 "learning_rate" : 0.00001 ,
5151 "weight_decay" : 0 ,
5252 "warmup_ratio" : 0.03 ,
Original file line number Diff line number Diff line change 2222import copy
2323import json
2424import os
25+ import re
2526import tempfile
2627
2728# Third Party
8788 num_train_epochs = 5 ,
8889 per_device_train_batch_size = 4 ,
8990 per_device_eval_batch_size = 4 ,
90- gradient_accumulation_steps = 4 ,
91+ gradient_accumulation_steps = 1 ,
9192 learning_rate = 0.00001 ,
9293 weight_decay = 0 ,
9394 warmup_ratio = 0.03 ,
@@ -1142,7 +1143,13 @@ def _validate_hf_resource_scanner_file(tempdir):
11421143
11431144
def _get_checkpoint_path(dir_path):
    """Return the path of the highest-numbered checkpoint directory.

    Scans ``dir_path`` for immediate subdirectories named ``checkpoint-<N>``
    (``N`` being one or more digits) and picks the one with the largest
    numeric ``N``. Comparison is numeric, so ``checkpoint-10`` ranks above
    ``checkpoint-9``.

    Args:
        dir_path: Directory expected to contain ``checkpoint-<N>``
            subdirectories.

    Returns:
        ``dir_path`` joined with the name of the latest checkpoint directory.

    Raises:
        FileNotFoundError: If ``dir_path`` does not exist (raised by
            ``os.listdir``) or contains no ``checkpoint-<N>`` directory.
    """
    checkpoint_dirs = [
        name
        for name in os.listdir(dir_path)
        # Check the cheap name pattern first; only stat entries that match.
        if re.fullmatch(r"checkpoint-\d+", name)
        and os.path.isdir(os.path.join(dir_path, name))
    ]
    if not checkpoint_dirs:
        # Fail with a descriptive error instead of an opaque IndexError.
        raise FileNotFoundError(
            f"No 'checkpoint-<step>' directory found in {dir_path!r}"
        )
    # Compare by the integer step, not lexicographically.
    latest = max(checkpoint_dirs, key=lambda name: int(name.rsplit("-", 1)[-1]))
    return os.path.join(dir_path, latest)
11461153
11471154
11481155def _get_adapter_config (dir_path ):
You can’t perform that action at this time.
0 commit comments