Skip to content

Commit 1893811

Browse files
authored
[kill switch] correct sys.exit (#266)
1 parent 497aa1b commit 1893811

File tree

1 file changed

+4
-2
lines changed

1 file changed

+4
-2
lines changed

megatron/training.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -102,7 +102,8 @@ def pretrain(train_valid_test_dataset_provider,
102  102      args = get_args()
103  103
104  104      if found_kill_switch():
105       -       sys.exit(f"Detected kill switch at {args.kill_switch_path}. Exiting")
     105  +       print_datetime(f"Detected kill switch at {args.kill_switch_path}. Exiting")
     106  +       sys.exit()
106  107
107  108      codecarbon_tracker_start()
108  109
@@ -834,7 +835,8 @@ def train(forward_step_func, model, optimizer, lr_scheduler,
834  835
835  836      if found_kill_switch():
836  837          save_checkpoint_and_time(iteration, model, optimizer, lr_scheduler)
837       -       sys.exit(f"Detected kill switch at {args.kill_switch_path}. Exiting")
     838  +       print_datetime(f"Detected kill switch at {args.kill_switch_path}. Exiting")
     839  +       sys.exit()
838  840
839  841      update_num_microbatches(args.consumed_train_samples)
840  842      if args.deepspeed:

0 commit comments

Comments
 (0)