7 files changed: +10 -10 lines changed
applications/ColossalChat/coati/trainer (7 files changed: +10 -10 lines changed)
Columns: original file line number | diff line number | diff line change
@@ -380,9 +380,9 @@ def _criterion(outputs, inputs):
380
380
self .accumulative_meter .get ("accuracy" ),
381
381
global_step ,
382
382
)
383
- self .num_train_step += 1
384
383
self .accumulative_meter .reset ()
385
-
384
+ self .num_train_step += 1
385
+
386
386
if self .save_dir is not None and self .num_train_step > 0 and self .num_train_step % self .save_interval == 0 :
387
387
# save checkpoint
388
388
self .coordinator .print_on_master ("\n Start saving model checkpoint with running states" )
Columns: original file line number | diff line number | diff line change
@@ -231,7 +231,6 @@ def _training_step(self, experience: Experience):
231
231
experience:
232
232
sequences: [batch_size, prompt_length + response_length] --- <PAD>...<PAD><PROMPT>...<PROMPT><RESPONSE>...<RESPONSE><PAD>...<PAD>
233
233
"""
234
- self .num_train_step += 1
235
234
self .actor .train ()
236
235
num_actions = experience .action_log_probs .size (1 )
237
236
# policy loss
@@ -294,7 +293,7 @@ def _training_step(self, experience: Experience):
294
293
self .temperature_annealing_scheduler .step_forward ()
295
294
296
295
# preparing logging model output and corresponding rewards.
297
- if self .num_train_step % 10 == 1 :
296
+ if self .num_train_step % 10 == 0 :
298
297
response_text = self .experience_maker .tokenizer .batch_decode (
299
298
experience .sequences , skip_special_tokens = True
300
299
)
@@ -327,6 +326,7 @@ def _training_step(self, experience: Experience):
327
326
self .writer .add_scalar ("approx_kl" , self .accumulative_meter .get ("kl" ), global_step )
328
327
self .writer .add_scalar ("advantages" , self .accumulative_meter .get ("advantages" ), global_step )
329
328
self .accumulative_meter .reset ()
329
+ self .num_train_step += 1
330
330
331
331
def _learn (self , update_step : int ):
332
332
"""
Columns: original file line number | diff line number | diff line change
@@ -256,7 +256,7 @@ def _train(self, epoch: int):
256
256
self .coordinator .print_on_master (
257
257
f"Saved checkpoint at epoch { epoch } step { self .save_interval } at folder { self .save_dir } "
258
258
)
259
- self .num_train_step += 1
259
+ self .num_train_step += 1
260
260
261
261
step_bar .close ()
262
262
Columns: original file line number | diff line number | diff line change
@@ -233,7 +233,7 @@ def _train(self, epoch: int):
233
233
self .coordinator .print_on_master (
234
234
f"Saved checkpoint at epoch { epoch } step { self .save_interval } at folder { self .save_dir } "
235
235
)
236
- self .num_train_step += 1
236
+ self .num_train_step += 1
237
237
238
238
step_bar .close ()
239
239
Columns: original file line number | diff line number | diff line change
@@ -220,7 +220,6 @@ def _training_step(self, experience: Experience):
220
220
experience:
221
221
sequences: [batch_size, prompt_length + response_length] --- <PAD>...<PAD><PROMPT>...<PROMPT><RESPONSE>...<RESPONSE><PAD>...<PAD>
222
222
"""
223
- self .num_train_step += 1
224
223
self .actor .train ()
225
224
self .critic .train ()
226
225
num_actions = experience .action_log_probs .size (1 )
@@ -294,7 +293,7 @@ def _training_step(self, experience: Experience):
294
293
self .critic_scheduler .step ()
295
294
296
295
# preparing logging model output and corresponding rewards.
297
- if self .num_train_step % 10 == 1 :
296
+ if self .num_train_step % 10 == 0 :
298
297
response_text = self .experience_maker .tokenizer .batch_decode (
299
298
experience .sequences , skip_special_tokens = True
300
299
)
@@ -336,6 +335,7 @@ def _training_step(self, experience: Experience):
336
335
self .writer .add_scalar ("value" , self .accumulative_meter .get ("value" ), self .num_train_step )
337
336
self .writer .add_scalar ("advantages" , self .accumulative_meter .get ("advantages" ), self .num_train_step )
338
337
self .accumulative_meter .reset ()
338
+ self .num_train_step += 1
339
339
340
340
def _learn (self , update_step : int ):
341
341
"""
Columns: original file line number | diff line number | diff line change
@@ -193,7 +193,7 @@ def _train(self, epoch):
193
193
self .coordinator .print_on_master (
194
194
f"Saved checkpoint at epoch { epoch } step { (i + 1 )/ self .accumulation_steps } at folder { self .save_dir } "
195
195
)
196
- self .num_train_step += 1
196
+ self .num_train_step += 1
197
197
step_bar .close ()
198
198
199
199
def _eval (self , epoch ):
Columns: original file line number | diff line number | diff line change
@@ -152,9 +152,9 @@ def _train(self, epoch: int):
152
152
if self .writer :
153
153
self .writer .add_scalar ("train/loss" , self .accumulative_meter .get ("loss" ), global_step )
154
154
self .writer .add_scalar ("train/lr" , self .scheduler .get_last_lr ()[0 ], global_step )
155
- self .num_train_step += 1
156
155
self .accumulative_meter .reset ()
157
156
step_bar .update ()
157
+ self .num_train_step += 1
158
158
159
159
# Save checkpoint
160
160
if (
You can’t perform that action at this time.
0 commit comments