
Commit d6ae203

Fix fine-tuning training loss accumulation (meta-llama#725)
1 parent b9ec61a commit d6ae203

File tree

1 file changed: 1 addition, 1 deletion


src/llama_recipes/utils/train_utils.py

Lines changed: 1 addition & 1 deletion
@@ -151,11 +151,11 @@ def train(model, train_dataloader,eval_dataloader, tokenizer, optimizer, lr_sche
                         batch[key] = batch[key].to('cuda:0')
                 with autocast():
                     loss = model(**batch).loss
+                total_loss += loss.detach().float()
                 loss = loss / gradient_accumulation_steps
                 if train_config.save_metrics:
                     train_step_loss.append(loss.detach().float().item())
                     train_step_perplexity.append(float(torch.exp(loss.detach().float())))
-                total_loss += loss.detach().float()
                 if train_config.use_fp16:
                     # if fp16 is enabled, use gradient scaler to handle gradient update
                     scaler.scale(loss).backward()
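
The one-line move matters because total_loss feeds the reported training-loss average: before this change it accumulated the loss already divided by gradient_accumulation_steps, so any average computed from it was understated by that factor, while gradients themselves were unaffected. The sketch below is not from the repository; it uses a toy linear model, synthetic data, and an illustrative gradient_accumulation_steps value purely to demonstrate the pattern of accumulating the unscaled loss for reporting while backpropagating the scaled one.

# Minimal sketch (not from llama-recipes) of why the accumulation order matters.
# The model, data, optimizer, and gradient_accumulation_steps here are
# illustrative assumptions, not values from the repository.
import torch

torch.manual_seed(0)
model = torch.nn.Linear(8, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
loss_fn = torch.nn.MSELoss()

gradient_accumulation_steps = 4
num_steps = 8
total_loss = 0.0

optimizer.zero_grad()
for step in range(num_steps):
    x = torch.randn(16, 8)
    y = torch.randn(16, 1)
    loss = loss_fn(model(x), y)

    # Accumulate the unscaled loss for reporting, mirroring the fixed code;
    # accumulating after the division below would understate the reported
    # average by a factor of gradient_accumulation_steps.
    total_loss += loss.detach().float()

    # Scale only the loss used for backward(), so gradients summed over an
    # accumulation window approximate one larger-batch step.
    (loss / gradient_accumulation_steps).backward()

    if (step + 1) % gradient_accumulation_steps == 0:
        optimizer.step()
        optimizer.zero_grad()

print(f"average training loss: {total_loss / num_steps:.4f}")

The same split holds in the fp16 path shown in the diff: the scaler wraps the divided loss for backward(), while total_loss now accumulates the undivided value.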
