
Commit f0850a3

Merge remote-tracking branch 'origin' into 3p-integrations-crusoe
2 parents: 1885095 + d6ae203

File tree

4 files changed: +4 additions, -4 deletions

recipes/quickstart/finetuning/README.md
recipes/quickstart/finetuning/quickstart_peft_finetuning.ipynb
src/llama_recipes/finetuning.py
src/llama_recipes/utils/train_utils.py

recipes/quickstart/finetuning/README.md

Lines changed: 1 addition & 1 deletion
@@ -8,7 +8,7 @@ This folder contains instructions to fine-tune Meta Llama 3 on a
 using the canonical [finetuning script](../../../src/llama_recipes/finetuning.py) in the llama-recipes package.

-If you are new to fine-tuning techniques, check out an overview: [](./LLM_finetuning_overview.md)
+If you are new to fine-tuning techniques, check out [an overview](./LLM_finetuning_overview.md).

 > [!TIP]
 > If you want to try finetuning Meta Llama 3 in a Jupyter notebook you can find a quickstart notebook [here](./quickstart_peft_finetuning.ipynb)

recipes/quickstart/finetuning/quickstart_peft_finetuning.ipynb

Lines changed: 1 addition & 1 deletion
@@ -8,7 +8,7 @@
 "Copyright (c) Meta Platforms, Inc. and affiliates.\n",
 "This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.\n",
 "\n",
-"<a href=\"https://colab.research.google.com/github/meta-llama/llama-recipes/blob/main/recipes/finetuning/quickstart_peft_finetuning.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
+"<a href=\"https://colab.research.google.com/github/meta-llama/llama-recipes/blob/main/recipes/quickstart/finetuning/quickstart_peft_finetuning.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
 ]
 },
 {

src/llama_recipes/finetuning.py

Lines changed: 1 addition & 1 deletion
@@ -167,7 +167,7 @@ def main(**kwargs):
     # Load the pre-trained peft model checkpoint and setup its configuration
     if train_config.from_peft_checkpoint:
         model = PeftModel.from_pretrained(model, train_config.from_peft_checkpoint, is_trainable=True)
-        peft_config = model.peft_config()
+        peft_config = model.peft_config
     # Generate the peft config and start fine-tuning from original model
     else:
         peft_config = generate_peft_config(train_config, kwargs)
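
The finetuning.py change swaps a call for an attribute access: on a peft PeftModel, peft_config is a property (typically a dict mapping adapter names to PeftConfig objects), so invoking it as model.peft_config() fails at runtime. A minimal sketch of the corrected pattern, assuming a LoRA adapter checkpoint; the base model id and checkpoint path below are placeholders, not values taken from this commit:

from peft import PeftModel
from transformers import AutoModelForCausalLM

# Placeholder base model and adapter checkpoint path (hypothetical, for illustration).
base_model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B")
model = PeftModel.from_pretrained(base_model, "outputs/peft_checkpoint", is_trainable=True)

# peft_config is an attribute/property, not a method; calling it would raise a
# TypeError because the returned mapping is not callable.
peft_config = model.peft_config
print(peft_config)  # e.g. {"default": LoraConfig(...)}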

src/llama_recipes/utils/train_utils.py

Lines changed: 1 addition & 1 deletion
@@ -151,11 +151,11 @@ def train(model, train_dataloader,eval_dataloader, tokenizer, optimizer, lr_sche
                         batch[key] = batch[key].to('cuda:0')
                 with autocast():
                     loss = model(**batch).loss
+                total_loss += loss.detach().float()
                 loss = loss / gradient_accumulation_steps
                 if train_config.save_metrics:
                     train_step_loss.append(loss.detach().float().item())
                     train_step_perplexity.append(float(torch.exp(loss.detach().float())))
-                total_loss += loss.detach().float()
                 if train_config.use_fp16:
                     # if fp16 is enabled, use gradient scaler to handle gradient update
                     scaler.scale(loss).backward()
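
The train_utils.py change moves the total_loss accumulation above the division by gradient_accumulation_steps, so the running total now sums the unscaled per-batch loss while only the value used for backpropagation is scaled down. A minimal sketch of that pattern, assuming a standard gradient-accumulation loop; the model, dataloader, and optimizer here are generic stand-ins rather than this repository's training setup:

import torch

def train_one_epoch(model, dataloader, optimizer, gradient_accumulation_steps=4):
    total_loss = 0.0
    for step, batch in enumerate(dataloader):
        loss = model(**batch).loss
        # Accumulate the raw loss *before* scaling, so the reported epoch loss
        # is not shrunk by the accumulation factor.
        total_loss += loss.detach().float()
        # Scale only what is backpropagated, so the accumulated gradients
        # approximate a single update over the larger effective batch.
        (loss / gradient_accumulation_steps).backward()
        if (step + 1) % gradient_accumulation_steps == 0 or step + 1 == len(dataloader):
            optimizer.step()
            optimizer.zero_grad()
    # Mean per-batch loss for logging.
    return float(total_loss / len(dataloader))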
