<CourseFloatingBanner chapter={2}
  classNames="absolute z-10 right-0 top-0"
  notebooks={[
-    {label: "Google Colab", value: "https://github.com/unslothai/notebooks/blob/main/nb/HuggingFace%20Course-Llama3.1_(8B)-GRPO.ipynb"},
+    {label: "Google Colab", value: "https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/HuggingFace%20Course-Gemma3_(1B)-GRPO.ipynb"},
]} />

# Practical Exercise: GRPO with Unsloth
@@ -32,7 +32,7 @@ Unsloth provides a class (`FastLanguageModel`) that integrates transformers with
from unsloth import FastLanguageModel
```

-Now, let's load the Llama 3.1 8B Instruct model and configure it for fine-tuning:
+Now, let's load Google's Gemma 3 1B Instruct model and configure it for fine-tuning:

```python
from unsloth import FastLanguageModel
@@ -42,7 +42,7 @@ max_seq_length = 1024 # Can increase for longer reasoning traces
lora_rank = 32 # Larger rank = smarter, but slower

model, tokenizer = FastLanguageModel.from_pretrained(
-    model_name = "meta-llama/meta-Llama-3.1-8B-Instruct",
+    model_name = "google/gemma-3-1b-it",
    max_seq_length = max_seq_length,
    load_in_4bit = True, # False for LoRA 16bit
    fast_inference = True, # Enable vLLM fast inference
@@ -52,7 +52,7 @@ model, tokenizer = FastLanguageModel.from_pretrained(

model = FastLanguageModel.get_peft_model(
    model,
-    r = lora_rank, # Choose any number > 0! Suggested 8, 16, 32, 64, 128
+    r = lora_rank, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = [
        "q_proj",
        "k_proj",
@@ -225,6 +225,7 @@ Now we'll set up the GRPO trainer with our model, tokenizer, and reward function
from trl import GRPOConfig, GRPOTrainer

max_prompt_length = 256
+
training_args = GRPOConfig(
    learning_rate = 5e-6,
    adam_beta1 = 0.9,
@@ -239,7 +240,7 @@ training_args = GRPOConfig(
    num_generations = 6, # Decrease if out of memory
    max_prompt_length = max_prompt_length,
    max_completion_length = max_seq_length - max_prompt_length,
-    # num_train_epochs = 1, # Set to 1 for a full training run
+    # num_train_epochs = 1, # Set to 1 for a full training run
    max_steps = 250,
    save_steps = 250,
    max_grad_norm = 0.1,
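
The hunk above shows only the `GRPOConfig` half of this step; per the hunk header, the course file then builds the GRPO trainer from the model, tokenizer, reward functions, and this config. A minimal sketch of that instantiation with TRL's `GRPOTrainer` follows; the reward-function names and `dataset` are illustrative placeholders, not taken from the diff:

```python
from trl import GRPOTrainer

trainer = GRPOTrainer(
    model = model,
    processing_class = tokenizer,  # tokenizer loaded alongside the model above
    reward_funcs = [
        format_reward_func,       # placeholder: scores adherence to the expected output format
        correctness_reward_func,  # placeholder: scores whether the extracted answer is correct
    ],
    args = training_args,         # the GRPOConfig defined above
    train_dataset = dataset,      # placeholder: the prompt dataset prepared earlier in the chapter
)
trainer.train()
```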