<CourseFloatingBanner chapter={2}
  classNames="absolute z-10 right-0 top-0"
  notebooks={[
-    {label: "Google Colab", value: "https://github.com/unslothai/notebooks/blob/main/nb/HuggingFace%20Course-Llama3.1_(8B)-GRPO.ipynb"},
+    {label: "Google Colab", value: "https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/HuggingFace%20Course-Gemma3_(1B)-GRPO.ipynb"},
]} />

# Practical Exercise: GRPO with Unsloth
@@ -32,7 +32,7 @@ Unsloth provides a class (`FastLanguageModel`) that integrates transformers with
from unsloth import FastLanguageModel
```

-Now, let's load the Llama 3.1 8B Instruct model and configure it for fine-tuning:
+Now, let's load Google's Gemma 3 1B Instruct model and configure it for fine-tuning:

```python
from unsloth import FastLanguageModel
@@ -42,7 +42,7 @@ max_seq_length = 1024 # Can increase for longer reasoning traces
lora_rank = 32 # Larger rank = smarter, but slower

model, tokenizer = FastLanguageModel.from_pretrained(
-    model_name = "meta-llama/meta-Llama-3.1-8B-Instruct",
+    model_name = "google/gemma-3-1b-it",
    max_seq_length = max_seq_length,
    load_in_4bit = True, # False for LoRA 16bit
    fast_inference = True, # Enable vLLM fast inference
@@ -52,7 +52,7 @@ model, tokenizer = FastLanguageModel.from_pretrained(

model = FastLanguageModel.get_peft_model(
    model,
-    r = lora_rank, # Choose any number > 0! Suggested 8, 16, 32, 64, 128
+    r = lora_rank, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = [
        "q_proj",
        "k_proj",
@@ -225,6 +225,7 @@ Now we'll set up the GRPO trainer with our model, tokenizer, and reward function
from trl import GRPOConfig, GRPOTrainer

max_prompt_length = 256
+
training_args = GRPOConfig(
    learning_rate = 5e-6,
    adam_beta1 = 0.9,
@@ -239,7 +240,7 @@ training_args = GRPOConfig(
    num_generations = 6, # Decrease if out of memory
    max_prompt_length = max_prompt_length,
    max_completion_length = max_seq_length - max_prompt_length,
-    # num_train_epochs = 1, # Set to 1 for a full training run
+    # num_train_epochs = 1, # Set to 1 for a full training run
    max_steps = 250,
    save_steps = 250,
    max_grad_norm = 0.1,
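
The hunk above shows only the `GRPOConfig` half of this step; per the hunk header, the course file then builds the GRPO trainer from the model, tokenizer, reward functions, and this config. A minimal sketch of that instantiation with TRL's `GRPOTrainer` follows; the reward-function names and `dataset` are illustrative placeholders, not taken from the diff:

```python
from trl import GRPOTrainer

trainer = GRPOTrainer(
    model = model,
    processing_class = tokenizer,  # tokenizer loaded alongside the model above
    reward_funcs = [
        format_reward_func,       # placeholder: scores adherence to the expected output format
        correctness_reward_func,  # placeholder: scores whether the extracted answer is correct
    ],
    args = training_args,         # the GRPOConfig defined above
    train_dataset = dataset,      # placeholder: the prompt dataset prepared earlier in the chapter
)
trainer.train()
```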