From 13fca13f71761ade24e7e81774be5209320102de Mon Sep 17 00:00:00 2001 From: Kai Wu Date: Thu, 16 Oct 2025 12:31:46 -0700 Subject: [PATCH] make sure ref_model has the same seq_len as the trainer --- apps/grpo/qwen3_1_7b.yaml | 1 + apps/grpo/qwen3_32b.yaml | 1 + apps/grpo/qwen3_8b.yaml | 1 + 3 files changed, 3 insertions(+) diff --git a/apps/grpo/qwen3_1_7b.yaml b/apps/grpo/qwen3_1_7b.yaml index 14e4871cf..b72f0089a 100644 --- a/apps/grpo/qwen3_1_7b.yaml +++ b/apps/grpo/qwen3_1_7b.yaml @@ -96,6 +96,7 @@ ref_model: flavor: 1.7B hf_assets_path: hf://${model} training: + seq_len: ${trainer.training.seq_len} dtype: bfloat16 gc_freq: 1 compile: diff --git a/apps/grpo/qwen3_32b.yaml b/apps/grpo/qwen3_32b.yaml index e7a0cf509..5c3dcbe31 100644 --- a/apps/grpo/qwen3_32b.yaml +++ b/apps/grpo/qwen3_32b.yaml @@ -99,6 +99,7 @@ ref_model: flavor: 32B hf_assets_path: hf://${model} training: + seq_len: ${trainer.training.seq_len} dtype: bfloat16 gc_freq: 1 compile: diff --git a/apps/grpo/qwen3_8b.yaml b/apps/grpo/qwen3_8b.yaml index 4a8858269..fc87b1685 100644 --- a/apps/grpo/qwen3_8b.yaml +++ b/apps/grpo/qwen3_8b.yaml @@ -95,6 +95,7 @@ ref_model: flavor: 8B hf_assets_path: hf://${model} training: + seq_len: ${trainer.training.seq_len} dtype: bfloat16 gc_freq: 1 compile: