|
63 | 63 | "## Setup\n", |
64 | 64 | "\n", |
65 | 65 | "Install dependencies and set up the environment:\n", |
66 | | - "https://maxtext.readthedocs.io/latest/tutorials/grpo.html#from-github" |
| 66 | + "https://maxtext.readthedocs.io/en/latest/tutorials/grpo.html#from-github" |
67 | 67 | ] |
68 | 68 | }, |
69 | 69 | { |
|
130 | 130 | " print(\"Authentication failed: Hugging Face token not set\")\n", |
131 | 131 | "\n", |
132 | 132 | "# Optional: Override training parameters\n", |
133 | | - "STEPS = 10 # Reduced for demo purposes\n", |
134 | | - "PER_DEVICE_BATCH_SIZE = 1\n", |
135 | 133 | "LEARNING_RATE = 3e-6\n", |
136 | 134 | "NUM_GENERATIONS = 2\n", |
137 | 135 | "GRPO_BETA = 0.08\n", |
|
197 | 195 | " f\"load_parameters_path={MODEL_CHECKPOINT_PATH}\",\n", |
198 | 196 | " f\"base_output_directory={OUTPUT_DIRECTORY}\",\n", |
199 | 197 | " f\"hf_access_token={HF_TOKEN}\",\n", |
200 | | - " f\"steps={STEPS}\",\n", |
201 | | - " f\"per_device_batch_size={PER_DEVICE_BATCH_SIZE}\",\n", |
202 | 198 | " f\"learning_rate={LEARNING_RATE}\",\n", |
203 | 199 | " f\"num_generations={NUM_GENERATIONS}\",\n", |
204 | 200 | " f\"grpo_beta={GRPO_BETA}\",\n", |
|
211 | 207 | "# Initialize configuration\n", |
212 | 208 | "print(f\"🔧 Initializing configuration from: {config_file}\")\n", |
213 | 209 | "config = pyconfig.initialize(config_argv)\n", |
214 | | - "max_utils.print_system_information()\n", |
215 | 210 | "\n", |
216 | 211 | "print(\"\\n✅ Configuration initialized successfully\")\n", |
217 | 212 | "print(f\"📊 Training steps: {config.steps}\")\n", |
|
261 | 256 | "source": [ |
262 | 257 | "## 📚 Learn More\n", |
263 | 258 | "\n", |
264 | | - "- **CLI Usage**: Run `python3 -m src.MaxText.rl.train_rl src/MaxText/configs/rl.yml --model_name=llama3.1-8b ...`\n", |
| 259 | + "- **CLI Usage**: https://maxtext.readthedocs.io/en/latest/tutorials/grpo.html#run-grpo\n", |
265 | 260 | "- **Configuration**: See `src/MaxText/configs/rl.yml` for all available options\n", |
266 | | - "- **Documentation**: Check `src/MaxText/rl/train_rl.py` for the `rl_train` function implementation\n", |
267 | | - "- **Examples**: See other examples in `src/MaxText/examples/`" |
| 261 | + "- **Documentation**: Check `src/MaxText/rl/train_rl.py` for the `rl_train` function implementation" |
268 | 262 | ] |
269 | 263 | } |
270 | 264 | ], |
|
0 commit comments