diff --git a/apps/grpo/qwen3_1_7b.yaml b/apps/grpo/qwen3_1_7b.yaml
index 14e4871cf..057ee2bcf 100644
--- a/apps/grpo/qwen3_1_7b.yaml
+++ b/apps/grpo/qwen3_1_7b.yaml
@@ -74,8 +74,9 @@ trainer:
     disable_loss_parallel: true
   checkpoint:
     enable: true
-    initial_load_path: hf://${model}
-    initial_load_in_hf: true
+    folder: ./checkpoint  # The folder to save checkpoints to.
+    initial_load_path: hf://${model}  # The path to load the initial checkpoint from. Ignored if `folder` exists.
+    initial_load_in_hf: true  # If true, interpret initial_load_path as a HuggingFace model repo.
     last_save_in_hf: true
     interval: 500
     async_mode: "disabled"
diff --git a/apps/grpo/qwen3_32b.yaml b/apps/grpo/qwen3_32b.yaml
index e7a0cf509..c39260b45 100644
--- a/apps/grpo/qwen3_32b.yaml
+++ b/apps/grpo/qwen3_32b.yaml
@@ -77,8 +77,9 @@ trainer:
     disable_loss_parallel: true
   checkpoint:
     enable: true
-    initial_load_path: hf://${model}
-    initial_load_in_hf: true
+    folder: ./checkpoint  # The folder to save checkpoints to.
+    initial_load_path: hf://${model}  # The path to load the initial checkpoint from. Ignored if `folder` exists.
+    initial_load_in_hf: true  # If true, interpret initial_load_path as a HuggingFace model repo.
     last_save_in_hf: true
     interval: 500
     async_mode: "disabled"
diff --git a/apps/grpo/qwen3_8b.yaml b/apps/grpo/qwen3_8b.yaml
index 534e5b92a..b0b4bf96e 100644
--- a/apps/grpo/qwen3_8b.yaml
+++ b/apps/grpo/qwen3_8b.yaml
@@ -70,8 +70,9 @@ trainer:
     disable_loss_parallel: true
   checkpoint:
     enable: true
-    initial_load_path: hf://${model}
-    initial_load_in_hf: true
+    folder: ./checkpoint  # The folder to save checkpoints to.
+    initial_load_path: hf://${model}  # The path to load the initial checkpoint from. Ignored if `folder` exists.
+    initial_load_in_hf: true  # If true, interpret initial_load_path as a HuggingFace model repo.
     last_save_in_hf: true
     interval: 500
     async_mode: "disabled"
diff --git a/apps/sft/llama3_8b.yaml b/apps/sft/llama3_8b.yaml
index 43a690c1e..44e4485e4 100644
--- a/apps/sft/llama3_8b.yaml
+++ b/apps/sft/llama3_8b.yaml
@@ -45,8 +45,9 @@ parallelism:
 
 checkpoint:
   enable: true
-  initial_load_path: hf://${model_name}
-  initial_load_in_hf: true
+  folder: ./checkpoint  # The folder to save checkpoints to.
+  initial_load_path: hf://${model_name}  # The path to load the initial checkpoint from. Ignored if `folder` exists.
+  initial_load_in_hf: true  # If true, interpret initial_load_path as a HuggingFace model repo.
   last_save_in_hf: true
   interval: 500
   async_mode: "disabled"
diff --git a/apps/sft/qwen3_8b.yaml b/apps/sft/qwen3_8b.yaml
index 2ab88bbd3..1c0d5bc8b 100644
--- a/apps/sft/qwen3_8b.yaml
+++ b/apps/sft/qwen3_8b.yaml
@@ -44,8 +44,9 @@ parallelism:
 
 checkpoint:
   enable: true
-  initial_load_path: hf://${model_name}
-  initial_load_in_hf: true
+  folder: ./checkpoint  # The folder to save checkpoints to.
+  initial_load_path: hf://${model_name}  # The path to load the initial checkpoint from. Ignored if `folder` exists.
+  initial_load_in_hf: true  # If true, interpret initial_load_path as a HuggingFace model repo.
   last_save_in_hf: true
   interval: 500
   async_mode: "disabled"