diff --git a/README.md b/README.md index 5142b91a0..92dd76fcb 100644 --- a/README.md +++ b/README.md @@ -62,11 +62,10 @@ $ gh release create v0.0.0 assets/wheels/vllm-*.whl --title "Forge Wheels v0.0.0 ## Quick Start -To run SFT for Llama3 8B, run +To run SFT on a Llama3 8B model, run ```bash -uv run forge download meta-llama/Meta-Llama-3.1-8B-Instruct --output-dir /tmp/Meta-Llama-3.1-8B-Instruct --ignore-patterns "original/consolidated.00.pth" -uv run forge run --nproc_per_node 2 apps/sft/main.py --config apps/sft/llama3_8b.yaml +python -m apps.sft_v2.main --config apps/sft_v2/llama3_8b.yaml ``` ### Citation diff --git a/apps/sft_v2/llama3_8b.yaml b/apps/sft_v2/llama3_8b.yaml index 86fd88ca5..273d2d592 100644 --- a/apps/sft_v2/llama3_8b.yaml +++ b/apps/sft_v2/llama3_8b.yaml @@ -1,22 +1,18 @@ # >>> python -m apps.sft_v2.main --config apps/sft_v2/llama3_8b.yaml # Config for supervised full finetuning using a Llama3.1 8B Instruct model -# -# This config assumes that you've run the following command before launching -# this run: -# export HF_HUB_DISABLE_XET=1 -# forge download meta-llama/Meta-Llama-3.1-8B-Instruct --output-dir /tmp/Meta-Llama-3.1-8B-Instruct - # TODO: required by torchtitan # https://github.com/pytorch/torchtitan/blob/2f1c814da071cc8ad165d00be6f9c1a66f8e1cce/torchtitan/distributed/utils.py#L265 comm: trace_buf_size: 0 +model_name: "meta-llama/Meta-Llama-3.1-8B-Instruct" + model: name: llama3 flavor: 8B - hf_assets_path: /tmp/Meta-Llama-3.1-8B-Instruct + hf_assets_path: hf://${model_name} processes: procs: 8 @@ -49,8 +45,7 @@ parallelism: checkpoint: enable: true - folder: /tmp/Meta-Llama-3.1-8B-Instruct/saved_checkpoints - initial_load_path: /tmp/Meta-Llama-3.1-8B-Instruct/ + initial_load_path: hf://${model_name} initial_load_in_hf: true last_save_in_hf: true interval: 500