Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions skyrl-train/examples/gsm8k/convert_megatron_to_hf.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
set -x
# Fail fast: abort on any error, unset variable, or mid-pipeline failure.
set -euo pipefail

# Colocated GRPO model conversion pipeline from megatron to huggingface.

# Assumed that you already have the megatron checkpoints for Qwen2.5-1.5B-Instruct on GSM8K, and you have already finished the training.
# NOTE(review): this comment says 1.5B, but `trainer.policy.model.path` below
# uses Qwen2.5-0.5B-Instruct — confirm which model the checkpoints belong to.

## TRAINING SCRIPT ##
# uv run examples/gsm8k/gsm8k_dataset.py --output_dir $HOME/data/gsm8k
# export WANDB_API_KEY=<your_key_here>
# bash examples/gsm8k/run_gsm8k.sh

# NOTE (sumanthrh): `micro_train_batch_size_per_gpu` and `micro_forward_batch_size_per_gpu` can be tuned

# Then you can execute the following script like this:

# bash examples/gsm8k/convert_megatron_to_hf.sh

# Overridable environment defaults — export any of these before running to customize.
: "${DATA_DIR:="$HOME/data/gsm8k"}"   # directory containing train/validation parquet files
: "${NUM_GPUS:=1}"                    # GPUs per node for policy/critic/ref and inference engines
: "${LOGGER:=wandb}"                  # change to "console" to print to stdout

: "${INFERENCE_BACKEND:=vllm}"        # inference engine backend passed to generator.backend

# Launch the megatron -> huggingface conversion/load entrypoint.
# Any extra `key=value` overrides given on the command line are forwarded
# verbatim via "$@" (quoted, so overrides containing spaces survive intact).
# Fixes vs. original: the resume_mode line previously ended in `"latest"\`
# with no space before the backslash, so the line continuation fused the first
# pass-through argument onto the resume_mode value; `$@` and the GPU/backend
# variables were also unquoted.
# NOTE(review): model path is Qwen2.5-0.5B-Instruct while ckpt_path and the
# header comments say 1.5B — confirm the intended model size.
uv run --isolated --extra mcore -m skyrl_train.entrypoints.main_load \
  data.train_data="['$DATA_DIR/train.parquet']" \
  data.val_data="['$DATA_DIR/validation.parquet']" \
  trainer.algorithm.advantage_estimator="grpo" \
  trainer.policy.model.path="Qwen/Qwen2.5-0.5B-Instruct" \
  trainer.placement.colocate_all=true \
  trainer.strategy=megatron \
  trainer.placement.policy_num_gpus_per_node="$NUM_GPUS" \
  trainer.placement.critic_num_gpus_per_node="$NUM_GPUS" \
  trainer.placement.ref_num_gpus_per_node="$NUM_GPUS" \
  generator.num_inference_engines="$NUM_GPUS" \
  generator.inference_engine_tensor_parallel_size=1 \
  trainer.epochs=20 \
  trainer.eval_batch_size=1024 \
  trainer.eval_before_train=true \
  trainer.eval_interval=5 \
  trainer.update_epochs_per_batch=1 \
  trainer.train_batch_size=1024 \
  trainer.policy_mini_batch_size=256 \
  trainer.micro_forward_batch_size_per_gpu=1 \
  trainer.micro_train_batch_size_per_gpu=1 \
  trainer.ckpt_interval=1 \
  trainer.max_prompt_length=512 \
  generator.sampling_params.max_generate_length=1024 \
  trainer.policy.optimizer_config.lr=1.0e-6 \
  trainer.algorithm.use_kl_loss=true \
  generator.backend="$INFERENCE_BACKEND" \
  generator.run_engines_locally=true \
  generator.weight_sync_backend=nccl \
  generator.async_engine=true \
  generator.batched=true \
  environment.env_class=gsm8k \
  generator.n_samples_per_prompt=5 \
  generator.gpu_memory_utilization=0.8 \
  trainer.logger="$LOGGER" \
  trainer.project_name="gsm8k" \
  trainer.run_name="gsm8k_test" \
  trainer.ckpt_path="$HOME/ckpts/gsm8k_1.5B_ckpt" \
  trainer.resume_mode="latest" \
  "$@"
278 changes: 0 additions & 278 deletions skyrl-train/skyrl_train/distributed/megatron/megatron_strategy.py

This file was deleted.

Loading