-
Notifications
You must be signed in to change notification settings - Fork 943
Closed
Description
# Stage 1: `swift sft` on Qwen3-VL-8B-Instruct with IMAGE_MAX_TOKEN_NUM=150
# (the "150 visual token" setting).
export MASTER_PORT=29503

# Options are collected in an array so they can be grouped and annotated;
# each element is handed to `swift sft` as its own word.
sft_args=(
  # Model, data and the custom train type supplied by the external plugin.
  --model Qwen3-VL-8B-Instruct
  --dataset listwise_sft_0923-1_2.2w.jsonl
  --split_dataset_ratio 0.01
  --train_type custom
  --external_plugins '/examples/train/multimodal/lora_llm_full_vit/custom_plugin.py'
  --torch_dtype bfloat16

  # Schedule, batch sizes and learning rates.
  --num_train_epochs 2
  --per_device_train_batch_size 2
  --per_device_eval_batch_size 2
  --learning_rate 1e-4
  --vit_lr 1e-5
  --aligner_lr 1e-5
  --lora_rank 16
  --lora_alpha 32
  --gradient_accumulation_steps 8

  # Evaluation, checkpointing and logging cadence.
  --eval_steps 100
  --save_steps 100
  --save_total_limit 5
  --logging_steps 5

  # NOTE(review): 32678 may be a transposition of 32768 — confirm.
  --max_length 32678
  --output_dir output/Qwen3-VL-8B-1028-
  --warmup_ratio 0.05

  # Runtime / backend settings.
  --deepspeed 'zero2'
  --dataloader_num_workers 4
  --dataset_num_proc 4
  --padding_free true
  --attn_impl flash_attn
  --save_only_model true
  --report_to wandb
)

# Seven device indices are listed (index 4 is not among them), matching
# NPROC_PER_NODE=7. The variables are scoped to this one invocation.
NPROC_PER_NODE=7 \
CUDA_VISIBLE_DEVICES=0,1,2,3,5,6,7 \
IMAGE_MAX_TOKEN_NUM=150 \
swift sft "${sft_args[@]}"
我在一阶段用 SFT 训练 VL 模型并得到了 checkpoint;二阶段想加载这个 checkpoint 并改为全参数训练时会报错,无论二阶段是 SFT 还是 DPO 都一样,例如:
# Stage 2: full-parameter `swift rlhf` (DPO) run that loads model weights from
# the checkpoint given to --resume_from_checkpoint.
#
# Environment:
#   MODEL_PATH  optional; value passed to --model.
export MASTER_PORT=29503
nproc_per_node=2

# Split a fixed budget of 16 accumulation steps across the processes
# (shell arithmetic instead of spawning `expr`).
grad_accum_steps=$(( 16 / nproc_per_node ))

# `--model` must carry a value; a bare `--model` at the end of a line without
# a continuation terminates the command and drops every following option.
# The default below is assumed to be the stage-1 base model — TODO confirm,
# or override with MODEL_PATH.
stage2_model="${MODEL_PATH:-Qwen3-VL-8B-Instruct}"

# Options are collected in an array so each one reaches `swift rlhf` as its
# own word and none can be lost to a missing line continuation.
rlhf_args=(
  --rlhf_type dpo
  --model "$stage2_model"
  --dataset data_v2_1989.jsonl
  --load_from_cache_file true
  --split_dataset_ratio 0.01
  --train_type full
  --torch_dtype bfloat16

  # Checkpoint to load from.
  # NOTE(review): whether a checkpoint produced with a different train_type
  # can be resumed under `--train_type full` should be confirmed against the
  # ms-swift documentation.
  --resume_from_checkpoint v0-20251016-015527/checkpoint-600
  --resume_only_model True
  --ignore_data_skip True

  # Three loss terms with one weight each, in matching order.
  --loss_type sigmoid bco_pair sft
  --loss_weights 0.8 0.2 1.0
  --rpo_alpha 0.0

  --num_train_epochs 1
  --per_device_train_batch_size 1
  --per_device_eval_batch_size 1
  --learning_rate 1e-5
  --freeze_vit true
  --gradient_accumulation_steps "$grad_accum_steps"
  --eval_steps 20
  --save_steps 20
  --save_total_limit 2
  --deepspeed zero3
  --logging_steps 5
  # NOTE(review): 32678 may be a transposition of 32768 — confirm.
  --max_length 32678
  --output_dir output/Qwen3-VL-8B-Instruct-Stage2-MPO-full
  --warmup_ratio 0.05
  --dataloader_num_workers 4
  --dataset_num_proc 4
  --report_to wandb
)

# Two device indices are listed, matching nproc_per_node=2. The variables are
# scoped to this one invocation.
CUDA_VISIBLE_DEVICES=5,6 \
NPROC_PER_NODE="$nproc_per_node" \
IMAGE_MAX_TOKEN_NUM=150 \
swift rlhf "${rlhf_args[@]}"
另外,二阶段做 SFT 时也会报错,提示找不到 vit.safetensors。
Metadata
Metadata
Assignees
Labels
No labels
