File tree Expand file tree Collapse file tree 2 files changed +8
-6
lines changed
tests/config/benchmark/config/sft Expand file tree Collapse file tree 2 files changed +8
-6
lines changed Original file line number Diff line number Diff line change @@ -6,7 +6,7 @@ train_dataset_prob: "1.0"
66eval_dataset_path : /root/paddlejob/share-storage/gpfs/system-public/efficient_benchmark/dataset/coco_grounding/val.jsonl
77eval_dataset_prob : "1.0"
88max_seq_len : 8192
9- packing : false
9+ packing : true
1010mix_strategy : concat
1111template_backend : custom
1212template : qwen3_vl
@@ -31,7 +31,7 @@ evaluation_strategy: steps
3131save_steps : 500
3232save_strategy : steps
3333logging_steps : 1
34- gradient_accumulation_steps : 8
34+ gradient_accumulation_steps : 16
3535logging_dir : ./vdl_log
3636output_dir : ./checkpoints/qwen3-vl-sft-full
3737disable_tqdm : true
@@ -61,7 +61,7 @@ moe_grouped_gemm: true
6161moe_deep_gemm : true
6262
6363apply_rope_fusion : False
64- # moe_router_force_load_balancing: true
64+ moe_router_force_load_balancing : false
6565
6666# sharding
6767split_param : true
Original file line number Diff line number Diff line change @@ -32,7 +32,7 @@ save_steps: 500
3232save_strategy : steps
3333logging_steps : 1
3434save_total_limit : 1
35- gradient_accumulation_steps : 8
35+ gradient_accumulation_steps : 32
3636logging_dir : ./vdl_log_sft_full_tp_8B_coco
3737output_dir : ./checkpoints/qwen3-vl-sft-full-tp-8B_coco
3838disable_tqdm : true
@@ -44,7 +44,7 @@ warmup_steps: 20
4444learning_rate : 1.0e-5
4545
4646# performance
47- tensor_model_parallel_size : 4
47+ tensor_model_parallel_size : 2
4848pipeline_model_parallel_size : 1
4949sharding : stage1
5050recompute_granularity : full
@@ -57,4 +57,6 @@ save_checkpoint_format: "flex_checkpoint"
5757load_checkpoint_format : "flex_checkpoint"
5858freeze_config : freeze_vision freeze_aligner
5959
60- benchmark : true
60+ benchmark : true
61+ dataloader_num_workers : 8
62+ prefetch_factor : 8
You can’t perform that action at this time.
0 commit comments