File tree Expand file tree Collapse file tree 1 file changed +70
-0
lines changed
Expand file tree Collapse file tree 1 file changed +70
-0
# SFT training configuration for the deepseek_v3 model.
# >>> python -m apps.sft.main --config apps/sft/deepseek_v3.yaml


# TODO: required by torchtitan
# https://github.com/pytorch/torchtitan/blob/2f1c814da071cc8ad165d00be6f9c1a66f8e1cce/torchtitan/distributed/utils.py#L265
comm:
  trace_buf_size: 0

# Shared HuggingFace repo id; interpolated below as ${model_name}.
model_name: "deepseek-ai/DeepSeek-V3.1-Base"

model:
  name: deepseek_v3
  # NOTE(review): confirm that the 16B flavor matches the architecture of the
  # checkpoint referenced by model_name.
  flavor: 16B
  hf_assets_path: hf://${model_name}

processes:
  procs: 8
  with_gpus: true

optimizer:
  name: AdamW
  # Written with an explicit mantissa dot so that YAML 1.1 loaders also parse
  # these as floats rather than strings.
  lr: 1.0e-5
  eps: 1.0e-8

lr_scheduler:
  warmup_steps: 200

training:
  local_batch_size: 1
  seq_len: 2048
  max_norm: 1.0
  steps: 1000
  compile: false
  dataset: "c4"

parallelism:
  data_parallel_replicate_degree: 1
  # NOTE(review): -1 is presumably a sentinel for "derive from the available
  # devices" -- confirm against the consumer.
  data_parallel_shard_degree: -1
  tensor_parallel_degree: 1
  pipeline_parallel_degree: 1
  context_parallel_degree: 1
  expert_parallel_degree: 1
  disable_loss_parallel: false

checkpoint:
  enable: true
  folder: ./checkpoint  # The folder to save checkpoints to.
  initial_load_path: hf://${model_name}  # The path to load the initial checkpoint from. Ignored if `folder` exists.
  initial_load_in_hf: true  # If true, interpret initial_load_path as a HuggingFace model repo
  last_save_in_hf: true
  interval: 500
  async_mode: "disabled"

activation_checkpoint:
  mode: selective
  selective_ac_option: op

metric_logging:
  # NOTE(review): the three keys below are assumed to be children of `wandb`
  # -- confirm the nesting against the consumer's schema.
  wandb:
    project: sft-training
    group: sft_exp_${oc.env:USER}
    logging_mode: global_reduce  # global_reduce, per_rank_reduce, per_rank_no_reduce

# profiling:
#   enable_profiling: false

# metrics:
#   log_freq: 10
#   enable_tensorboard: true
#   save_tb_folder: "tb"
You can’t perform that action at this time.
0 commit comments