Skip to content

Commit d6cd215

Browse files
committed
upd
1 parent d4f0f78 commit d6cd215

File tree

1 file changed

+70
-0
lines changed

1 file changed

+70
-0
lines changed

apps/sft/deepseek_v3.yaml

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
# >>> python -m apps.sft.main --config apps/sft/deepseek_v3.yaml
2+
3+
4+
# TODO: this section is required by torchtitan; see
# https://github.com/pytorch/torchtitan/blob/2f1c814da071cc8ad165d00be6f9c1a66f8e1cce/torchtitan/distributed/utils.py#L265
comm:
  trace_buf_size: 0
8+
9+
# Shared model identifier; referenced elsewhere in this file as ${model_name}
# (model.hf_assets_path and checkpoint.initial_load_path).
model_name: "deepseek-ai/DeepSeek-V3.1-Base"
10+
11+
# Model selection. hf_assets_path is built from the top-level model_name.
# NOTE(review): confirm that flavor "16B" matches the checkpoint named by
# model_name before loading weights from it.
model:
  name: deepseek_v3
  flavor: "16B"
  hf_assets_path: "hf://${model_name}"
15+
16+
# Process settings for the run (process count and GPU use).
processes:
  procs: 8
  with_gpus: true
19+
20+
# Optimizer settings.
optimizer:
  name: AdamW
  # Written with an explicit decimal point: YAML 1.1 loaders (e.g. PyYAML)
  # only resolve exponent notation to a float when the mantissa contains a
  # "."; a bare 1e-5 would be loaded as the string "1e-5".
  lr: 1.0e-5
  eps: 1.0e-8
24+
25+
# Learning-rate scheduler settings.
lr_scheduler:
  warmup_steps: 200
27+
28+
# Training run settings.
training:
  local_batch_size: 1
  seq_len: 2048
  max_norm: 1.0
  steps: 1000
  compile: false
  dataset: "c4"
35+
36+
# Parallelism degrees, one key per dimension.
# NOTE(review): -1 for data_parallel_shard_degree presumably lets the
# framework derive the degree from the available processes - confirm.
parallelism:
  data_parallel_replicate_degree: 1
  data_parallel_shard_degree: -1
  tensor_parallel_degree: 1
  pipeline_parallel_degree: 1
  context_parallel_degree: 1
  expert_parallel_degree: 1
  disable_loss_parallel: false
44+
45+
# Checkpoint saving and initial-load settings.
checkpoint:
  enable: true
  # The folder to save checkpoints to.
  folder: ./checkpoint
  # The path to load the initial checkpoint from. Ignored if `folder` exists.
  initial_load_path: "hf://${model_name}"
  # If true, interpret initial_load_path as a HuggingFace model repo.
  initial_load_in_hf: true
  last_save_in_hf: true
  interval: 500
  async_mode: "disabled"
53+
54+
# Activation checkpointing settings.
activation_checkpoint:
  mode: selective
  selective_ac_option: op
57+
58+
# Metric logging backends.
# NOTE(review): logging_mode is assumed to belong to the wandb backend
# (nesting was not recoverable from the diff view) - confirm.
metric_logging:
  wandb:
    project: sft-training
    group: "sft_exp_${oc.env:USER}"
    # One of: global_reduce, per_rank_reduce, per_rank_no_reduce
    logging_mode: global_reduce
63+
64+
# profiling:
65+
# enable_profiling: false
66+
67+
# metrics:
68+
# log_freq: 10
69+
# enable_tensorboard: true
70+
# save_tb_folder: "tb"

0 commit comments

Comments
 (0)