Skip to content

Commit f89348d

Browse files
committed
add dsv3 config for MI355
1 parent ad43031 commit f89348d

File tree

2 files changed

+31
-12
lines changed

2 files changed

+31
-12
lines changed

examples/torchtitan/configs/MI355X/deepseek_v3_16b-pretrain.yaml

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,14 @@ modules:
1313
model: deepseek_v3_16b.yaml
1414
overrides:
1515
profiling:
16-
enable_profiling: false
16+
enable_profiling: true
1717
save_traces_folder: "profile_trace"
1818
profile_freq: 10
1919
enable_memory_snapshot: false
2020
save_memory_snapshot_folder: "memory_snapshot"
2121

2222
metrics:
23-
log_freq: 10
23+
log_freq: 1
2424
disable_color_printing: false
2525
enable_tensorboard: false
2626
save_tb_folder: "tb"
@@ -38,11 +38,12 @@ modules:
3838
min_lr_factor: 0.1
3939

4040
training:
41+
debug_moe_force_load_balance: true
4142
local_batch_size: 4
4243
seq_len: 4096
4344
max_norm: 1.0 # grad norm clipping
44-
steps: 1000
45-
dataset: "c4" # supported datasets: c4_test (2K), c4 (177M)
45+
steps: 15
46+
dataset: "c4_test" # supported datasets: c4_test (2K), c4 (177M)
4647

4748
parallelism:
4849
data_parallel_replicate_degree: 1
@@ -69,8 +70,16 @@ modules:
6970

7071
compile:
7172
enable: true
72-
components: ["loss"] # ["model", "loss"]
73+
components: ["model", "loss"] # ["model", "loss"]
7374

75+
primus_turbo:
76+
enable_primus_turbo: true
77+
use_turbo_mx_linear: false
78+
use_turbo_float8_linear: true
79+
enable_attention_float8: false
80+
use_turbo_grouped_mm: true
81+
use_moe_fp8: false
82+
7483
# quantize:
7584
# linear:
7685
# float8:

examples/torchtitan/configs/MI355X/deepseek_v3_671b-pretrain.yaml

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,14 @@ modules:
1313
model: deepseek_v3_671b.yaml
1414
overrides:
1515
profiling:
16-
enable_profiling: false
16+
enable_profiling: true
1717
save_traces_folder: "profile_trace"
1818
profile_freq: 10
1919
enable_memory_snapshot: false
2020
save_memory_snapshot_folder: "memory_snapshot"
2121

2222
metrics:
23-
log_freq: 10
23+
log_freq: 1
2424
disable_color_printing: false
2525
enable_tensorboard: false
2626
save_tb_folder: "tb"
@@ -38,11 +38,12 @@ modules:
3838
min_lr_factor: 0.1
3939

4040
training:
41-
local_batch_size: 4
41+
debug_moe_force_load_balance: true
42+
local_batch_size: 16
4243
seq_len: 4096
4344
max_norm: 1.0 # grad norm clipping
44-
steps: 1000
45-
dataset: "c4" # supported datasets: c4_test (2K), c4 (177M)
45+
steps: 15
46+
dataset: "c4_test" # supported datasets: c4_test (2K), c4 (177M)
4647

4748
parallelism:
4849
data_parallel_replicate_degree: 1
@@ -52,7 +53,7 @@ modules:
5253
enable_async_tensor_parallel: false
5354
pipeline_parallel_degree: 1
5455
pipeline_parallel_schedule: "Interleaved1F1B"
55-
expert_parallel_degree: 1
56+
expert_parallel_degree: 8
5657
expert_tensor_parallel_degree: 1
5758

5859
checkpoint:
@@ -69,7 +70,16 @@ modules:
6970

7071
compile:
7172
enable: true
72-
components: ["loss"] # ["model", "loss"]
73+
components: ["model", "loss"] # ["model", "loss"]
74+
75+
primus_turbo:
76+
enable_primus_turbo: true
77+
use_turbo_mx_linear: false
78+
use_turbo_float8_linear: true
79+
enable_attention_float8: false
80+
use_classic_attention: true
81+
use_turbo_grouped_mm: true
82+
use_moe_fp8: false
7383

7484
# quantize:
7585
# linear:

0 commit comments

Comments
 (0)