Commit 9f47ceb

[dsv3] Remove dtype to avoid confusion (#1599)
Remove the unused dtype field; FP8 training is not yet supported for DeepSeek V3 (dsv3) in torchtitan.
1 parent 7f1fa48 commit 9f47ceb

File tree

2 files changed: 0 additions, 4 deletions

torchtitan/models/deepseek_v3/__init__.py

Lines changed: 0 additions & 1 deletion
@@ -150,7 +150,6 @@
         qk_nope_head_dim=128,
         qk_rope_head_dim=64,
         v_head_dim=128,
-        dtype="fp8",
     ),
 }
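For reference, a minimal sketch of how a model-args entry like the one in this hunk is constructed after the change. The import path follows the file touched below (torchtitan/models/deepseek_v3/model/args.py); the dict name and key are illustrative placeholders, not taken from this diff.

    from torchtitan.models.deepseek_v3.model.args import DeepSeekV3ModelArgs

    # Hypothetical registry entry mirroring the hunk above; only the keyword
    # arguments visible in the diff are shown, everything else keeps its default.
    example_configs = {
        "example": DeepSeekV3ModelArgs(
            qk_nope_head_dim=128,
            qk_rope_head_dim=64,
            v_head_dim=128,
            # dtype="fp8" is no longer accepted after this commit.
        ),
    }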

torchtitan/models/deepseek_v3/model/args.py

Lines changed: 0 additions & 3 deletions
@@ -8,7 +8,6 @@
 
 
 from dataclasses import dataclass, field
-from typing import Literal
 
 from torch import nn
 
@@ -28,7 +27,6 @@ class DeepSeekV3ModelArgs(BaseModelArgs):
     Attributes:
         max_batch_size (int): Maximum batch size.
         max_seq_len (int): Maximum sequence length.
-        dtype (Literal["bf16", "fp8"]): Data type for computations.
         vocab_size (int): Vocabulary size.
         dim (int): Model dimension.
         inter_dim (int): Intermediate dimension for MLP layers.
@@ -59,7 +57,6 @@ class DeepSeekV3ModelArgs(BaseModelArgs):
 
     max_batch_size: int = 8
     max_seq_len: int = 4096 * 4
-    dtype: Literal["bf16", "fp8"] = "bf16"
     vocab_size: int = 102400
     dim: int = 2048
     inter_dim: int = 10944
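Taken together, the hunks above leave the args dataclass without any dtype knob. A self-contained sketch, assuming only the fields and defaults visible in this diff (all other fields of the real class are omitted here):

    from dataclasses import dataclass

    @dataclass
    class DeepSeekV3ModelArgs:
        # Fields and defaults visible in the args.py hunks; the removed
        # dtype field (Literal["bf16", "fp8"]) no longer exists, so compute
        # dtype is not configured through these model args anymore.
        max_batch_size: int = 8
        max_seq_len: int = 4096 * 4
        vocab_size: int = 102400
        dim: int = 2048
        inter_dim: int = 10944

    args = DeepSeekV3ModelArgs()          # fine: defaults only
    # DeepSeekV3ModelArgs(dtype="fp8")    # would now raise a TypeError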
