2 files changed under torchtitan/models/deepseek_v3: 0 additions, 4 deletions.

First file (the DeepSeek-V3 model-config registry; the exact filename is not shown in this view):

@@ -150,6 +150,5 @@
         qk_nope_head_dim=128,
         qk_rope_head_dim=64,
         v_head_dim=128,
-        dtype="fp8",
     ),
 }
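Read after the change, the registry entry simply drops its dtype knob. Below is a minimal sketch of a post-change entry, assuming a plain keyword mapping; the name example_configs, the "example" key, and the comments are illustrative placeholders, and only the three head dimensions plus the removed dtype="fp8" line are taken from the diff above.

# Illustrative sketch only: `example_configs` and the "example" key are
# placeholders, not names from the PR; the values mirror the diff above.
example_configs = {
    "example": dict(
        qk_nope_head_dim=128,  # non-rotary part of the per-head Q/K dimension
        qk_rope_head_dim=64,   # rotary (RoPE) part of the per-head Q/K dimension
        v_head_dim=128,        # per-head V dimension
        # dtype="fp8" previously sat here; this PR removes it
    ),
}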
Second file (the DeepSeekV3ModelArgs definition; the exact filename is not shown in this view):

@@ -10,5 +10,4 @@
 from dataclasses import dataclass, field
-from typing import Literal

 from torch import nn

@@ -28,7 +27,6 @@ class DeepSeekV3ModelArgs(BaseModelArgs):
     Attributes:
         max_batch_size (int): Maximum batch size.
         max_seq_len (int): Maximum sequence length.
-        dtype (Literal["bf16", "fp8"]): Data type for computations.
        vocab_size (int): Vocabulary size.
        dim (int): Model dimension.
        inter_dim (int): Intermediate dimension for MLP layers.
@@ -59,7 +57,6 @@ class DeepSeekV3ModelArgs(BaseModelArgs):

     max_batch_size: int = 8
     max_seq_len: int = 4096 * 4
-    dtype: Literal["bf16", "fp8"] = "bf16"
     vocab_size: int = 102400
     dim: int = 2048
     inter_dim: int = 10944
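For reference, here is a minimal sketch of the trimmed argument container after this PR, restricted to the fields visible in the hunks above; the real class subclasses BaseModelArgs and defines many more fields, so this is not the full definition.

from dataclasses import dataclass


@dataclass
class DeepSeekV3ModelArgs:  # sketch only; the real class derives from BaseModelArgs
    """Limited to the fields shown in the diff."""

    max_batch_size: int = 8
    max_seq_len: int = 4096 * 4
    # dtype: Literal["bf16", "fp8"] = "bf16"  <- removed by this PR, along with
    # the now-unneeded `from typing import Literal` import
    vocab_size: int = 102400
    dim: int = 2048
    inter_dim: int = 10944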