1 parent e11ddec · commit 792c295
examples/models/llama/model_args.py

@@ -4,12 +4,12 @@
 
 @dataclass
 class ModelArgs:
-    dim: int = 2048
+    dim: int = 4096
     n_layers: int = 4
     n_heads: int = 32
     n_kv_heads: Optional[int] = None
     vocab_size: int = 512  # Arbitrary value, should be defined later by tokenizer.
-    hidden_dim: Optional[int] = 8192
+    hidden_dim: Optional[int] = None
     head_dim: Optional[int] = None  # Optional customized head_dim
     multiple_of: int = 256  # make SwiGLU hidden layer size multiple of large power of 2
     ffn_dim_multiplier: Optional[float] = None
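Setting `hidden_dim` to `None` defers the FFN width to a derivation from `dim`, `multiple_of`, and `ffn_dim_multiplier` rather than pinning it at 8192. Below is a minimal sketch of how that resolution typically works for Llama-style models, assuming the standard SwiGLU sizing recipe; the `resolve_hidden_dim` helper is hypothetical and not part of this commit.

```python
from dataclasses import dataclass
from typing import Optional


@dataclass
class ModelArgs:
    dim: int = 4096
    hidden_dim: Optional[int] = None
    multiple_of: int = 256
    ffn_dim_multiplier: Optional[float] = None


def resolve_hidden_dim(args: ModelArgs) -> int:
    """Derive the FFN hidden size when hidden_dim is left as None.

    Assumption: follows the common Llama SwiGLU sizing recipe, not code
    taken from this file.
    """
    if args.hidden_dim is not None:
        return args.hidden_dim
    # Start from 4 * dim, shrink by 2/3 so SwiGLU's three projection
    # matrices keep a parameter count comparable to a plain two-matrix MLP,
    # apply the optional multiplier, then round up to a multiple of
    # `multiple_of`.
    hidden_dim = 4 * args.dim
    hidden_dim = int(2 * hidden_dim / 3)
    if args.ffn_dim_multiplier is not None:
        hidden_dim = int(args.ffn_dim_multiplier * hidden_dim)
    return args.multiple_of * ((hidden_dim + args.multiple_of - 1) // args.multiple_of)


# With the new defaults: 4 * 4096 = 16384 -> 10922 -> rounded up to 11008.
print(resolve_hidden_dim(ModelArgs()))  # 11008
```

Under these assumptions, the new defaults (`dim = 4096`, `hidden_dim = None`) would yield an FFN hidden size of 11008, whereas the old hard-coded `hidden_dim = 8192` ignored `multiple_of` and `ffn_dim_multiplier` entirely.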