We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 9197908 commit 030879fCopy full SHA for 030879f
torchtitan/experiments/qwen3/__init__.py
@@ -40,6 +40,7 @@
40
qk_norm=True,
41
hidden_dim=3072,
42
rope_theta=1000000,
43
+ enable_weight_tying=True,
44
),
45
"1.7B": Qwen3ModelArgs(
46
vocab_size=151936,
@@ -52,6 +53,7 @@
52
53
54
hidden_dim=6144,
55
56
57
58
"4B": Qwen3ModelArgs(
59
@@ -64,6 +66,7 @@
64
66
65
67
hidden_dim=9728,
68
69
70
71
"8B": Qwen3ModelArgs(
72
0 commit comments