Skip to content

Commit b87c93d

Browse files
Add finewebedu_lm.txt with model parameters
Added finewebedu_lm.txt with model parameters for Muon and Adam.
1 parent fd6584d commit b87c93d

File tree

1 file changed

+88
-0
lines changed

1 file changed

+88
-0
lines changed
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
I0209 15:07:04.723858 22823061509952 utils.py:28] Adam params:
2+
_orig_mod.module.layers.0.attn.attn_scale (ndim=0)
3+
_orig_mod.module.layers.0.attn_norm.weight (ndim=1)
4+
_orig_mod.module.layers.0.mlp_norm.weight (ndim=1)
5+
_orig_mod.module.layers.1.attn.attn_scale (ndim=0)
6+
_orig_mod.module.layers.1.attn_norm.weight (ndim=1)
7+
_orig_mod.module.layers.1.mlp_norm.weight (ndim=1)
8+
_orig_mod.module.layers.2.attn.attn_scale (ndim=0)
9+
_orig_mod.module.layers.2.attn_norm.weight (ndim=1)
10+
_orig_mod.module.layers.2.mlp_norm.weight (ndim=1)
11+
_orig_mod.module.layers.3.attn.attn_scale (ndim=0)
12+
_orig_mod.module.layers.3.attn_norm.weight (ndim=1)
13+
_orig_mod.module.layers.3.mlp_norm.weight (ndim=1)
14+
_orig_mod.module.layers.4.attn.attn_scale (ndim=0)
15+
_orig_mod.module.layers.4.attn_norm.weight (ndim=1)
16+
_orig_mod.module.layers.4.mlp_norm.weight (ndim=1)
17+
_orig_mod.module.layers.5.attn.attn_scale (ndim=0)
18+
_orig_mod.module.layers.5.attn_norm.weight (ndim=1)
19+
_orig_mod.module.layers.5.mlp_norm.weight (ndim=1)
20+
_orig_mod.module.layers.6.attn.attn_scale (ndim=0)
21+
_orig_mod.module.layers.6.attn_norm.weight (ndim=1)
22+
_orig_mod.module.layers.6.mlp_norm.weight (ndim=1)
23+
_orig_mod.module.layers.7.attn.attn_scale (ndim=0)
24+
_orig_mod.module.layers.7.attn_norm.weight (ndim=1)
25+
_orig_mod.module.layers.7.mlp_norm.weight (ndim=1)
26+
_orig_mod.module.layers.8.attn.attn_scale (ndim=0)
27+
_orig_mod.module.layers.8.attn_norm.weight (ndim=1)
28+
_orig_mod.module.layers.8.mlp_norm.weight (ndim=1)
29+
_orig_mod.module.layers.9.attn.attn_scale (ndim=0)
30+
_orig_mod.module.layers.9.attn_norm.weight (ndim=1)
31+
_orig_mod.module.layers.9.mlp_norm.weight (ndim=1)
32+
_orig_mod.module.layers.10.attn.attn_scale (ndim=0)
33+
_orig_mod.module.layers.10.attn_norm.weight (ndim=1)
34+
_orig_mod.module.layers.10.mlp_norm.weight (ndim=1)
35+
_orig_mod.module.layers.11.attn.attn_scale (ndim=0)
36+
_orig_mod.module.layers.11.attn_norm.weight (ndim=1)
37+
_orig_mod.module.layers.11.mlp_norm.weight (ndim=1)
38+
_orig_mod.module.out_norm.weight (ndim=1)
39+
I0209 15:07:04.727331 23108874655552 utils.py:27] Muon params:
40+
_orig_mod.module.embed_tokens.weight (ndim=2)
41+
_orig_mod.module.layers.0.attn.w_qkv.weight (ndim=2)
42+
_orig_mod.module.layers.0.attn.w_out.weight (ndim=2)
43+
_orig_mod.module.layers.0.mlp.fc1.weight (ndim=2)
44+
_orig_mod.module.layers.0.mlp.fc2.weight (ndim=2)
45+
_orig_mod.module.layers.1.attn.w_qkv.weight (ndim=2)
46+
_orig_mod.module.layers.1.attn.w_out.weight (ndim=2)
47+
_orig_mod.module.layers.1.mlp.fc1.weight (ndim=2)
48+
_orig_mod.module.layers.1.mlp.fc2.weight (ndim=2)
49+
_orig_mod.module.layers.2.attn.w_qkv.weight (ndim=2)
50+
_orig_mod.module.layers.2.attn.w_out.weight (ndim=2)
51+
_orig_mod.module.layers.2.mlp.fc1.weight (ndim=2)
52+
_orig_mod.module.layers.2.mlp.fc2.weight (ndim=2)
53+
_orig_mod.module.layers.3.attn.w_qkv.weight (ndim=2)
54+
_orig_mod.module.layers.3.attn.w_out.weight (ndim=2)
55+
_orig_mod.module.layers.3.mlp.fc1.weight (ndim=2)
56+
_orig_mod.module.layers.3.mlp.fc2.weight (ndim=2)
57+
_orig_mod.module.layers.4.attn.w_qkv.weight (ndim=2)
58+
_orig_mod.module.layers.4.attn.w_out.weight (ndim=2)
59+
_orig_mod.module.layers.4.mlp.fc1.weight (ndim=2)
60+
_orig_mod.module.layers.4.mlp.fc2.weight (ndim=2)
61+
_orig_mod.module.layers.5.attn.w_qkv.weight (ndim=2)
62+
_orig_mod.module.layers.5.attn.w_out.weight (ndim=2)
63+
_orig_mod.module.layers.5.mlp.fc1.weight (ndim=2)
64+
_orig_mod.module.layers.5.mlp.fc2.weight (ndim=2)
65+
_orig_mod.module.layers.6.attn.w_qkv.weight (ndim=2)
66+
_orig_mod.module.layers.6.attn.w_out.weight (ndim=2)
67+
_orig_mod.module.layers.6.mlp.fc1.weight (ndim=2)
68+
_orig_mod.module.layers.6.mlp.fc2.weight (ndim=2)
69+
_orig_mod.module.layers.7.attn.w_qkv.weight (ndim=2)
70+
_orig_mod.module.layers.7.attn.w_out.weight (ndim=2)
71+
_orig_mod.module.layers.7.mlp.fc1.weight (ndim=2)
72+
_orig_mod.module.layers.7.mlp.fc2.weight (ndim=2)
73+
_orig_mod.module.layers.8.attn.w_qkv.weight (ndim=2)
74+
_orig_mod.module.layers.8.attn.w_out.weight (ndim=2)
75+
_orig_mod.module.layers.8.mlp.fc1.weight (ndim=2)
76+
_orig_mod.module.layers.8.mlp.fc2.weight (ndim=2)
77+
_orig_mod.module.layers.9.attn.w_qkv.weight (ndim=2)
78+
_orig_mod.module.layers.9.attn.w_out.weight (ndim=2)
79+
_orig_mod.module.layers.9.mlp.fc1.weight (ndim=2)
80+
_orig_mod.module.layers.9.mlp.fc2.weight (ndim=2)
81+
_orig_mod.module.layers.10.attn.w_qkv.weight (ndim=2)
82+
_orig_mod.module.layers.10.attn.w_out.weight (ndim=2)
83+
_orig_mod.module.layers.10.mlp.fc1.weight (ndim=2)
84+
_orig_mod.module.layers.10.mlp.fc2.weight (ndim=2)
85+
_orig_mod.module.layers.11.attn.w_qkv.weight (ndim=2)
86+
_orig_mod.module.layers.11.attn.w_out.weight (ndim=2)
87+
_orig_mod.module.layers.11.mlp.fc1.weight (ndim=2)
88+
_orig_mod.module.layers.11.mlp.fc2.weight (ndim=2)

0 commit comments

Comments
 (0)