We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
norm_first
1 parent b654e4f, commit 4790efe (Copy full SHA for 4790efe)
configs/delta_net_340M.json
@@ -13,7 +13,6 @@
13
"intermediate_size": null,
14
"model_type": "delta_net",
15
"norm_eps": 1e-06,
16
- "norm_first": false,
17
"num_heads": 8,
18
"num_hidden_layers": 24,
19
"qk_activation": "silu",
0 commit comments