Skip to content

Commit b654e4f

Browse files
committed
Add GatedDeltaNet configs (#30)
1 parent a587400 commit b654e4f

File tree

2 files changed

+44
-0
lines changed

2 files changed

+44
-0
lines changed

configs/gated_deltanet_1B.json

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
{
2+
"attn_mode": "chunk",
3+
"bos_token_id": 1,
4+
"conv_size": 4,
5+
"eos_token_id": 2,
6+
"expand_v": 2,
7+
"fuse_cross_entropy": true,
8+
"head_dim": 256,
9+
"hidden_act": "swish",
10+
"hidden_ratio": 4,
11+
"hidden_size": 2048,
12+
"initializer_range": 0.02,
13+
"intermediate_size": null,
14+
"model_type": "gated_deltanet",
15+
"norm_eps": 1e-06,
16+
"num_heads": 6,
17+
"num_hidden_layers": 21,
18+
"tie_word_embeddings": false,
19+
"use_cache": true,
20+
"use_gate": true,
21+
"use_short_conv": true
22+
}

configs/gated_deltanet_340M.json

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
{
2+
"attn_mode": "chunk",
3+
"bos_token_id": 1,
4+
"conv_size": 4,
5+
"eos_token_id": 2,
6+
"expand_v": 2,
7+
"fuse_cross_entropy": true,
8+
"head_dim": 256,
9+
"hidden_act": "swish",
10+
"hidden_ratio": 4,
11+
"hidden_size": 1024,
12+
"initializer_range": 0.02,
13+
"intermediate_size": null,
14+
"model_type": "gated_deltanet",
15+
"norm_eps": 1e-06,
16+
"num_heads": 6,
17+
"num_hidden_layers": 21,
18+
"tie_word_embeddings": false,
19+
"use_cache": true,
20+
"use_gate": true,
21+
"use_short_conv": true
22+
}

0 commit comments

Comments
 (0)