We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent aa48c35 commit 42158dc · Copy full SHA for 42158dc
configs/nsa_340M.json
@@ -1,5 +1,13 @@
1
{
2
"attention_bias": false,
3
+ "architectures": [
4
+ "NSAForCausalLM"
5
+ ],
6
+ "auto_map": {
7
+ "AutoConfig": "native_sparse_attention.NSAConfig",
8
+ "AutoModel": "native_sparse_attention.NSAModel",
9
+ "AutoModelForCausalLM": "native_sparse_attention.NSAForCausalLM"
10
+ },
11
"bos_token_id": 1,
12
"eos_token_id": 2,
13
"fuse_cross_entropy": true,
@@ -8,7 +16,7 @@
16
"hidden_size": 1024,
17
"initializer_range": 0.006,
18
"max_position_embeddings": 8192,
- "model_type": "transformer",
19
+ "model_type": "nsa",
20
"num_heads": 64,
21
"num_kv_heads": 4,
14
22
"block_size": 64,
0 commit comments