@@ -118,10 +118,6 @@ class LLM:
         EMBEDDING_SCALE              = "{arch}.embedding_scale"
         TOKEN_SHIFT_COUNT            = "{arch}.token_shift_count"
         INTERLEAVE_MOE_LAYER_STEP    = "{arch}.interleave_moe_layer_step"
-        ACTIVATION_SPARSITY_SCALE    = "{arch}.activation_sparsity_scale"
-        ALTUP_ACTIVE_IDX             = "{arch}.altup.active_idx"
-        ALTUP_NUM_INPUTS             = "{arch}.altup.num_inputs"
-        EMBD_LENGTH_PER_LAYER_INP    = "{arch}.embedding_length_per_layer_input"

     class Attention:
         HEAD_COUNT              = "{arch}.attention.head_count"
@@ -146,8 +142,6 @@ class Attention:
         SCALE                   = "{arch}.attention.scale"
         KEY_LENGTH_MLA          = "{arch}.attention.key_length_mla"
         VALUE_LENGTH_MLA        = "{arch}.attention.value_length_mla"
-        SHARED_KV_LAYERS        = "{arch}.attention.shared_kv_layers"
-        SLIDING_WINDOW_PATTERN  = "{arch}.attention.sliding_window_pattern"

     class Rope:
         DIMENSION_COUNT         = "{arch}.rope.dimension_count"
@@ -205,7 +199,6 @@ class Tokenizer:
         MASK_ID              = "tokenizer.ggml.mask_token_id"
         ADD_BOS              = "tokenizer.ggml.add_bos_token"
         ADD_EOS              = "tokenizer.ggml.add_eos_token"
-        ADD_SEP              = "tokenizer.ggml.add_sep_token"
         ADD_PREFIX           = "tokenizer.ggml.add_space_prefix"
         REMOVE_EXTRA_WS      = "tokenizer.ggml.remove_extra_whitespaces"
         PRECOMPILED_CHARSMAP = "tokenizer.ggml.precompiled_charsmap"
@@ -299,7 +292,6 @@ class MODEL_ARCH(IntEnum):
     BERT             = auto()
     NOMIC_BERT       = auto()
     NOMIC_BERT_MOE   = auto()
-    NEO_BERT         = auto()
     JINA_BERT_V2     = auto()
     BLOOM            = auto()
     STABLELM         = auto()
@@ -321,7 +313,6 @@ class MODEL_ARCH(IntEnum):
     GEMMA            = auto()
     GEMMA2           = auto()
     GEMMA3           = auto()
-    GEMMA3N          = auto()
     STARCODER2       = auto()
     RWKV6            = auto()
     RWKV6QWEN2       = auto()
@@ -355,8 +346,6 @@ class MODEL_ARCH(IntEnum):
     PLM              = auto()
     BAILINGMOE       = auto()
     DOTS1            = auto()
-    ARCEE            = auto()
-    ERNIE4_5         = auto()


 class VISION_PROJECTOR_TYPE(IntEnum):
@@ -409,22 +398,6 @@ class MODEL_TENSOR(IntEnum):
     ATTN_Q_NORM          = auto()
     ATTN_K_NORM          = auto()
     LAYER_OUT_NORM       = auto()
-    PER_LAYER_TOKEN_EMBD = auto()  # gemma3n
-    PER_LAYER_MODEL_PROJ = auto()  # gemma3n
-    PER_LAYER_INP_GATE   = auto()  # gemma3n
-    PER_LAYER_PROJ       = auto()  # gemma3n
-    PER_LAYER_PROJ_NORM  = auto()  # gemma3n
-    PER_LAYER_POST_NORM  = auto()  # gemma3n
-    ALTUP_PROJ           = auto()  # gemma3n
-    ALTUP_UNEMBD_PROJ    = auto()  # gemma3n
-    ALTUP_CORRECT_COEF   = auto()  # gemma3n
-    ALTUP_CORRECT_SCALE  = auto()  # gemma3n
-    ALTUP_PREDICT_COEF   = auto()  # gemma3n
-    ALTUP_ROUTER         = auto()  # gemma3n
-    ALTUP_ROUTER_NORM    = auto()  # gemma3n
-    LAUREL_L             = auto()  # gemma3n
-    LAUREL_R             = auto()  # gemma3n
-    LAUREL_POST_NORM     = auto()  # gemma3n
     SSM_IN               = auto()
     SSM_CONV1D           = auto()
     SSM_X                = auto()
@@ -602,7 +575,6 @@ class MODEL_TENSOR(IntEnum):
     MODEL_ARCH.BERT:             "bert",
     MODEL_ARCH.NOMIC_BERT:       "nomic-bert",
     MODEL_ARCH.NOMIC_BERT_MOE:   "nomic-bert-moe",
-    MODEL_ARCH.NEO_BERT:         "neo-bert",
     MODEL_ARCH.JINA_BERT_V2:     "jina-bert-v2",
     MODEL_ARCH.BLOOM:            "bloom",
     MODEL_ARCH.STABLELM:         "stablelm",
@@ -624,7 +596,6 @@ class MODEL_TENSOR(IntEnum):
     MODEL_ARCH.GEMMA:            "gemma",
     MODEL_ARCH.GEMMA2:           "gemma2",
     MODEL_ARCH.GEMMA3:           "gemma3",
-    MODEL_ARCH.GEMMA3N:          "gemma3n",
     MODEL_ARCH.STARCODER2:       "starcoder2",
     MODEL_ARCH.RWKV6:            "rwkv6",
     MODEL_ARCH.RWKV6QWEN2:       "rwkv6qwen2",
@@ -657,9 +628,7 @@ class MODEL_TENSOR(IntEnum):
     MODEL_ARCH.WAVTOKENIZER_DEC: "wavtokenizer-dec",
     MODEL_ARCH.PLM:              "plm",
     MODEL_ARCH.BAILINGMOE:       "bailingmoe",
-    MODEL_ARCH.DOTS1:            "dots1",
-    MODEL_ARCH.ARCEE:            "arcee",
-    MODEL_ARCH.ERNIE4_5:         "ernie4_5",
+    MODEL_ARCH.DOTS1:            "dots1"
 }

 VISION_PROJECTOR_TYPE_NAMES: dict[VISION_PROJECTOR_TYPE, str] = {
@@ -712,22 +681,6 @@ class MODEL_TENSOR(IntEnum):
     MODEL_TENSOR.FFN_UP_EXP:           "blk.{bid}.ffn_up_exps",
     MODEL_TENSOR.FFN_EXP_PROBS_B:      "blk.{bid}.exp_probs_b",
     MODEL_TENSOR.LAYER_OUT_NORM:       "blk.{bid}.layer_output_norm",
-    MODEL_TENSOR.PER_LAYER_TOKEN_EMBD: "per_layer_token_embd",           # gemma3n
-    MODEL_TENSOR.PER_LAYER_MODEL_PROJ: "per_layer_model_proj",           # gemma3n
-    MODEL_TENSOR.PER_LAYER_PROJ_NORM:  "per_layer_proj_norm",            # gemma3n
-    MODEL_TENSOR.ALTUP_UNEMBD_PROJ:    "altup_unembd_proj",              # gemma3n
-    MODEL_TENSOR.ALTUP_PROJ:           "altup_proj",                     # gemma3n
-    MODEL_TENSOR.PER_LAYER_INP_GATE:   "blk.{bid}.inp_gate",             # gemma3n
-    MODEL_TENSOR.PER_LAYER_PROJ:       "blk.{bid}.proj",                 # gemma3n
-    MODEL_TENSOR.PER_LAYER_POST_NORM:  "blk.{bid}.post_norm",            # gemma3n
-    MODEL_TENSOR.ALTUP_CORRECT_COEF:   "blk.{bid}.altup_correct_coef",   # gemma3n
-    MODEL_TENSOR.ALTUP_CORRECT_SCALE:  "blk.{bid}.altup_correct_scale",  # gemma3n
-    MODEL_TENSOR.ALTUP_PREDICT_COEF:   "blk.{bid}.altup_predict_coef",   # gemma3n
-    MODEL_TENSOR.ALTUP_ROUTER:         "blk.{bid}.altup_router",         # gemma3n
-    MODEL_TENSOR.ALTUP_ROUTER_NORM:    "blk.{bid}.altup_router_norm",    # gemma3n
-    MODEL_TENSOR.LAUREL_L:             "blk.{bid}.laurel_l",             # gemma3n
-    MODEL_TENSOR.LAUREL_R:             "blk.{bid}.laurel_r",             # gemma3n
-    MODEL_TENSOR.LAUREL_POST_NORM:     "blk.{bid}.laurel_post_norm",     # gemma3n
     MODEL_TENSOR.SSM_IN:               "blk.{bid}.ssm_in",
     MODEL_TENSOR.SSM_CONV1D:           "blk.{bid}.ssm_conv1d",
     MODEL_TENSOR.SSM_X:                "blk.{bid}.ssm_x",
@@ -1131,18 +1084,6 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.FFN_UP_EXP,
         MODEL_TENSOR.LAYER_OUT_NORM,
     ],
-    MODEL_ARCH.NEO_BERT: [
-        MODEL_TENSOR.TOKEN_EMBD,
-        MODEL_TENSOR.ATTN_NORM,
-        MODEL_TENSOR.ATTN_QKV,
-        MODEL_TENSOR.ATTN_OUT,
-        MODEL_TENSOR.FFN_NORM,
-        MODEL_TENSOR.FFN_DOWN,
-        MODEL_TENSOR.FFN_UP,
-        MODEL_TENSOR.ENC_OUTPUT_NORM,
-        MODEL_TENSOR.CLS,
-        MODEL_TENSOR.CLS_OUT,
-    ],
     MODEL_ARCH.JINA_BERT_V2: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.TOKEN_EMBD_NORM,
@@ -1533,41 +1474,6 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.FFN_PRE_NORM,
         MODEL_TENSOR.FFN_POST_NORM,
     ],
-    MODEL_ARCH.GEMMA3N: [
-        MODEL_TENSOR.TOKEN_EMBD,
-        MODEL_TENSOR.OUTPUT,
-        MODEL_TENSOR.OUTPUT_NORM,
-        MODEL_TENSOR.ATTN_Q,
-        MODEL_TENSOR.ATTN_Q_NORM,
-        MODEL_TENSOR.ATTN_K,
-        MODEL_TENSOR.ATTN_K_NORM,
-        MODEL_TENSOR.ATTN_V,
-        MODEL_TENSOR.ATTN_OUT,
-        MODEL_TENSOR.FFN_GATE,
-        MODEL_TENSOR.FFN_DOWN,
-        MODEL_TENSOR.FFN_UP,
-        MODEL_TENSOR.ATTN_NORM,
-        MODEL_TENSOR.ATTN_POST_NORM,
-        MODEL_TENSOR.FFN_PRE_NORM,
-        MODEL_TENSOR.FFN_POST_NORM,
-        # altup / laurel
-        MODEL_TENSOR.PER_LAYER_TOKEN_EMBD,
-        MODEL_TENSOR.PER_LAYER_MODEL_PROJ,
-        MODEL_TENSOR.PER_LAYER_INP_GATE,
-        MODEL_TENSOR.PER_LAYER_PROJ,
-        MODEL_TENSOR.PER_LAYER_PROJ_NORM,
-        MODEL_TENSOR.PER_LAYER_POST_NORM,
-        MODEL_TENSOR.ALTUP_PROJ,
-        MODEL_TENSOR.ALTUP_UNEMBD_PROJ,
-        MODEL_TENSOR.ALTUP_CORRECT_COEF,
-        MODEL_TENSOR.ALTUP_CORRECT_SCALE,
-        MODEL_TENSOR.ALTUP_PREDICT_COEF,
-        MODEL_TENSOR.ALTUP_ROUTER,
-        MODEL_TENSOR.ALTUP_ROUTER_NORM,
-        MODEL_TENSOR.LAUREL_L,
-        MODEL_TENSOR.LAUREL_R,
-        MODEL_TENSOR.LAUREL_POST_NORM,
-    ],
     MODEL_ARCH.STARCODER2: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
@@ -2182,35 +2088,6 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.FFN_UP_EXP,
         MODEL_TENSOR.FFN_UP_SHEXP,
     ],
-    MODEL_ARCH.ARCEE: [
-        MODEL_TENSOR.TOKEN_EMBD,
-        MODEL_TENSOR.OUTPUT_NORM,
-        MODEL_TENSOR.OUTPUT,
-        MODEL_TENSOR.ROPE_FREQS,
-        MODEL_TENSOR.ATTN_NORM,
-        MODEL_TENSOR.ATTN_Q,
-        MODEL_TENSOR.ATTN_K,
-        MODEL_TENSOR.ATTN_V,
-        MODEL_TENSOR.ATTN_OUT,
-        MODEL_TENSOR.ATTN_ROT_EMBD,
-        MODEL_TENSOR.FFN_NORM,
-        MODEL_TENSOR.FFN_DOWN,
-        MODEL_TENSOR.FFN_UP,
-    ],
-    MODEL_ARCH.ERNIE4_5: [
-        MODEL_TENSOR.TOKEN_EMBD,
-        MODEL_TENSOR.OUTPUT_NORM,
-        MODEL_TENSOR.OUTPUT,
-        MODEL_TENSOR.ATTN_NORM,
-        MODEL_TENSOR.ATTN_Q,
-        MODEL_TENSOR.ATTN_K,
-        MODEL_TENSOR.ATTN_V,
-        MODEL_TENSOR.ATTN_OUT,
-        MODEL_TENSOR.FFN_NORM,
-        MODEL_TENSOR.FFN_GATE,
-        MODEL_TENSOR.FFN_DOWN,
-        MODEL_TENSOR.FFN_UP,
-    ],
     # TODO
 }
