@@ -91,8 +91,6 @@ class LLM:
9191 CONTEXT_LENGTH = "{arch}.context_length"
9292 EMBEDDING_LENGTH = "{arch}.embedding_length"
9393 FEATURES_LENGTH = "{arch}.features_length"
94- POSNET_LENGTH = "{arch}.posnet_length"
95- CONVNEXT_LENGTH = "{arch}.convnext_length"
9694 BLOCK_COUNT = "{arch}.block_count"
9795 LEADING_DENSE_BLOCK_COUNT = "{arch}.leading_dense_block_count"
9896 FEED_FORWARD_LENGTH = "{arch}.feed_forward_length"
@@ -160,6 +158,14 @@ class SSM:
160158 class WKV :
161159 HEAD_SIZE = "{arch}.wkv.head_size"
162160
161+ class PosNet :
162+ EMBEDDING_LENGTH = "{arch}.posnet.embedding_length"
163+ BLOCK_COUNT = "{arch}.posnet.block_count"
164+
165+ class ConvNext :
166+ EMBEDDING_LENGTH = "{arch}.convnext.embedding_length"
167+ BLOCK_COUNT = "{arch}.convnext.block_count"
168+
163169 class Tokenizer :
164170 MODEL = "tokenizer.ggml.model"
165171 PRE = "tokenizer.ggml.pre"
@@ -377,21 +383,21 @@ class MODEL_TENSOR(IntEnum):
377383 CLS = auto () # classifier
378384 CLS_OUT = auto () # classifier output projection
379385 CONV1D = auto ()
380- CONV_NEXT_DW = auto ()
381- CONV_NEXT_NORM = auto ()
382- CONV_NEXT_PW1 = auto ()
383- CONV_NEXT_PW2 = auto ()
384- CONV_NEXT_GAMMA = auto ()
385- POS_NET_CONV1 = auto ()
386- POS_NET_CONV2 = auto ()
387- POS_NET_NORM = auto ()
388- POS_NET_NORM1 = auto ()
389- POS_NET_NORM2 = auto ()
390- POS_NET_ATTN_NORM = auto ()
391- POS_NET_ATTN_Q = auto ()
392- POS_NET_ATTN_K = auto ()
393- POS_NET_ATTN_V = auto ()
394- POS_NET_ATTN_OUT = auto ()
386+ CONVNEXT_DW = auto ()
387+ CONVNEXT_NORM = auto ()
388+ CONVNEXT_PW1 = auto ()
389+ CONVNEXT_PW2 = auto ()
390+ CONVNEXT_GAMMA = auto ()
391+ POSNET_CONV1 = auto ()
392+ POSNET_CONV2 = auto ()
393+ POSNET_NORM = auto ()
394+ POSNET_NORM1 = auto ()
395+ POSNET_NORM2 = auto ()
396+ POSNET_ATTN_NORM = auto ()
397+ POSNET_ATTN_Q = auto ()
398+ POSNET_ATTN_K = auto ()
399+ POSNET_ATTN_V = auto ()
400+ POSNET_ATTN_OUT = auto ()
395401
396402
397403MODEL_ARCH_NAMES : dict [MODEL_ARCH , str ] = {
@@ -558,21 +564,21 @@ class MODEL_TENSOR(IntEnum):
558564 MODEL_TENSOR .CLS : "cls" ,
559565 MODEL_TENSOR .CLS_OUT : "cls.output" ,
560566 MODEL_TENSOR .CONV1D : "conv1d" ,
561- MODEL_TENSOR .CONV_NEXT_DW : "conv_next.{bid}.dw" ,
562- MODEL_TENSOR .CONV_NEXT_NORM : "conv_next.{bid}.norm" ,
563- MODEL_TENSOR .CONV_NEXT_PW1 : "conv_next.{bid}.pw1" ,
564- MODEL_TENSOR .CONV_NEXT_PW2 : "conv_next.{bid}.pw2" ,
565- MODEL_TENSOR .CONV_NEXT_GAMMA : "conv_next.{bid}.gamma" ,
566- MODEL_TENSOR .POS_NET_CONV1 : "pos_net.{bid}.conv1" ,
567- MODEL_TENSOR .POS_NET_CONV2 : "pos_net.{bid}.conv2" ,
568- MODEL_TENSOR .POS_NET_NORM : "pos_net.{bid}.norm" ,
569- MODEL_TENSOR .POS_NET_NORM1 : "pos_net.{bid}.norm1" ,
570- MODEL_TENSOR .POS_NET_NORM2 : "pos_net.{bid}.norm2" ,
571- MODEL_TENSOR .POS_NET_ATTN_NORM : "pos_net.{bid}.attn_norm" ,
572- MODEL_TENSOR .POS_NET_ATTN_Q : "pos_net.{bid}.attn_q" ,
573- MODEL_TENSOR .POS_NET_ATTN_K : "pos_net.{bid}.attn_k" ,
574- MODEL_TENSOR .POS_NET_ATTN_V : "pos_net.{bid}.attn_v" ,
575- MODEL_TENSOR .POS_NET_ATTN_OUT : "pos_net.{bid}.attn_output" ,
567+ MODEL_TENSOR .CONVNEXT_DW : "convnext.{bid}.dw" ,
568+ MODEL_TENSOR .CONVNEXT_NORM : "convnext.{bid}.norm" ,
569+ MODEL_TENSOR .CONVNEXT_PW1 : "convnext.{bid}.pw1" ,
570+ MODEL_TENSOR .CONVNEXT_PW2 : "convnext.{bid}.pw2" ,
571+ MODEL_TENSOR .CONVNEXT_GAMMA : "convnext.{bid}.gamma" ,
572+ MODEL_TENSOR .POSNET_CONV1 : "posnet.{bid}.conv1" ,
573+ MODEL_TENSOR .POSNET_CONV2 : "posnet.{bid}.conv2" ,
574+ MODEL_TENSOR .POSNET_NORM : "posnet.{bid}.norm" ,
575+ MODEL_TENSOR .POSNET_NORM1 : "posnet.{bid}.norm1" ,
576+ MODEL_TENSOR .POSNET_NORM2 : "posnet.{bid}.norm2" ,
577+ MODEL_TENSOR .POSNET_ATTN_NORM : "posnet.{bid}.attn_norm" ,
578+ MODEL_TENSOR .POSNET_ATTN_Q : "posnet.{bid}.attn_q" ,
579+ MODEL_TENSOR .POSNET_ATTN_K : "posnet.{bid}.attn_k" ,
580+ MODEL_TENSOR .POSNET_ATTN_V : "posnet.{bid}.attn_v" ,
581+ MODEL_TENSOR .POSNET_ATTN_OUT : "posnet.{bid}.attn_output" ,
576582}
577583
578584MODEL_TENSORS : dict [MODEL_ARCH , list [MODEL_TENSOR ]] = {
@@ -1415,23 +1421,23 @@ class MODEL_TENSOR(IntEnum):
14151421 MODEL_TENSOR .TOKEN_EMBD ,
14161422 MODEL_TENSOR .TOKEN_EMBD_NORM ,
14171423 MODEL_TENSOR .CONV1D ,
1418- MODEL_TENSOR .CONV_NEXT_DW ,
1419- MODEL_TENSOR .CONV_NEXT_NORM ,
1420- MODEL_TENSOR .CONV_NEXT_PW1 ,
1421- MODEL_TENSOR .CONV_NEXT_PW2 ,
1422- MODEL_TENSOR .CONV_NEXT_GAMMA ,
1424+ MODEL_TENSOR .CONVNEXT_DW ,
1425+ MODEL_TENSOR .CONVNEXT_NORM ,
1426+ MODEL_TENSOR .CONVNEXT_PW1 ,
1427+ MODEL_TENSOR .CONVNEXT_PW2 ,
1428+ MODEL_TENSOR .CONVNEXT_GAMMA ,
14231429 MODEL_TENSOR .OUTPUT ,
14241430 MODEL_TENSOR .OUTPUT_NORM ,
1425- MODEL_TENSOR .POS_NET_CONV1 ,
1426- MODEL_TENSOR .POS_NET_CONV2 ,
1427- MODEL_TENSOR .POS_NET_NORM ,
1428- MODEL_TENSOR .POS_NET_NORM1 ,
1429- MODEL_TENSOR .POS_NET_NORM2 ,
1430- MODEL_TENSOR .POS_NET_ATTN_NORM ,
1431- MODEL_TENSOR .POS_NET_ATTN_Q ,
1432- MODEL_TENSOR .POS_NET_ATTN_K ,
1433- MODEL_TENSOR .POS_NET_ATTN_V ,
1434- MODEL_TENSOR .POS_NET_ATTN_OUT ,
1431+ MODEL_TENSOR .POSNET_CONV1 ,
1432+ MODEL_TENSOR .POSNET_CONV2 ,
1433+ MODEL_TENSOR .POSNET_NORM ,
1434+ MODEL_TENSOR .POSNET_NORM1 ,
1435+ MODEL_TENSOR .POSNET_NORM2 ,
1436+ MODEL_TENSOR .POSNET_ATTN_NORM ,
1437+ MODEL_TENSOR .POSNET_ATTN_Q ,
1438+ MODEL_TENSOR .POSNET_ATTN_K ,
1439+ MODEL_TENSOR .POSNET_ATTN_V ,
1440+ MODEL_TENSOR .POSNET_ATTN_OUT ,
14351441 ],
14361442 # TODO
14371443}
0 commit comments