
Commit c73eb68

added CLS token tensors per the previous ModernBERT attempt; still working on checking out the rest
1 parent 2a1c750 commit c73eb68

3 files changed: +9 -0 lines changed

gguf-py/gguf/constants.py

Lines changed: 2 additions & 0 deletions
@@ -1185,6 +1185,8 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.FFN_UP,
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.CLS,
+        MODEL_TENSOR.CLS_OUT,
     ],
     MODEL_ARCH.NOMIC_BERT: [
         MODEL_TENSOR.TOKEN_EMBD,
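
Registering MODEL_TENSOR.CLS and MODEL_TENSOR.CLS_OUT in the architecture's tensor list is what lets the gguf-py converter emit the pooler/classifier weights for this model. A minimal sketch of how such a per-architecture whitelist is typically consumed (hypothetical names and check, not the actual gguf-py code path):

from enum import IntEnum

class MODEL_TENSOR(IntEnum):
    FFN_UP   = 0
    FFN_DOWN = 1
    FFN_NORM = 2
    CLS      = 3
    CLS_OUT  = 4

# After this commit the CLS tensors are part of the architecture's list.
ARCH_TENSORS = [
    MODEL_TENSOR.FFN_UP,
    MODEL_TENSOR.FFN_DOWN,
    MODEL_TENSOR.FFN_NORM,
    MODEL_TENSOR.CLS,      # pooler dense weight
    MODEL_TENSOR.CLS_OUT,  # classification-head weight/bias
]

def can_export(t: MODEL_TENSOR) -> bool:
    # A converter would skip or reject tensors missing from the arch list.
    return t in ARCH_TENSORS

assert can_export(MODEL_TENSOR.CLS)      # accepted after this change
assert can_export(MODEL_TENSOR.CLS_OUT)  # accepted after this change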

src/llama-arch.cpp

Lines changed: 2 additions & 0 deletions
@@ -519,6 +519,8 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_NAMES = {
             { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
             { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
             { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+            { LLM_TENSOR_CLS,             "cls" },
+            { LLM_TENSOR_CLS_OUT,         "cls.output" },
         },
     },
     {
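
The strings added here are the GGUF base names for the new tensors; llama.cpp's tn() helper combines a base name with a suffix such as "weight" or "bias" (and a layer index for the blk.%d.* entries). A rough Python illustration of that naming scheme, under the assumption that tn() behaves as sketched (not the actual C++ helper):

TENSOR_NAMES = {
    "LLM_TENSOR_FFN_DOWN": "blk.%d.ffn_down",
    "LLM_TENSOR_FFN_UP":   "blk.%d.ffn_up",
    "LLM_TENSOR_FFN_NORM": "blk.%d.ffn_norm",
    "LLM_TENSOR_CLS":      "cls",         # added by this commit
    "LLM_TENSOR_CLS_OUT":  "cls.output",  # added by this commit
}

def tn(tensor: str, suffix: str, layer: int | None = None) -> str:
    # Assumed behavior: fill in the layer index, then append the suffix.
    base = TENSOR_NAMES[tensor]
    if layer is not None:
        base = base % layer
    return base + "." + suffix

print(tn("LLM_TENSOR_FFN_DOWN", "weight", 0))  # blk.0.ffn_down.weight
print(tn("LLM_TENSOR_CLS", "weight"))          # cls.weight
print(tn("LLM_TENSOR_CLS_OUT", "bias"))        # cls.output.bias

So the loader below will look for cls.weight, cls.output.weight, and cls.output.bias in the GGUF file.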

src/llama-model.cpp

Lines changed: 5 additions & 0 deletions
@@ -2710,6 +2710,11 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                     layer.ffn_down = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "weight", i), {n_ff, n_embd}, 0);
                     layer.ffn_norm = create_tensor(tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd}, 0);
                 }
+
+                cls       = create_tensor(tn(LLM_TENSOR_CLS,     "weight"), {n_embd, n_embd},            TENSOR_NOT_REQUIRED);
+                cls_out   = create_tensor(tn(LLM_TENSOR_CLS_OUT, "weight"), {n_embd, hparams.n_cls_out}, TENSOR_NOT_REQUIRED);
+                cls_out_b = create_tensor(tn(LLM_TENSOR_CLS_OUT, "bias"),   {hparams.n_cls_out},         TENSOR_NOT_REQUIRED);
+
             } break;
         case LLM_ARCH_NEO_BERT:
             {
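
All three tensors are created with TENSOR_NOT_REQUIRED, so checkpoints without a classification head still load; when present, cls ({n_embd, n_embd}) acts as the BERT-style pooler projection, and cls_out with cls_out_b produces the n_cls_out classification logits. A minimal numeric sketch of that head in pure Python (assumed semantics, including the tanh that BERT-style poolers usually apply; not llama.cpp's actual graph code):

import math

def matvec(W, x):
    # Multiply a matrix, stored as a list of rows, by a vector.
    return [sum(w * v for w, v in zip(row, x)) for row in W]

def cls_head(pooled, cls_w, cls_out_w, cls_out_b):
    # Pooler projection; BERT-style poolers typically apply tanh here (assumption).
    h = [math.tanh(v) for v in matvec(cls_w, pooled)]
    # Linear classifier producing n_cls_out logits.
    return [l + b for l, b in zip(matvec(cls_out_w, h), cls_out_b)]

n_embd, n_cls_out = 4, 2
pooled    = [0.1, -0.2, 0.3, 0.05]  # e.g. the pooled CLS-token embedding
cls_w     = [[0.01 * (i + j) for j in range(n_embd)] for i in range(n_embd)]
cls_out_w = [[0.02 * (i - j) for j in range(n_embd)] for i in range(n_cls_out)]
cls_out_b = [0.0, 0.1]
print(cls_head(pooled, cls_w, cls_out_w, cls_out_b))  # two logits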
