@@ -592,6 +592,8 @@ enum llm_tensor {
     LLM_TENSOR_ENC_FFN_DOWN,
     LLM_TENSOR_ENC_FFN_UP,
     LLM_TENSOR_ENC_OUTPUT_NORM,
+    LLM_TENSOR_CLS,
+    LLM_TENSOR_CLS_OUT,
 };
 
 static const std::map<llm_arch, std::map<llm_tensor, std::string>> LLM_TENSOR_NAMES = {
@@ -779,6 +781,8 @@ static const std::map<llm_arch, std::map<llm_tensor, std::string>> LLM_TENSOR_NA
             { LLM_TENSOR_LAYER_OUT_NORM, "blk.%d.layer_output_norm" },
             { LLM_TENSOR_FFN_DOWN,       "blk.%d.ffn_down" },
             { LLM_TENSOR_FFN_UP,         "blk.%d.ffn_up" },
+            { LLM_TENSOR_CLS,            "cls" },
+            { LLM_TENSOR_CLS_OUT,        "cls.output" },
         },
     },
     {
@@ -2829,6 +2833,12 @@ struct llama_model {
     struct ggml_tensor * output_b;
     struct ggml_tensor * output_norm_enc;
 
+    // classifier
+    struct ggml_tensor * cls;
+    struct ggml_tensor * cls_b;
+    struct ggml_tensor * cls_out;
+    struct ggml_tensor * cls_out_b;
+
     std::vector<llama_layer> layers;
 
     llama_split_mode split_mode;
@@ -7228,6 +7238,12 @@ static bool llm_load_tensors(
 
                 if (model.arch == LLM_ARCH_BERT) {
                     model.pos_embd = ml.create_tensor(ctx_input, tn(LLM_TENSOR_POS_EMBD, "weight"), {n_embd, n_ctx_train});
+
+                    model.cls   = ml.create_tensor(ctx_output, tn(LLM_TENSOR_CLS, "weight"), {n_embd, n_embd}, llama_model_loader::TENSOR_NOT_REQUIRED);
+                    model.cls_b = ml.create_tensor(ctx_output, tn(LLM_TENSOR_CLS, "bias"),   {n_embd},         llama_model_loader::TENSOR_NOT_REQUIRED);
+
+                    model.cls_out   = ml.create_tensor(ctx_output, tn(LLM_TENSOR_CLS_OUT, "weight"), {n_embd, 1}, llama_model_loader::TENSOR_NOT_REQUIRED);
+                    model.cls_out_b = ml.create_tensor(ctx_output, tn(LLM_TENSOR_CLS_OUT, "bias"),   {1},        llama_model_loader::TENSOR_NOT_REQUIRED);
                 }
 
                 model.tok_norm = ml.create_tensor(ctx_output, tn(LLM_TENSOR_TOKEN_EMBD_NORM, "weight"), {n_embd});
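
Note (not part of the diff above): given the shapes being loaded ({n_embd, n_embd} for "cls", {n_embd, 1} for "cls.output", both optional via TENSOR_NOT_REQUIRED), the new tensors resemble a BERT-style pooler followed by a single-logit classification head. The sketch below shows how they might be applied to a pooled CLS-token embedding during graph build; it is an illustrative assumption only, and `ctx0` and `inp_cls_pooled` are hypothetical names, not code from this commit.

    // minimal sketch: optional classifier head over a pooled embedding [n_embd, n_batch]
    struct ggml_tensor * cur = inp_cls_pooled;

    if (model.cls) {
        cur = ggml_mul_mat(ctx0, model.cls, cur);        // pooler dense: {n_embd, n_embd}
        if (model.cls_b) {
            cur = ggml_add(ctx0, cur, model.cls_b);
        }
        cur = ggml_tanh(ctx0, cur);                      // BERT pooler activation
    }

    if (model.cls_out) {
        cur = ggml_mul_mat(ctx0, model.cls_out, cur);    // project to a single logit: {n_embd, 1}
        if (model.cls_out_b) {
            cur = ggml_add(ctx0, cur, model.cls_out_b);
        }
    }

Both halves are guarded because the loader above marks every classifier tensor as optional, so a checkpoint may ship only "cls", only "cls.output", or neither.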