@@ -679,7 +679,6 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                 hparams.f_max_alibi_bias = 8.0f;
             } break;
         case LLM_ARCH_BERT:
-        case LLM_ARCH_DISTIL_BERT:
             {
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps);
                 ml.get_key(LLM_KV_ATTENTION_CAUSAL, hparams.causal_attn);
@@ -2111,14 +2110,13 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                     }
                 } break;
             case LLM_ARCH_BERT:
-            case LLM_ARCH_DISTIL_BERT:
             case LLM_ARCH_NOMIC_BERT:
             case LLM_ARCH_NOMIC_BERT_MOE:
                 {
                     tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0);
                     type_embd = create_tensor(tn(LLM_TENSOR_TOKEN_TYPES, "weight"), {n_embd, n_token_types}, TENSOR_NOT_REQUIRED);

-                    if (arch == LLM_ARCH_BERT || arch == LLM_ARCH_DISTIL_BERT ) {
+                    if (arch == LLM_ARCH_BERT) {
                         pos_embd = create_tensor(tn(LLM_TENSOR_POS_EMBD, "weight"), {n_embd, n_ctx_train}, 0);

                         cls = create_tensor(tn(LLM_TENSOR_CLS, "weight"), {n_embd, n_embd}, TENSOR_NOT_REQUIRED);
@@ -2162,7 +2160,7 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                     layer.ffn_up = create_tensor(tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff}, 0);
                     layer.ffn_down = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "weight", i), {n_ff, n_embd}, 0);

-                    if (arch == LLM_ARCH_BERT || arch == LLM_ARCH_DISTIL_BERT || arch == LLM_ARCH_NOMIC_BERT_MOE) {
+                    if (arch == LLM_ARCH_BERT || arch == LLM_ARCH_NOMIC_BERT_MOE) {
                         layer.bo = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "bias", i), {n_embd}, 0);
                         layer.ffn_up_b = create_tensor(tn(LLM_TENSOR_FFN_UP, "bias", i), {n_ff}, 0);
                         layer.ffn_down_b = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "bias", i), {n_embd}, 0);
@@ -5891,7 +5889,7 @@ struct llm_build_bert : public llm_graph_context {
             ggml_tensor * type_row0 = ggml_view_1d(ctx0, model.type_embd, n_embd, 0);
             inpL = ggml_add(ctx0, inpL, type_row0);
         }
-        if (model.arch == LLM_ARCH_BERT || model.arch == LLM_ARCH_DISTIL_BERT ) {
+        if (model.arch == LLM_ARCH_BERT) {
             inpL = ggml_add(ctx0, ggml_get_rows(ctx0, model.pos_embd, inp_pos), inpL);
         }
         cb(inpL, "inp_embd", -1);
@@ -6008,7 +6006,7 @@ struct llm_build_bert : public llm_graph_context {
                     0.0f,
                     LLAMA_EXPERT_GATING_FUNC_TYPE_SOFTMAX, il);
             cb(cur, "ffn_moe_out", il);
-        } else if (model.arch == LLM_ARCH_BERT || model.arch == LLM_ARCH_DISTIL_BERT || model.arch == LLM_ARCH_NOMIC_BERT_MOE) {
+        } else if (model.arch == LLM_ARCH_BERT || model.arch == LLM_ARCH_NOMIC_BERT_MOE) {
             cur = build_ffn(cur,
                     model.layers[il].ffn_up, model.layers[il].ffn_up_b, NULL,
                     NULL, NULL, NULL,
@@ -13191,7 +13189,6 @@ llama_memory_i * llama_model::create_memory(const llama_memory_params & params,

     switch (arch) {
         case LLM_ARCH_BERT:
-        case LLM_ARCH_DISTIL_BERT:
         case LLM_ARCH_JINA_BERT_V2:
         case LLM_ARCH_NOMIC_BERT:
         case LLM_ARCH_NOMIC_BERT_MOE:
@@ -13298,7 +13295,6 @@ llm_graph_result_ptr llama_model::build_graph(
                 llm = std::make_unique<llm_build_refact>(*this, params, gf);
             } break;
         case LLM_ARCH_BERT:
-        case LLM_ARCH_DISTIL_BERT:
         case LLM_ARCH_JINA_BERT_V2:
         case LLM_ARCH_NOMIC_BERT:
         case LLM_ARCH_NOMIC_BERT_MOE:
@@ -13666,7 +13662,6 @@ llama_rope_type llama_model_rope_type(const llama_model * model) {
         case LLM_ARCH_GROK:
         case LLM_ARCH_DBRX:
         case LLM_ARCH_BERT:
-        case LLM_ARCH_DISTIL_BERT:
         case LLM_ARCH_NOMIC_BERT:
         case LLM_ARCH_NOMIC_BERT_MOE:
         case LLM_ARCH_STABLELM: