Commit 913100b

Use MODEL_ARCH.BERT for DistilBert
1 parent: 50e068d


5 files changed (+5, -49 lines)


convert_hf_to_gguf.py (1 addition, 1 deletion)
@@ -3909,7 +3909,7 @@ def _xlmroberta_set_vocab(self) -> None:
 
 @ModelBase.register("DistilBertModel", "DistilBertForMaskedLM", "DistilBertForSequenceClassification")
 class DistilBertModel(BertModel):
-    model_arch = gguf.MODEL_ARCH.DISTIL_BERT
+    model_arch = gguf.MODEL_ARCH.BERT
 
     def set_gguf_parameters(self):
         self.gguf_writer.add_layer_norm_eps(1e-12)
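Note: with model_arch now set to gguf.MODEL_ARCH.BERT, a converted DistilBERT checkpoint is tagged with the generic "bert" architecture in its GGUF metadata. A minimal sketch of verifying that with gguf-py, assuming a file distilbert.gguf produced by convert_hf_to_gguf.py (the file name is illustrative):

from gguf import GGUFReader

# Read the architecture key back out of a converted file.
reader = GGUFReader("distilbert.gguf")
field = reader.fields["general.architecture"]
# String fields keep their bytes in parts[data[0]].
print(bytes(field.parts[field.data[0]]).decode("utf-8"))  # expected: "bert"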

gguf-py/gguf/constants.py (0 additions, 18 deletions)
@@ -289,7 +289,6 @@ class MODEL_ARCH(IntEnum):
     STARCODER      = auto()
     REFACT         = auto()
     BERT           = auto()
-    DISTIL_BERT    = auto()
     NOMIC_BERT     = auto()
     NOMIC_BERT_MOE = auto()
     JINA_BERT_V2   = auto()
@@ -570,7 +569,6 @@ class MODEL_TENSOR(IntEnum):
     MODEL_ARCH.STARCODER:      "starcoder",
     MODEL_ARCH.REFACT:         "refact",
     MODEL_ARCH.BERT:           "bert",
-    MODEL_ARCH.DISTIL_BERT:    "distil-bert",
     MODEL_ARCH.NOMIC_BERT:     "nomic-bert",
     MODEL_ARCH.NOMIC_BERT_MOE: "nomic-bert-moe",
     MODEL_ARCH.JINA_BERT_V2:   "jina-bert-v2",
@@ -1049,22 +1047,6 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.CLS,
         MODEL_TENSOR.CLS_OUT,
     ],
-    MODEL_ARCH.DISTIL_BERT: [
-        MODEL_TENSOR.TOKEN_EMBD,
-        MODEL_TENSOR.TOKEN_EMBD_NORM,
-        MODEL_TENSOR.POS_EMBD,
-        MODEL_TENSOR.OUTPUT_NORM,
-        MODEL_TENSOR.ATTN_OUT_NORM,
-        MODEL_TENSOR.ATTN_Q,
-        MODEL_TENSOR.ATTN_K,
-        MODEL_TENSOR.ATTN_V,
-        MODEL_TENSOR.ATTN_OUT,
-        MODEL_TENSOR.FFN_DOWN,
-        MODEL_TENSOR.FFN_UP,
-        MODEL_TENSOR.LAYER_OUT_NORM,
-        MODEL_TENSOR.CLS,
-        MODEL_TENSOR.CLS_OUT,
-    ],
     MODEL_ARCH.NOMIC_BERT: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.TOKEN_EMBD_NORM,
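Note: the dedicated tensor list can be dropped because it is a subset of the BERT entry; BERT additionally declares MODEL_TENSOR.TOKEN_TYPES, which DistilBERT checkpoints do not ship (and which the loader treats as optional via TENSOR_NOT_REQUIRED, as the llama-model.cpp hunks below show). A quick sanity check, sketched under the assumption that gguf-py is importable; the set literal copies the list deleted above:

from gguf.constants import MODEL_ARCH, MODEL_TENSOR, MODEL_TENSORS

# The per-architecture tensor list deleted in this commit.
distil_bert_tensors = {
    MODEL_TENSOR.TOKEN_EMBD, MODEL_TENSOR.TOKEN_EMBD_NORM,
    MODEL_TENSOR.POS_EMBD, MODEL_TENSOR.OUTPUT_NORM,
    MODEL_TENSOR.ATTN_OUT_NORM, MODEL_TENSOR.ATTN_Q, MODEL_TENSOR.ATTN_K,
    MODEL_TENSOR.ATTN_V, MODEL_TENSOR.ATTN_OUT, MODEL_TENSOR.FFN_DOWN,
    MODEL_TENSOR.FFN_UP, MODEL_TENSOR.LAYER_OUT_NORM,
    MODEL_TENSOR.CLS, MODEL_TENSOR.CLS_OUT,
}

# Everything DistilBERT needs is already declared for BERT.
assert distil_bert_tensors <= set(MODEL_TENSORS[MODEL_ARCH.BERT])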

src/llama-arch.cpp (0 additions, 20 deletions)
@@ -18,7 +18,6 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_STARCODER,      "starcoder"      },
     { LLM_ARCH_REFACT,         "refact"         },
     { LLM_ARCH_BERT,           "bert"           },
-    { LLM_ARCH_DISTIL_BERT,    "distil-bert"    },
     { LLM_ARCH_NOMIC_BERT,     "nomic-bert"     },
     { LLM_ARCH_NOMIC_BERT_MOE, "nomic-bert-moe" },
     { LLM_ARCH_JINA_BERT_V2,   "jina-bert-v2"   },
@@ -463,25 +462,6 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
             { LLM_TENSOR_CLS_OUT,         "cls.output" },
         },
     },
-    {
-        LLM_ARCH_DISTIL_BERT,
-        {
-            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
-            { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
-            { LLM_TENSOR_TOKEN_TYPES,     "token_types" }, // not used
-            { LLM_TENSOR_POS_EMBD,        "position_embd" },
-            { LLM_TENSOR_ATTN_OUT_NORM,   "blk.%d.attn_output_norm" },
-            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
-            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
-            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
-            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
-            { LLM_TENSOR_LAYER_OUT_NORM,  "blk.%d.layer_output_norm" },
-            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
-            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
-            { LLM_TENSOR_CLS,             "cls" },
-            { LLM_TENSOR_CLS_OUT,         "cls.output" },
-        },
-    },
     {
         LLM_ARCH_NOMIC_BERT,
         {

src/llama-arch.h (0 additions, 1 deletion)
@@ -22,7 +22,6 @@ enum llm_arch {
     LLM_ARCH_STARCODER,
     LLM_ARCH_REFACT,
     LLM_ARCH_BERT,
-    LLM_ARCH_DISTIL_BERT,
     LLM_ARCH_NOMIC_BERT,
     LLM_ARCH_NOMIC_BERT_MOE,
     LLM_ARCH_JINA_BERT_V2,

src/llama-model.cpp (4 additions, 9 deletions)
@@ -679,7 +679,6 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                 hparams.f_max_alibi_bias = 8.0f;
             } break;
         case LLM_ARCH_BERT:
-        case LLM_ARCH_DISTIL_BERT:
             {
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps);
                 ml.get_key(LLM_KV_ATTENTION_CAUSAL,        hparams.causal_attn);
@@ -2111,14 +2110,13 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                 }
             } break;
         case LLM_ARCH_BERT:
-        case LLM_ARCH_DISTIL_BERT:
         case LLM_ARCH_NOMIC_BERT:
         case LLM_ARCH_NOMIC_BERT_MOE:
             {
                 tok_embd  = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD,  "weight"), {n_embd, n_vocab}, 0);
                 type_embd = create_tensor(tn(LLM_TENSOR_TOKEN_TYPES, "weight"), {n_embd, n_token_types}, TENSOR_NOT_REQUIRED);
 
-                if (arch == LLM_ARCH_BERT || arch == LLM_ARCH_DISTIL_BERT) {
+                if (arch == LLM_ARCH_BERT) {
                     pos_embd = create_tensor(tn(LLM_TENSOR_POS_EMBD, "weight"), {n_embd, n_ctx_train}, 0);
 
                     cls = create_tensor(tn(LLM_TENSOR_CLS, "weight"), {n_embd, n_embd}, TENSOR_NOT_REQUIRED);
@@ -2162,7 +2160,7 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                     layer.ffn_up   = create_tensor(tn(LLM_TENSOR_FFN_UP,   "weight", i), {n_embd, n_ff}, 0);
                     layer.ffn_down = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "weight", i), {n_ff, n_embd}, 0);
 
-                    if (arch == LLM_ARCH_BERT || arch == LLM_ARCH_DISTIL_BERT || arch == LLM_ARCH_NOMIC_BERT_MOE) {
+                    if (arch == LLM_ARCH_BERT || arch == LLM_ARCH_NOMIC_BERT_MOE) {
                         layer.bo         = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "bias", i), {n_embd}, 0);
                         layer.ffn_up_b   = create_tensor(tn(LLM_TENSOR_FFN_UP,   "bias", i), {n_ff}, 0);
                         layer.ffn_down_b = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "bias", i), {n_embd}, 0);
@@ -5891,7 +5889,7 @@ struct llm_build_bert : public llm_graph_context {
             ggml_tensor * type_row0 = ggml_view_1d(ctx0, model.type_embd, n_embd, 0);
             inpL = ggml_add(ctx0, inpL, type_row0);
         }
-        if (model.arch == LLM_ARCH_BERT || model.arch == LLM_ARCH_DISTIL_BERT) {
+        if (model.arch == LLM_ARCH_BERT) {
             inpL = ggml_add(ctx0, ggml_get_rows(ctx0, model.pos_embd, inp_pos), inpL);
         }
         cb(inpL, "inp_embd", -1);
@@ -6008,7 +6006,7 @@ struct llm_build_bert : public llm_graph_context {
                         0.0f,
                         LLAMA_EXPERT_GATING_FUNC_TYPE_SOFTMAX, il);
                 cb(cur, "ffn_moe_out", il);
-            } else if (model.arch == LLM_ARCH_BERT || model.arch == LLM_ARCH_DISTIL_BERT || model.arch == LLM_ARCH_NOMIC_BERT_MOE) {
+            } else if (model.arch == LLM_ARCH_BERT || model.arch == LLM_ARCH_NOMIC_BERT_MOE) {
                 cur = build_ffn(cur,
                         model.layers[il].ffn_up, model.layers[il].ffn_up_b, NULL,
                         NULL, NULL, NULL,
@@ -13191,7 +13189,6 @@ llama_memory_i * llama_model::create_memory(const llama_memory_params & params,
 
     switch (arch) {
         case LLM_ARCH_BERT:
-        case LLM_ARCH_DISTIL_BERT:
         case LLM_ARCH_JINA_BERT_V2:
         case LLM_ARCH_NOMIC_BERT:
         case LLM_ARCH_NOMIC_BERT_MOE:
@@ -13298,7 +13295,6 @@ llm_graph_result_ptr llama_model::build_graph(
                 llm = std::make_unique<llm_build_refact>(*this, params, gf);
             } break;
         case LLM_ARCH_BERT:
-        case LLM_ARCH_DISTIL_BERT:
         case LLM_ARCH_JINA_BERT_V2:
         case LLM_ARCH_NOMIC_BERT:
         case LLM_ARCH_NOMIC_BERT_MOE:
@@ -13666,7 +13662,6 @@ llama_rope_type llama_model_rope_type(const llama_model * model) {
         case LLM_ARCH_GROK:
         case LLM_ARCH_DBRX:
         case LLM_ARCH_BERT:
-        case LLM_ARCH_DISTIL_BERT:
         case LLM_ARCH_NOMIC_BERT:
         case LLM_ARCH_NOMIC_BERT_MOE:
         case LLM_ARCH_STABLELM:
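Note: after these removals a DistilBERT model flows through the regular LLM_ARCH_BERT paths: learned position embeddings are added in llm_build_bert, and memory creation, graph building, and RoPE-type selection all hit the BERT cases above. End to end, an embedding call looks like the sketch below, which assumes the llama-cpp-python bindings and the illustrative distilbert.gguf converted earlier (any GGUF client would behave the same):

from llama_cpp import Llama

# Load the converted model in embedding mode and pull a sentence vector.
llm = Llama(model_path="distilbert.gguf", embedding=True)
vec = llm.embed("hello world")
print(len(vec))  # embedding dimensionality (768 for distilbert-base)

One caveat: "distil-bert" is gone from LLM_ARCH_NAMES, so a GGUF written with that short-lived architecture string will presumably no longer load and needs re-converting.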
