Commit 9e1179a

delete unnecessary files
1 parent 6aa1335 commit 9e1179a

File tree

6 files changed: +3 -165 lines changed


convert_hf_to_gguf.py

Lines changed: 2 additions & 5 deletions
@@ -809,9 +809,6 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         if chkhsh == "1431a23e583c97432bc230bff598d103ddb5a1f89960c8f1d1051aaa944d0b35":
             # ref: https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0
             res = "minerva-7b"
-        if chkhsh == "a0b64b4385f123663873756336c085744376d015ff328bb1d901598f63c44152":
-            # ref: https://huggingface.co/answerdotai/ModernBERT-base
-            res = "modern-bert"
 
         if res is None:
             logger.warning("\n")
@@ -3941,14 +3938,14 @@ class ModernBertModel(BertModel):
     model_arch = gguf.MODEL_ARCH.MODERN_BERT
 
     def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        self._try_set_pooling_type()
         self.gguf_writer.add_sliding_window(self.hparams["local_attention"])
         self.gguf_writer.add_rope_freq_base(self.hparams["global_rope_theta"])
         self.gguf_writer.add_rope_freq_base_swa(self.hparams["local_rope_theta"])
         self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.NONE)
         self.gguf_writer.add_vocab_size(self.hparams["vocab_size"])
 
-        super().set_gguf_parameters()
-
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         # These layers act as MLM head, so we don't need them
         if name.startswith("decoder."):
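
For context, the chkhsh branches in the first hunk implement a hash-based pre-tokenizer lookup: the converter encodes a fixed probe string with the model's tokenizer, hashes the resulting token IDs, and maps known hashes to a pre-tokenizer name. The sketch below only illustrates that pattern; PROBE_TEXT, KNOWN_PRETOKENIZERS and detect_pretokenizer are illustrative stand-ins rather than the script's real identifiers, and the hash value is simply copied from the hunk above.

# Minimal sketch of a checksum-based pre-tokenizer lookup, in the spirit of
# get_vocab_base_pre(). Names here are placeholders, not identifiers from
# convert_hf_to_gguf.py.
from hashlib import sha256

# Fixed text chosen to exercise the pre-tokenizer (digits, punctuation, accents, ...).
PROBE_TEXT = "Hello world! 123 foo.bar café \n\t"

# Hash of the encoded probe text -> pre-tokenizer name (value taken from the diff above).
KNOWN_PRETOKENIZERS = {
    "1431a23e583c97432bc230bff598d103ddb5a1f89960c8f1d1051aaa944d0b35": "minerva-7b",
}

def detect_pretokenizer(tokenizer) -> str | None:
    # Two tokenizers that split the probe text identically produce the same hash,
    # so the hash identifies the pre-tokenization scheme rather than the model.
    chkhsh = sha256(str(tokenizer.encode(PROBE_TEXT)).encode()).hexdigest()
    return KNOWN_PRETOKENIZERS.get(chkhsh)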

convert_hf_to_gguf_update.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,6 @@ class TOKENIZER_TYPE(IntEnum):
128128
{"name": "llama4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct", },
129129
{"name": "pixtral", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/mistral-community/pixtral-12b", },
130130
{"name": "seed-coder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/ByteDance-Seed/Seed-Coder-8B-Base", },
131-
{"name": "modern-bert", "tokt": TOKENIZER_TYPE.WPM, "repo": "https://huggingface.co/answerdotai/ModernBERT-base", },
132131
]
133132

134133
# some models are known to be broken upstream, so we will skip them as exceptions
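
This models table is what convert_hf_to_gguf_update.py walks to regenerate the chkhsh branches in convert_hf_to_gguf.py, so dropping the modern-bert row mirrors the branch removed in the first file. The sketch below shows that relationship in a self-contained way; emit_branches and the precomputed chkhsh field are illustrative assumptions, not the update script's actual code, which derives each hash from the tokenizer itself.

# Rough sketch: turn rows shaped like the table above into the
# "if chkhsh == ...:" branches seen in convert_hf_to_gguf.py.
# In the real workflow the chkhsh value comes from hashing the tokenizer's
# output on a fixed probe text; here it is passed in precomputed.

def emit_branches(rows: list[dict[str, str]]) -> str:
    lines = []
    for row in rows:
        lines.append(f'        if chkhsh == "{row["chkhsh"]}":')
        lines.append(f'            # ref: {row["repo"]}')
        lines.append(f'            res = "{row["name"]}"')
    return "\n".join(lines)

# Example row, using values that appear in the first diff of this commit.
print(emit_branches([{
    "name": "minerva-7b",
    "repo": "https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0",
    "chkhsh": "1431a23e583c97432bc230bff598d103ddb5a1f89960c8f1d1051aaa944d0b35",
}]))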

models/ggml-vocab-modern-bert.gguf

-1.06 MB (binary file not shown)

models/ggml-vocab-modern-bert.gguf.inp

Lines changed: 0 additions & 112 deletions
This file was deleted.

models/ggml-vocab-modern-bert.gguf.out

Lines changed: 0 additions & 46 deletions
This file was deleted.

src/llama-hparams.h

Lines changed: 1 addition & 1 deletion
@@ -166,7 +166,7 @@ struct llama_hparams {
     // The remainder parameter specifies which position in the pattern is dense
     // example: n_pattern = 3, remainder = 2
     // il == 0: swa (0 % 3 = 0, which is not equal to 2)
-    // il == 1: swa (1 % 3 = 1, which is not equal to 2)
+    // il == 1: swa (1 % 3 = 1, which is not equal to 2)
     // il == 2: dense (2 % 3 = 2, which equals 2)
     // il == 3: swa (3 % 3 = 0, which is not equal to 2)
     // il == 4: swa (4 % 3 = 1, which is not equal to 2)
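
The comment block touched here documents the sliding-window-attention (SWA) layer pattern: layer il is dense when il % n_pattern == remainder and uses SWA otherwise. The snippet below just replays that arithmetic as a standalone illustration; it is not code from llama-hparams.h.

# Reproduces the layer classification described in the comment above:
# dense when (il % n_pattern) == remainder, sliding-window attention otherwise.
n_pattern = 3
remainder = 2

for il in range(6):
    kind = "dense" if il % n_pattern == remainder else "swa"
    print(f"il == {il}: {kind} ({il} % {n_pattern} = {il % n_pattern})")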
