@@ -78,7 +78,7 @@ class ModelBase:
     # subclasses should define this!
     model_arch: gguf.MODEL_ARCH
 
-    def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, is_big_endian: bool = False,
+    def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, *, is_big_endian: bool = False,
                  use_temp_file: bool = False, eager: bool = False,
                  metadata_override: Path | None = None, model_name: str | None = None,
                  split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = False,
@@ -454,13 +454,6 @@ def from_model_architecture(cls, arch: str, model_type = ModelType.TEXT) -> type
 
 
 class TextModel(ModelBase):
-    @classmethod
-    def __init_subclass__(cls):
-        # can't use an abstract property, because overriding it without type errors
-        # would require using decorated functions instead of simply defining the property
-        if "model_arch" not in cls.__dict__:
-            raise TypeError(f"Missing property 'model_arch' for {cls.__name__!r}")
-
     def set_vocab(self):
         self._set_vocab_gpt2()
 
@@ -3373,14 +3366,7 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
 
         return [(self.map_tensor_name(name), data_torch)]
 
-
-@ModelBase.register("RobertaModel")
-class RobertaModel(BertModel):
-    model_arch = gguf.MODEL_ARCH.BERT
-
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-
+    def _xlmroberta_tokenizer_init(self) -> None:
         # we need the pad_token_id to know how to chop down position_embd matrix
         if (pad_token_id := self.hparams.get("pad_token_id")) is not None:
             self._position_offset = 1 + pad_token_id
@@ -3389,82 +3375,7 @@ def __init__(self, *args, **kwargs):
         else:
             self._position_offset = None
 
-    def set_vocab(self):
-        """Support BPE tokenizers for roberta models"""
-        bpe_tok_path = self.dir_model / "tokenizer.json"
-        if bpe_tok_path.exists():
-            self._set_vocab_gpt2()
-            self.gguf_writer.add_add_bos_token(True)
-            self.gguf_writer.add_add_eos_token(True)
-
-            # we need this to validate the size of the token_type embeddings
-            # though currently we are passing all zeros to the token_type embeddings
-            # "Sequence A" or "Sequence B"
-            self.gguf_writer.add_token_type_count(self.hparams.get("type_vocab_size", 1))
-
-        else:
-            return super().set_vocab()
-
-    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
-        # if name starts with "roberta.", remove the prefix
-        # e.g. https://huggingface.co/BAAI/bge-reranker-v2-m3/tree/main
-        if name.startswith("roberta."):
-            name = name[8:]
-
-        # position embeddings start at pad_token_id + 1, so just chop down the weight tensor
-        if name == "embeddings.position_embeddings.weight":
-            if self._position_offset is not None:
-                data_torch = data_torch[self._position_offset:,:]
-
-        return super().modify_tensors(data_torch, name, bid)
-
-
-@ModelBase.register("NomicBertModel")
-class NomicBertModel(BertModel):
-    model_arch = gguf.MODEL_ARCH.NOMIC_BERT
-
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-
-        # the HF config claims n_ctx=8192, but it uses RoPE scaling
-        self.hparams["n_ctx"] = 2048
-
-        # SwigLU activation
-        assert self.hparams["activation_function"] == "swiglu"
-        # this doesn't do anything in the HF version
-        assert self.hparams["causal"] is False
-        # no bias tensors
-        assert self.hparams["qkv_proj_bias"] is False
-        assert self.hparams["mlp_fc1_bias"] is False
-        assert self.hparams["mlp_fc2_bias"] is False
-        # norm at end of layer
-        assert self.hparams["prenorm"] is False
-        # standard RoPE
-        assert self.hparams["rotary_emb_fraction"] == 1.0
-        assert self.hparams["rotary_emb_interleaved"] is False
-        assert self.hparams["rotary_emb_scale_base"] is None
-
-    def set_gguf_parameters(self):
-        super().set_gguf_parameters()
-        self.gguf_writer.add_rope_freq_base(self.hparams["rotary_emb_base"])
-
-
-@ModelBase.register("XLMRobertaModel", "XLMRobertaForSequenceClassification")
-class XLMRobertaModel(BertModel):
-    model_arch = gguf.MODEL_ARCH.BERT
-
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-
-        # we need the pad_token_id to know how to chop down position_embd matrix
-        if (pad_token_id := self.hparams.get("pad_token_id")) is not None:
-            self._position_offset = 1 + pad_token_id
-            if "max_position_embeddings" in self.hparams:
-                self.hparams["max_position_embeddings"] -= self._position_offset
-        else:
-            self._position_offset = None
-
-    def set_vocab(self):
+    def _xlmroberta_set_vocab(self) -> None:
         # to avoid TypeError: Descriptors cannot be created directly
         # exception when importing sentencepiece_model_pb2
         os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
@@ -3546,6 +3457,138 @@ def set_vocab(self):
         self.gguf_writer.add_add_bos_token(True)
         self.gguf_writer.add_add_eos_token(True)
 
+
+@ModelBase.register("RobertaModel")
+class RobertaModel(BertModel):
+    model_arch = gguf.MODEL_ARCH.BERT
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        # we need the pad_token_id to know how to chop down position_embd matrix
+        if (pad_token_id := self.hparams.get("pad_token_id")) is not None:
+            self._position_offset = 1 + pad_token_id
+            if "max_position_embeddings" in self.hparams:
+                self.hparams["max_position_embeddings"] -= self._position_offset
+        else:
+            self._position_offset = None
+
+    def set_vocab(self):
+        """Support BPE tokenizers for roberta models"""
+        bpe_tok_path = self.dir_model / "tokenizer.json"
+        if bpe_tok_path.exists():
+            self._set_vocab_gpt2()
+            self.gguf_writer.add_add_bos_token(True)
+            self.gguf_writer.add_add_eos_token(True)
+
+            # we need this to validate the size of the token_type embeddings
+            # though currently we are passing all zeros to the token_type embeddings
+            # "Sequence A" or "Sequence B"
+            self.gguf_writer.add_token_type_count(self.hparams.get("type_vocab_size", 1))
+
+        else:
+            return super().set_vocab()
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        # if name starts with "roberta.", remove the prefix
+        # e.g. https://huggingface.co/BAAI/bge-reranker-v2-m3/tree/main
+        if name.startswith("roberta."):
+            name = name[8:]
+
+        # position embeddings start at pad_token_id + 1, so just chop down the weight tensor
+        if name == "embeddings.position_embeddings.weight":
+            if self._position_offset is not None:
+                data_torch = data_torch[self._position_offset:,:]
+
+        return super().modify_tensors(data_torch, name, bid)
+
+
+@ModelBase.register("NomicBertModel")
+class NomicBertModel(BertModel):
+    def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, **kwargs: Any):
+        hparams = kwargs.pop("hparams", None)
+        if hparams is None:
+            hparams = ModelBase.load_hparams(dir_model)
+
+        self.is_moe = bool(hparams.get("moe_every_n_layers"))
+        self.model_arch = gguf.MODEL_ARCH.NOMIC_BERT_MOE if self.is_moe else gguf.MODEL_ARCH.NOMIC_BERT
+
+        super().__init__(dir_model, ftype, fname_out, hparams=hparams, **kwargs)
+
+        self._tokenizer_is_xlmroberta = self._is_tokenizer_xlmroberta()
+        if self._tokenizer_is_xlmroberta:
+            self._xlmroberta_tokenizer_init()
+
+        # the HF config claims n_ctx=8192, but it uses RoPE scaling
+        self.hparams["n_ctx"] = 2048
+
+        assert self.hparams["activation_function"] == ("gelu" if self.is_moe else "swiglu")
+
+        # this doesn't do anything in the HF version
+        assert self.hparams["causal"] is False
+        # no bias tensors unless MoE
+        assert self.hparams["qkv_proj_bias"] == self.is_moe
+        assert self.hparams["mlp_fc1_bias"] == self.is_moe
+        assert self.hparams["mlp_fc2_bias"] == self.is_moe
+
+        # norm at end of layer
+        assert self.hparams["prenorm"] is False
+        # standard RoPE
+        assert self.hparams["rotary_emb_fraction"] == 1.0
+        assert self.hparams["rotary_emb_interleaved"] is False
+        assert self.hparams["rotary_emb_scale_base"] is None
+
+    def set_vocab(self) -> None:
+        if self._tokenizer_is_xlmroberta:
+            return self._xlmroberta_set_vocab()
+        return super().set_vocab()
+
+    def modify_tensors(self, data_torch: torch.Tensor, name: str, bid: int | None) -> Iterable[tuple[str, torch.Tensor]]:
+        # if the tensor is an experts bias tensor, skip it by returning an empty list
+        if "mlp.experts.bias" in name:
+            return []
+
+        if "mlp.experts.mlp.w1" in name:
+            data_torch = data_torch.view(self.hparams["num_experts"], self.hparams["n_inner"], self.hparams["n_embd"])
+            name += ".weight"
+
+        if "mlp.experts.mlp.w2" in name:
+            data_torch = data_torch.view(self.hparams["num_experts"], self.hparams["n_inner"], self.hparams["n_embd"])
+            data_torch = data_torch.transpose(1, 2)
+            name += ".weight"
+
+        return [(self.map_tensor_name(name), data_torch)]
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        self.gguf_writer.add_rope_freq_base(self.hparams["rotary_emb_base"])
+        if self.is_moe:
+            self.gguf_writer.add_moe_every_n_layers(self.hparams["moe_every_n_layers"])
+            self.gguf_writer.add_expert_count(self.hparams["num_experts"])
+            self.gguf_writer.add_expert_used_count(self.hparams["moe_top_k"])
+
+    def _is_tokenizer_xlmroberta(self) -> bool:
+        with open(self.dir_model / "tokenizer.json") as f:
+            tokenizer_json = json.load(f)
+        toktyp = tokenizer_json["model"]["type"]
+        if toktyp == "Unigram":
+            return True
+        if toktyp == "WordPiece":
+            return False
+        raise ValueError(f"unknown tokenizer: {toktyp}")
+
+
+@ModelBase.register("XLMRobertaModel", "XLMRobertaForSequenceClassification")
+class XLMRobertaModel(BertModel):
+    model_arch = gguf.MODEL_ARCH.BERT
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self._xlmroberta_tokenizer_init()
+
+    def set_vocab(self):
+        self._xlmroberta_set_vocab()
+
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         # if name starts with "roberta.", remove the prefix
         # e.g. https://huggingface.co/BAAI/bge-reranker-v2-m3/tree/main
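
Aside (not part of the diff above): a minimal standalone sketch of the per-expert reshape that NomicBertModel.modify_tensors applies to the fused "mlp.experts.mlp.w1" / "mlp.experts.mlp.w2" tensors. The expert count and dimensions below are hypothetical example values, not ones read from any real config.

    # illustrative only: splitting fused MoE expert weights into one matrix per expert
    import torch

    num_experts, n_inner, n_embd = 8, 3072, 768  # hypothetical example sizes

    # w1 is stored fused across experts; view it as (num_experts, n_inner, n_embd)
    w1_fused = torch.randn(num_experts * n_inner, n_embd)
    w1 = w1_fused.view(num_experts, n_inner, n_embd)

    # w2 gets the same view, then its last two dims are swapped
    w2_fused = torch.randn(num_experts * n_inner, n_embd)
    w2 = w2_fused.view(num_experts, n_inner, n_embd).transpose(1, 2)

    print(w1.shape)  # torch.Size([8, 3072, 768])
    print(w2.shape)  # torch.Size([8, 768, 3072])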