Commit e9bff42: Merge branch 'master' into xsn/mtmd_qwen2vl
2 parents: d23fdc2 + b6ce743
13 files changed: +462 -190 lines

convert_hf_to_gguf.py: 135 additions & 92 deletions
@@ -78,7 +78,7 @@ class ModelBase:
     # subclasses should define this!
     model_arch: gguf.MODEL_ARCH

-    def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, is_big_endian: bool = False,
+    def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, *, is_big_endian: bool = False,
                  use_temp_file: bool = False, eager: bool = False,
                  metadata_override: Path | None = None, model_name: str | None = None,
                  split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = False,
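
The only functional change in this hunk is the bare "*" added to ModelBase.__init__, which makes is_big_endian and every later option keyword-only. A minimal sketch of the effect, using a reduced stand-in class rather than the real converter class:

    # hypothetical reduction of the signature change: options after "*" must be passed by name
    from pathlib import Path

    class ModelBase:
        def __init__(self, dir_model: Path, ftype: int, fname_out: Path, *, is_big_endian: bool = False):
            self.dir_model = dir_model
            self.ftype = ftype
            self.fname_out = fname_out
            self.is_big_endian = is_big_endian

    ModelBase(Path("model"), 0, Path("out.gguf"), is_big_endian=True)  # OK: keyword argument
    # ModelBase(Path("model"), 0, Path("out.gguf"), True)              # TypeError once "*" is in place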
@@ -454,13 +454,6 @@ def from_model_architecture(cls, arch: str, model_type = ModelType.TEXT) -> type


 class TextModel(ModelBase):
-    @classmethod
-    def __init_subclass__(cls):
-        # can't use an abstract property, because overriding it without type errors
-        # would require using decorated functions instead of simply defining the property
-        if "model_arch" not in cls.__dict__:
-            raise TypeError(f"Missing property 'model_arch' for {cls.__name__!r}")
-
     def set_vocab(self):
         self._set_vocab_gpt2()
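
The __init_subclass__ guard is removed because model_arch can no longer be assumed to be a class attribute: NomicBertModel, later in this diff, picks NOMIC_BERT or NOMIC_BERT_MOE per instance before calling super().__init__(). A toy sketch of why the class-level check and a per-instance assignment cannot coexist (names are illustrative only):

    import enum

    class Arch(enum.Enum):
        NOMIC_BERT = 1
        NOMIC_BERT_MOE = 2

    class TextModel:
        def __init_subclass__(cls):
            # the removed guard: requires model_arch as a *class* attribute
            if "model_arch" not in cls.__dict__:
                raise TypeError(f"Missing property 'model_arch' for {cls.__name__!r}")

    try:
        # rejected at class-definition time, even though each instance would set
        # model_arch in __init__ before anything reads it
        class NomicBert(TextModel):
            def __init__(self, is_moe: bool):
                self.model_arch = Arch.NOMIC_BERT_MOE if is_moe else Arch.NOMIC_BERT
    except TypeError as err:
        print(err)  # Missing property 'model_arch' for 'NomicBert'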

@@ -3373,14 +3366,7 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter

         return [(self.map_tensor_name(name), data_torch)]

-
-@ModelBase.register("RobertaModel")
-class RobertaModel(BertModel):
-    model_arch = gguf.MODEL_ARCH.BERT
-
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-
+    def _xlmroberta_tokenizer_init(self) -> None:
         # we need the pad_token_id to know how to chop down position_embd matrix
         if (pad_token_id := self.hparams.get("pad_token_id")) is not None:
             self._position_offset = 1 + pad_token_id
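
For context on the helper being factored out here: XLM-RoBERTa-style checkpoints reserve position ids 0 through pad_token_id, so real positions start at pad_token_id + 1 and the position-embedding matrix gets chopped by that offset (the modify_tensors methods below do the slicing). A toy-sized illustration, with invented dimensions:

    import torch

    pad_token_id = 1
    position_offset = 1 + pad_token_id            # rows 0..pad_token_id are never real positions

    # toy position-embedding table: 10 positions, 4-dim embeddings
    position_embd = torch.arange(40, dtype=torch.float32).reshape(10, 4)

    # what modify_tensors does to embeddings.position_embeddings.weight
    chopped = position_embd[position_offset:, :]
    print(chopped.shape)                          # torch.Size([8, 4]); max_position_embeddings shrinks by 2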
@@ -3389,82 +3375,7 @@ def __init__(self, *args, **kwargs):
         else:
             self._position_offset = None

-    def set_vocab(self):
-        """Support BPE tokenizers for roberta models"""
-        bpe_tok_path = self.dir_model / "tokenizer.json"
-        if bpe_tok_path.exists():
-            self._set_vocab_gpt2()
-            self.gguf_writer.add_add_bos_token(True)
-            self.gguf_writer.add_add_eos_token(True)
-
-            # we need this to validate the size of the token_type embeddings
-            # though currently we are passing all zeros to the token_type embeddings
-            # "Sequence A" or "Sequence B"
-            self.gguf_writer.add_token_type_count(self.hparams.get("type_vocab_size", 1))
-
-        else:
-            return super().set_vocab()
-
-    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
-        # if name starts with "roberta.", remove the prefix
-        # e.g. https://huggingface.co/BAAI/bge-reranker-v2-m3/tree/main
-        if name.startswith("roberta."):
-            name = name[8:]
-
-        # position embeddings start at pad_token_id + 1, so just chop down the weight tensor
-        if name == "embeddings.position_embeddings.weight":
-            if self._position_offset is not None:
-                data_torch = data_torch[self._position_offset:,:]
-
-        return super().modify_tensors(data_torch, name, bid)
-
-
-@ModelBase.register("NomicBertModel")
-class NomicBertModel(BertModel):
-    model_arch = gguf.MODEL_ARCH.NOMIC_BERT
-
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-
-        # the HF config claims n_ctx=8192, but it uses RoPE scaling
-        self.hparams["n_ctx"] = 2048
-
-        # SwigLU activation
-        assert self.hparams["activation_function"] == "swiglu"
-        # this doesn't do anything in the HF version
-        assert self.hparams["causal"] is False
-        # no bias tensors
-        assert self.hparams["qkv_proj_bias"] is False
-        assert self.hparams["mlp_fc1_bias"] is False
-        assert self.hparams["mlp_fc2_bias"] is False
-        # norm at end of layer
-        assert self.hparams["prenorm"] is False
-        # standard RoPE
-        assert self.hparams["rotary_emb_fraction"] == 1.0
-        assert self.hparams["rotary_emb_interleaved"] is False
-        assert self.hparams["rotary_emb_scale_base"] is None
-
-    def set_gguf_parameters(self):
-        super().set_gguf_parameters()
-        self.gguf_writer.add_rope_freq_base(self.hparams["rotary_emb_base"])
-
-
-@ModelBase.register("XLMRobertaModel", "XLMRobertaForSequenceClassification")
-class XLMRobertaModel(BertModel):
-    model_arch = gguf.MODEL_ARCH.BERT
-
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-
-        # we need the pad_token_id to know how to chop down position_embd matrix
-        if (pad_token_id := self.hparams.get("pad_token_id")) is not None:
-            self._position_offset = 1 + pad_token_id
-            if "max_position_embeddings" in self.hparams:
-                self.hparams["max_position_embeddings"] -= self._position_offset
-        else:
-            self._position_offset = None
-
-    def set_vocab(self):
+    def _xlmroberta_set_vocab(self) -> None:
         # to avoid TypeError: Descriptors cannot be created directly
         # exception when importing sentencepiece_model_pb2
         os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
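
One ordering detail about the renamed _xlmroberta_set_vocab: the environment variable only takes effect if it is set before sentencepiece_model_pb2 is first imported, which is why the method sets it at the very top. A minimal sketch of the pattern it relies on:

    import os

    # must run before the first import of sentencepiece_model_pb2, otherwise the C++
    # protobuf backend can raise "TypeError: Descriptors cannot be created directly"
    os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"

    from sentencepiece import sentencepiece_model_pb2 as model  # noqa: E402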
@@ -3546,6 +3457,138 @@ def set_vocab(self):
         self.gguf_writer.add_add_bos_token(True)
         self.gguf_writer.add_add_eos_token(True)

+
+@ModelBase.register("RobertaModel")
+class RobertaModel(BertModel):
+    model_arch = gguf.MODEL_ARCH.BERT
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        # we need the pad_token_id to know how to chop down position_embd matrix
+        if (pad_token_id := self.hparams.get("pad_token_id")) is not None:
+            self._position_offset = 1 + pad_token_id
+            if "max_position_embeddings" in self.hparams:
+                self.hparams["max_position_embeddings"] -= self._position_offset
+        else:
+            self._position_offset = None
+
+    def set_vocab(self):
+        """Support BPE tokenizers for roberta models"""
+        bpe_tok_path = self.dir_model / "tokenizer.json"
+        if bpe_tok_path.exists():
+            self._set_vocab_gpt2()
+            self.gguf_writer.add_add_bos_token(True)
+            self.gguf_writer.add_add_eos_token(True)
+
+            # we need this to validate the size of the token_type embeddings
+            # though currently we are passing all zeros to the token_type embeddings
+            # "Sequence A" or "Sequence B"
+            self.gguf_writer.add_token_type_count(self.hparams.get("type_vocab_size", 1))
+
+        else:
+            return super().set_vocab()
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        # if name starts with "roberta.", remove the prefix
+        # e.g. https://huggingface.co/BAAI/bge-reranker-v2-m3/tree/main
+        if name.startswith("roberta."):
+            name = name[8:]
+
+        # position embeddings start at pad_token_id + 1, so just chop down the weight tensor
+        if name == "embeddings.position_embeddings.weight":
+            if self._position_offset is not None:
+                data_torch = data_torch[self._position_offset:,:]
+
+        return super().modify_tensors(data_torch, name, bid)
+
+
+@ModelBase.register("NomicBertModel")
+class NomicBertModel(BertModel):
+    def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, **kwargs: Any):
+        hparams = kwargs.pop("hparams", None)
+        if hparams is None:
+            hparams = ModelBase.load_hparams(dir_model)
+
+        self.is_moe = bool(hparams.get("moe_every_n_layers"))
+        self.model_arch = gguf.MODEL_ARCH.NOMIC_BERT_MOE if self.is_moe else gguf.MODEL_ARCH.NOMIC_BERT
+
+        super().__init__(dir_model, ftype, fname_out, hparams=hparams, **kwargs)
+
+        self._tokenizer_is_xlmroberta = self._is_tokenizer_xlmroberta()
+        if self._tokenizer_is_xlmroberta:
+            self._xlmroberta_tokenizer_init()
+
+        # the HF config claims n_ctx=8192, but it uses RoPE scaling
+        self.hparams["n_ctx"] = 2048
+
+        assert self.hparams["activation_function"] == "gelu" if self.is_moe else "swiglu"
+
+        # this doesn't do anything in the HF version
+        assert self.hparams["causal"] is False
+        # no bias tensors unless MoE
+        assert self.hparams["qkv_proj_bias"] == self.is_moe
+        assert self.hparams["mlp_fc1_bias"] == self.is_moe
+        assert self.hparams["mlp_fc2_bias"] == self.is_moe
+
+        # norm at end of layer
+        assert self.hparams["prenorm"] is False
+        # standard RoPE
+        assert self.hparams["rotary_emb_fraction"] == 1.0
+        assert self.hparams["rotary_emb_interleaved"] is False
+        assert self.hparams["rotary_emb_scale_base"] is None
+
+    def set_vocab(self) -> None:
+        if self._tokenizer_is_xlmroberta:
+            return self._xlmroberta_set_vocab()
+        return super().set_vocab()
+
+    def modify_tensors(self, data_torch: torch.Tensor, name: str, bid: int | None) -> Iterable[tuple[str, torch.Tensor]]:
+        # If the tensor is an experts bias tensor, skip it by returning an empty list.
+        if "mlp.experts.bias" in name:
+            return []  # Explicitly return an empty list.
+
+        if "mlp.experts.mlp.w1" in name:
+            data_torch = data_torch.view(self.hparams["num_experts"], self.hparams["n_inner"], self.hparams["n_embd"])
+            name += ".weight"
+
+        if "mlp.experts.mlp.w2" in name:
+            data_torch = data_torch.view(self.hparams["num_experts"], self.hparams["n_inner"], self.hparams["n_embd"])
+            data_torch = data_torch.transpose(1, 2)
+            name += ".weight"
+
+        return [(self.map_tensor_name(name), data_torch)]
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        self.gguf_writer.add_rope_freq_base(self.hparams["rotary_emb_base"])
+        if self.is_moe:
+            self.gguf_writer.add_moe_every_n_layers(self.hparams["moe_every_n_layers"])
+            self.gguf_writer.add_expert_count(self.hparams["num_experts"])
+            self.gguf_writer.add_expert_used_count(self.hparams["moe_top_k"])
+
+    def _is_tokenizer_xlmroberta(self) -> bool:
+        with open(self.dir_model / "tokenizer.json") as f:
+            tokenizer_json = json.load(f)
+        toktyp = tokenizer_json["model"]["type"]
+        if toktyp == "Unigram":
+            return True
+        if toktyp == "WordPiece":
+            return False
+        raise ValueError(f"unknown tokenizer: {toktyp}")
+
+
+@ModelBase.register("XLMRobertaModel", "XLMRobertaForSequenceClassification")
+class XLMRobertaModel(BertModel):
+    model_arch = gguf.MODEL_ARCH.BERT
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self._xlmroberta_tokenizer_init()
+
+    def set_vocab(self):
+        self._xlmroberta_set_vocab()
+
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         # if name starts with "roberta.", remove the prefix
         # e.g. https://huggingface.co/BAAI/bge-reranker-v2-m3/tree/main
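
On the new NomicBertModel.modify_tensors: the MoE checkpoints pack each layer's expert weights into single mlp.experts.mlp.w1 / w2 tensors, which the converter reshapes to (num_experts, n_inner, n_embd), transposing w2 so its per-expert matrices come out as (n_embd, n_inner), presumably to match the per-expert layout the GGUF tensor map expects. A toy-sized sketch of that reshape (the dimensions and the flat starting layout are invented for illustration):

    import torch

    num_experts, n_inner, n_embd = 8, 16, 4

    # packed expert weights standing in for the HF checkpoint tensors (toy data)
    w1_flat = torch.randn(num_experts * n_inner * n_embd)
    w2_flat = torch.randn(num_experts * n_inner * n_embd)

    # mlp.experts.mlp.w1 -> (num_experts, n_inner, n_embd)
    w1 = w1_flat.view(num_experts, n_inner, n_embd)

    # mlp.experts.mlp.w2 -> same view, then swap the last two axes
    w2 = w2_flat.view(num_experts, n_inner, n_embd).transpose(1, 2)

    print(w1.shape, w2.shape)  # torch.Size([8, 16, 4]) torch.Size([8, 4, 16])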
