diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 3e3db999c92ed..55e1f722f3334 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -53,10 +53,8 @@
     "`pip install mistral-common[image,audio]` to install it."
 )
 
-
 logger = logging.getLogger("hf-to-gguf")
 
-
 ###### MODEL DEFINITIONS ######
 
 
@@ -67,15 +65,12 @@ class SentencePieceTokenTypes(IntEnum):
     UNUSED = 5
     BYTE = 6
 
-
 class ModelType(IntEnum):
     TEXT = 1
     MMPROJ = 2
 
-
 AnyModel = TypeVar("AnyModel", bound="type[ModelBase]")
 
-
 class ModelBase:
     _model_classes: dict[ModelType, dict[str, type[ModelBase]]] = {
         ModelType.TEXT: {},
@@ -647,7 +642,6 @@ def from_model_architecture(cls, arch: str, model_type = ModelType.TEXT) -> type
         except KeyError:
             raise NotImplementedError(f'Architecture {arch!r} not supported!') from None
 
-
 class TextModel(ModelBase):
     model_type = ModelType.TEXT
     hf_arch: str
@@ -750,6 +744,47 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_file_type(self.ftype)
         logger.info(f"gguf: file type = {self.ftype}")
 
+    def _clean_chat_template_to_mtmd(self) -> bool:
+        """Normalize vision/audio markers in chat_template to the MTMD placeholder.
+
+        Reads self.dir_model/tokenizer_config.json and, if a chat_template is present,
+        replaces <start_of_image>/<end_of_image> and <start_of_audio>/<end_of_audio>
+        with <__media__>/"" respectively, then writes the result via
+        self.gguf_writer.add_chat_template(cleaned).
+
+        Returns True if a cleaned template was written, False otherwise.
+        """
+        try:
+            import json
+            from pathlib import Path
+            cfg_path = Path(self.dir_model) / "tokenizer_config.json"
+            if not cfg_path.is_file():
+                return False
+            cfg = json.loads(cfg_path.read_text(encoding="utf-8"))
+            chat_template = cfg.get("chat_template")
+            if isinstance(chat_template, list):
+                variants = {}
+                for v in chat_template:
+                    if isinstance(v, dict):
+                        name = v.get("name")
+                        templ = v.get("template")
+                        if name is not None and templ is not None:
+                            variants[name] = templ
+                chat_template = variants.get("default") or next((t for t in variants.values() if isinstance(t, str)), None)
+            if isinstance(chat_template, str):
+                cleaned = (chat_template
+                           .replace("<start_of_image>", "<__media__>")
+                           .replace("<end_of_image>", "")
+                           .replace("<start_of_audio>", "<__media__>")
+                           .replace("<end_of_audio>", ""))
+                if cleaned != chat_template:
+                    logger.info("gguf: clean Gemma vision/audio markers to <__media__>")
+                self.gguf_writer.add_chat_template(cleaned)
+                return True
+        except Exception as e:
+            logger.warning(f"gguf: failed to clean chat_template: {e}")
+        return False
+
     def write_vocab(self):
         if len(self.gguf_writer.tensors) != 1:
             raise ValueError('Splitting the vocabulary is not supported')
@@ -1426,7 +1461,6 @@ def _set_vocab_interns1(self):
         special_vocab._set_special_token("bos", 151643)
         special_vocab.add_to_gguf(self.gguf_writer)
 
-
 class MmprojModel(ModelBase):
     model_type = ModelType.MMPROJ
     model_arch = gguf.MODEL_ARCH.MMPROJ
@@ -1558,7 +1592,6 @@ def tensor_force_quant(self, name, new_name, bid, n_dims):
             return gguf.GGMLQuantizationType.F16 if self.ftype == gguf.LlamaFileType.MOSTLY_F16 else gguf.GGMLQuantizationType.F32
         return False
 
-
 @ModelBase.register("GPTNeoXForCausalLM")
 class GPTNeoXModel(TextModel):
     model_arch = gguf.MODEL_ARCH.GPTNEOX
@@ -1615,7 +1648,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
 
         return tensors
 
-
 @ModelBase.register("BloomForCausalLM", "BloomModel")
 class BloomModel(TextModel):
     model_arch = gguf.MODEL_ARCH.BLOOM
@@ -1672,7 +1704,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
 
         return tensors
 
-
@ModelBase.register("MPTForCausalLM") class MPTModel(TextModel): model_arch = gguf.MODEL_ARCH.MPT @@ -1716,7 +1747,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter return [(new_name, data_torch)] - @ModelBase.register("OrionForCausalLM") class OrionModel(TextModel): model_arch = gguf.MODEL_ARCH.ORION @@ -1751,7 +1781,6 @@ def set_gguf_parameters(self): # ref: https://huggingface.co/OrionStarAI/Orion-14B-Chat/blob/276a17221ce42beb45f66fac657a41540e71f4f5/modeling_orion.py#L570-L571 self.gguf_writer.add_layer_norm_eps(self.hparams["rms_norm_eps"]) - @ModelBase.register("BaichuanForCausalLM", "BaiChuanForCausalLM") class BaichuanModel(TextModel): model_arch = gguf.MODEL_ARCH.BAICHUAN @@ -1831,7 +1860,6 @@ def _reverse_hf_part(self, weights: Tensor, n_part: int) -> Tensor: r = weights.shape[0] // 3 return weights[r * n_part:r * n_part + r, ...] - @ModelBase.register("XverseForCausalLM") class XverseModel(TextModel): model_arch = gguf.MODEL_ARCH.XVERSE @@ -1938,7 +1966,6 @@ def _reverse_hf_permute(self, weights: Tensor, n_head: int, n_kv_head: int | Non .reshape(weights.shape) ) - @ModelBase.register("FalconForCausalLM", "RWForCausalLM") class FalconModel(TextModel): model_arch = gguf.MODEL_ARCH.FALCON @@ -1992,7 +2019,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter return [(self.map_tensor_name(name), data_torch)] - @ModelBase.register("GPTBigCodeForCausalLM") class StarCoderModel(TextModel): model_arch = gguf.MODEL_ARCH.STARCODER @@ -2009,7 +2035,6 @@ def set_gguf_parameters(self): self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_epsilon"]) self.gguf_writer.add_file_type(self.ftype) - @ModelBase.register("GPTRefactForCausalLM") class RefactModel(TextModel): model_arch = gguf.MODEL_ARCH.REFACT @@ -2073,7 +2098,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter return tensors - @ModelBase.register("StableLmForCausalLM", "StableLMEpochForCausalLM", "LlavaStableLMEpochForCausalLM") class StableLMModel(TextModel): model_arch = gguf.MODEL_ARCH.STABLELM @@ -2163,7 +2187,6 @@ def prepare_tensors(self): if len(norms) > 0: raise ValueError(f"Unprocessed norms: {norms}") - @ModelBase.register( "LLaMAForCausalLM", "LlamaForCausalLM", @@ -2422,7 +2445,6 @@ def prepare_tensors(self): if len(experts) > 0: raise ValueError(f"Unprocessed experts: {experts}") - @ModelBase.register("ArceeForCausalLM") class ArceeModel(LlamaModel): model_arch = gguf.MODEL_ARCH.ARCEE @@ -2436,7 +2458,6 @@ def set_gguf_parameters(self): self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"]) self.gguf_writer.add_rope_scaling_orig_ctx_len(rope_scaling["original_max_position_embeddings"]) - @ModelBase.register( "LlavaForConditionalGeneration", # pixtral "Mistral3ForConditionalGeneration", # mistral small 3.1 @@ -2521,7 +2542,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter return [] # skip other tensors - @ModelBase.register("Idefics3ForConditionalGeneration", "SmolVLMForConditionalGeneration") class SmolVLMModel(MmprojModel): def __init__(self, *args, **kwargs): @@ -2558,7 +2578,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter return [] # skip other tensors - @ModelBase.register( "Llama4ForConditionalGeneration", "Llama4ForCausalLM", @@ -2608,7 +2627,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None): return [] return super().modify_tensors(data_torch, name, bid) - 
@ModelBase.register("Llama4ForConditionalGeneration") class Llama4VisionModel(MmprojModel): def set_gguf_parameters(self): @@ -2631,7 +2649,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter return [(self.map_tensor_name(name), data_torch)] return [] - @ModelBase.register("Mistral3ForConditionalGeneration") class Mistral3Model(LlamaModel): model_arch = gguf.MODEL_ARCH.LLAMA @@ -2642,7 +2659,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None): return [] return super().modify_tensors(data_torch, name, bid) - @ModelBase.register("DeciLMForCausalLM") class DeciModel(TextModel): model_arch = gguf.MODEL_ARCH.DECI @@ -2819,7 +2835,6 @@ def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]: def prepare_tensors(self): super().prepare_tensors() - @ModelBase.register("BitnetForCausalLM") class BitnetModel(TextModel): model_arch = gguf.MODEL_ARCH.BITNET @@ -2860,7 +2875,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter yield (new_name, data_torch) - @ModelBase.register("GrokForCausalLM", "Grok1ForCausalLM") class GrokModel(TextModel): model_arch = gguf.MODEL_ARCH.GROK @@ -2965,7 +2979,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter yield from tensors - @ModelBase.register("DbrxForCausalLM") class DbrxModel(TextModel): model_arch = gguf.MODEL_ARCH.DBRX @@ -3034,7 +3047,6 @@ def tensor_force_quant(self, name: str, new_name: str, bid: int | None, n_dims: return n_dims > 1 - @ModelBase.register("MiniCPMForCausalLM") class MiniCPMModel(TextModel): model_arch = gguf.MODEL_ARCH.MINICPM @@ -3089,7 +3101,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter return [(self.map_tensor_name(name), data_torch)] - @ModelBase.register("MiniCPM3ForCausalLM") class MiniCPM3Model(TextModel): model_arch = gguf.MODEL_ARCH.MINICPM3 @@ -3142,7 +3153,6 @@ def _reverse_hf_permute(self, weights: Tensor, n_head: int, n_kv_head: int | Non .reshape(weights.shape) ) - @ModelBase.register("QWenLMHeadModel") class QwenModel(TextModel): model_arch = gguf.MODEL_ARCH.QWEN @@ -3184,7 +3194,6 @@ def set_gguf_parameters(self): self.gguf_writer.add_layer_norm_rms_eps(self.hparams["layer_norm_epsilon"]) self.gguf_writer.add_file_type(self.ftype) - @ModelBase.register("Qwen2Model", "Qwen2ForCausalLM", "Qwen2AudioForConditionalGeneration") class Qwen2Model(TextModel): model_arch = gguf.MODEL_ARCH.QWEN2 @@ -3216,7 +3225,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter return [] yield from super().modify_tensors(data_torch, name, bid) - @ModelBase.register("DreamModel") class DreamModel(TextModel): model_arch = gguf.MODEL_ARCH.DREAM @@ -3286,7 +3294,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter # Dream model tensors should be mapped directly since it's the base model yield from super().modify_tensors(data_torch, name, bid) - @ModelBase.register("LLaDAModelLM") class LLaDAModel(TextModel): model_arch = gguf.MODEL_ARCH.LLADA @@ -3387,7 +3394,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter # LLaDA model tensors should be mapped directly since it's the base model yield from super().modify_tensors(data_torch, name, bid) - @ModelBase.register("Ernie4_5_ForCausalLM", "Ernie4_5ForCausalLM") class Ernie4_5Model(TextModel): model_arch = gguf.MODEL_ARCH.ERNIE4_5 @@ -3434,7 +3440,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter ] 
return [(self.map_tensor_name(name), data_torch)] - @ModelBase.register("Ernie4_5_MoeForCausalLM") class Ernie4_5MoeModel(Ernie4_5Model): model_arch = gguf.MODEL_ARCH.ERNIE4_5_MOE @@ -3521,7 +3526,6 @@ def prepare_tensors(self): if len(experts) > 0: raise ValueError(f"Unprocessed experts: {experts}") - @ModelBase.register( "Qwen2VLModel", "Qwen2VLForConditionalGeneration", @@ -3553,7 +3557,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter return [] return [(self.map_tensor_name(name), data_torch)] - @ModelBase.register("Qwen2VLModel", "Qwen2VLForConditionalGeneration", "Qwen2_5_VLForConditionalGeneration") class Qwen2VLVisionModel(MmprojModel): def __init__(self, *args, **kwargs): @@ -3632,7 +3635,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter return [(self.map_tensor_name(name), data_torch)] return [] # skip other tensors - @ModelBase.register("Qwen2_5OmniModel") class Qwen25OmniModel(Qwen2VLVisionModel): has_vision_encoder = True @@ -3691,7 +3693,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter return super().modify_tensors(data_torch, name, bid) - @ModelBase.register("InternVisionModel") class InternVisionModel(MmprojModel): def set_gguf_parameters(self): @@ -3766,7 +3767,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter return [(self.map_tensor_name(name), data_torch)] return [] # skip other tensors - @ModelBase.register("WavTokenizerDec") class WavTokenizerDecModel(TextModel): model_arch = gguf.MODEL_ARCH.WAVTOKENIZER_DEC @@ -3804,7 +3804,6 @@ def set_gguf_parameters(self): self.gguf_writer.add_causal_attention(False) - @ModelBase.register("Qwen2MoeForCausalLM") class Qwen2MoeModel(TextModel): model_arch = gguf.MODEL_ARCH.QWEN2MOE @@ -3878,7 +3877,6 @@ def prepare_tensors(self): if len(experts) > 0: raise ValueError(f"Unprocessed experts: {experts}") - @ModelBase.register("Qwen3ForCausalLM") class Qwen3Model(Qwen2Model): model_arch = gguf.MODEL_ARCH.QWEN3 @@ -3961,7 +3959,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter return super().modify_tensors(data_torch, name, bid) - @ModelBase.register("Qwen3MoeForCausalLM") class Qwen3MoeModel(Qwen2MoeModel): model_arch = gguf.MODEL_ARCH.QWEN3MOE @@ -3979,7 +3976,6 @@ def set_vocab(self): super().set_vocab() - @ModelBase.register("GPT2LMHeadModel") class GPT2Model(TextModel): model_arch = gguf.MODEL_ARCH.GPT2 @@ -4011,7 +4007,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter return tensors - @ModelBase.register("PhiForCausalLM") class Phi2Model(TextModel): model_arch = gguf.MODEL_ARCH.PHI2 @@ -4035,7 +4030,6 @@ def set_gguf_parameters(self): self.gguf_writer.add_file_type(self.ftype) self.gguf_writer.add_add_bos_token(False) - @ModelBase.register("Phi3ForCausalLM") class Phi3MiniModel(TextModel): model_arch = gguf.MODEL_ARCH.PHI3 @@ -4213,7 +4207,6 @@ def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]: yield (self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FACTORS_LONG), torch.tensor(long_factors, dtype=torch.float32)) yield (self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FACTORS_SHORT), torch.tensor(short_factors, dtype=torch.float32)) - @ModelBase.register("PhiMoEForCausalLM") class PhiMoeModel(Phi3MiniModel): model_arch = gguf.MODEL_ARCH.PHIMOE @@ -4270,7 +4263,6 @@ def prepare_tensors(self): if len(experts) > 0: raise ValueError(f"Unprocessed experts: {experts}") - @ModelBase.register("PlamoForCausalLM") 
class PlamoModel(TextModel): model_arch = gguf.MODEL_ARCH.PLAMO @@ -4318,7 +4310,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter return [(new_name, data_torch)] - @ModelBase.register("Plamo2ForCausalLM", "PLaMo2ForCausalLM") class Plamo2Model(TextModel): model_arch = gguf.MODEL_ARCH.PLAMO2 @@ -4492,7 +4483,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter return [(new_name, data_torch)] - @ModelBase.register("CodeShellForCausalLM") class CodeShellModel(TextModel): model_arch = gguf.MODEL_ARCH.CODESHELL @@ -4512,7 +4502,6 @@ def set_gguf_parameters(self): self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR) self.gguf_writer.add_rope_scaling_factor(1.0) - @ModelBase.register("InternLM2ForCausalLM") class InternLM2Model(TextModel): model_arch = gguf.MODEL_ARCH.INTERNLM2 @@ -4690,7 +4679,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter else: return [(self.map_tensor_name(name), data_torch)] - @ModelBase.register("InternLM3ForCausalLM") class InternLM3Model(TextModel): model_arch = gguf.MODEL_ARCH.LLAMA @@ -4752,7 +4740,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter data_torch = LlamaModel.permute(data_torch, n_head, n_kv_head) return [(self.map_tensor_name(name), data_torch)] - @ModelBase.register("BertModel", "BertForMaskedLM", "CamembertModel", "BertForSequenceClassification") class BertModel(TextModel): model_arch = gguf.MODEL_ARCH.BERT @@ -4972,7 +4959,6 @@ def _xlmroberta_set_vocab(self) -> None: special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens)) special_vocab.add_to_gguf(self.gguf_writer) - @ModelBase.register("DistilBertModel", "DistilBertForMaskedLM", "DistilBertForSequenceClassification") class DistilBertModel(BertModel): model_arch = gguf.MODEL_ARCH.BERT @@ -4992,7 +4978,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter return super().modify_tensors(data_torch, name, bid) - @ModelBase.register("RobertaModel", "RobertaForSequenceClassification") class RobertaModel(BertModel): model_arch = gguf.MODEL_ARCH.BERT @@ -5035,7 +5020,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter return super().modify_tensors(data_torch, name, bid) - @ModelBase.register("NomicBertModel") class NomicBertModel(BertModel): model_arch = gguf.MODEL_ARCH.BERT @@ -5117,7 +5101,6 @@ def _is_tokenizer_xlmroberta(self) -> bool: return False raise ValueError(f"unknown tokenizer: {toktyp}") - @ModelBase.register("NeoBERT", "NeoBERTLMHead", "NeoBERTForSequenceClassification") class NeoBert(BertModel): model_arch = gguf.MODEL_ARCH.NEO_BERT @@ -5145,7 +5128,6 @@ def modify_tensors(self, data_torch, name, bid): return super().modify_tensors(data_torch, name, bid) - @ModelBase.register("XLMRobertaModel", "XLMRobertaForSequenceClassification") class XLMRobertaModel(BertModel): model_arch = gguf.MODEL_ARCH.BERT @@ -5243,7 +5225,6 @@ def write(self): lora_writer.write_tensors_to_file(progress=True) lora_writer.close() - @ModelBase.register("GemmaForCausalLM") class GemmaModel(TextModel): model_arch = gguf.MODEL_ARCH.GEMMA @@ -5293,8 +5274,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter data_torch = data_torch + 1 return [(self.map_tensor_name(name), data_torch)] - - @ModelBase.register("Gemma2ForCausalLM") class Gemma2Model(TextModel): model_arch = gguf.MODEL_ARCH.GEMMA2 @@ -5303,6 +5282,7 @@ def set_vocab(self): 
self._set_vocab_sentencepiece() self.gguf_writer.add_add_space_prefix(False) + self._clean_chat_template_to_mtmd() def set_gguf_parameters(self): hparams = self.hparams @@ -5341,7 +5321,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter return [(self.map_tensor_name(name), data_torch)] - @ModelBase.register("Gemma3ForCausalLM", "Gemma3ForConditionalGeneration") class Gemma3Model(TextModel): model_arch = gguf.MODEL_ARCH.GEMMA3 @@ -5351,6 +5330,7 @@ def set_vocab(self): self._set_vocab_sentencepiece() self.gguf_writer.add_add_space_prefix(False) + self._clean_chat_template_to_mtmd() def set_gguf_parameters(self): hparams = self.hparams @@ -5401,7 +5381,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter return [(self.map_tensor_name(name), data_torch)] - @ModelBase.register("Gemma3TextModel") class EmbeddingGemma(Gemma3Model): model_arch = gguf.MODEL_ARCH.GEMMA_EMBEDDING @@ -5475,7 +5454,6 @@ def set_gguf_parameters(self): self._try_set_pooling_type() - @ModelBase.register("Gemma3ForConditionalGeneration") class Gemma3VisionModel(MmprojModel): def set_gguf_parameters(self): @@ -5526,7 +5504,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter return [] # skip other tensors - @ModelBase.register("Gemma3nForConditionalGeneration") class Gemma3NModel(Gemma3Model): model_arch = gguf.MODEL_ARCH.GEMMA3N @@ -5621,12 +5598,10 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter return super().modify_tensors(data_torch, name, bid) - @ModelBase.register("Starcoder2ForCausalLM") class StarCoder2Model(TextModel): model_arch = gguf.MODEL_ARCH.STARCODER2 - @ModelBase.register("Rwkv6ForCausalLM") class Rwkv6Model(TextModel): model_arch = gguf.MODEL_ARCH.RWKV6 @@ -5699,7 +5674,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter yield (new_name, data_torch) - @ModelBase.register("RWKV6Qwen2ForCausalLM") class RWKV6Qwen2Model(Rwkv6Model): model_arch = gguf.MODEL_ARCH.RWKV6QWEN2 @@ -5753,7 +5727,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter continue yield (new_name, data) - @ModelBase.register("Rwkv7ForCausalLM", "RWKV7ForCausalLM") class Rwkv7Model(TextModel): model_arch = gguf.MODEL_ARCH.RWKV7 @@ -5872,7 +5845,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter yield (new_name, data_torch) - @ModelBase.register("RwkvHybridForCausalLM") class ARwkv7Model(Rwkv7Model): model_arch = gguf.MODEL_ARCH.ARWKV7 @@ -5915,7 +5887,6 @@ def set_gguf_parameters(self): # required by llama.cpp, unused self.gguf_writer.add_head_count(0) - @ModelBase.register("MambaForCausalLM", "MambaLMHeadModel", "FalconMambaForCausalLM") class MambaModel(TextModel): model_arch = gguf.MODEL_ARCH.MAMBA @@ -6001,7 +5972,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter return [(new_name, data_torch)] - @ModelBase.register("Mamba2ForCausalLM") class Mamba2Model(TextModel): model_arch = gguf.MODEL_ARCH.MAMBA2 @@ -6094,7 +6064,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter yield (new_name, data_torch) - @ModelBase.register("JambaForCausalLM") class JambaModel(TextModel): model_arch = gguf.MODEL_ARCH.JAMBA @@ -6203,7 +6172,6 @@ def prepare_tensors(self): if len(experts) > 0: raise ValueError(f"Unprocessed experts: {experts}") - @ModelBase.register("CohereForCausalLM") class CommandR2Model(TextModel): model_arch = 
gguf.MODEL_ARCH.COMMAND_R @@ -6221,7 +6189,6 @@ def set_gguf_parameters(self): self.gguf_writer.add_logit_scale(self.hparams["logit_scale"]) self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.NONE) - @ModelBase.register("Cohere2ForCausalLM") class Cohere2Model(TextModel): model_arch = gguf.MODEL_ARCH.COHERE2 @@ -6239,7 +6206,6 @@ def set_gguf_parameters(self): self.gguf_writer.add_rope_dimension_count(int(rotary_pct * (hidden_size // num_attention_heads))) self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.NONE) - @ModelBase.register("OlmoForCausalLM") @ModelBase.register("OLMoForCausalLM") class OlmoModel(TextModel): @@ -6267,12 +6233,10 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter return [(self.map_tensor_name(name), data_torch)] - @ModelBase.register("SeedOssForCausalLM") class SeedOssModel(TextModel): model_arch = gguf.MODEL_ARCH.SEED_OSS - @ModelBase.register("Olmo2ForCausalLM") @ModelBase.register("Olmo3ForCausalLM") class Olmo2Model(TextModel): @@ -6302,7 +6266,6 @@ def set_gguf_parameters(self): self.gguf_writer.add_sliding_window_pattern(sliding_window_pattern) - @ModelBase.register("OlmoeForCausalLM") class OlmoeModel(TextModel): model_arch = gguf.MODEL_ARCH.OLMOE @@ -6362,7 +6325,6 @@ def prepare_tensors(self): if len(experts) > 0: raise ValueError(f"Unprocessed experts: {experts}") - @ModelBase.register("JinaBertModel", "JinaBertForMaskedLM") class JinaBertV2Model(BertModel): model_arch = gguf.MODEL_ARCH.JINA_BERT_V2 @@ -6380,7 +6342,6 @@ def set_vocab(self): else: raise NotImplementedError(f'Tokenizer {tokenizer_class} is not supported for JinaBertModel') - @ModelBase.register("OpenELMForCausalLM") class OpenELMModel(TextModel): model_arch = gguf.MODEL_ARCH.OPENELM @@ -6455,7 +6416,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter yield (self.map_tensor_name(name), data_torch) - @ModelBase.register("ArcticForCausalLM") class ArcticModel(TextModel): model_arch = gguf.MODEL_ARCH.ARCTIC @@ -6606,7 +6566,6 @@ def prepare_tensors(self): if len(experts) > 0: raise ValueError(f"Unprocessed experts: {experts}") - @ModelBase.register("DeepseekForCausalLM") class DeepseekModel(TextModel): model_arch = gguf.MODEL_ARCH.DEEPSEEK @@ -6695,7 +6654,6 @@ def prepare_tensors(self): if len(experts) > 0: raise ValueError(f"Unprocessed experts: {experts}") - @ModelBase.register( "DeepseekV2ForCausalLM", "DeepseekV3ForCausalLM", @@ -6884,7 +6842,6 @@ def prepare_tensors(self): if len(experts) > 0: raise ValueError(f"Unprocessed experts: {experts}") - @ModelBase.register("Dots1ForCausalLM") class Dots1Model(Qwen2MoeModel): model_arch = gguf.MODEL_ARCH.DOTS1 @@ -6912,7 +6869,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None): return [(self.map_tensor_name(name), data_torch)] return super().modify_tensors(data_torch, name, bid) - @ModelBase.register("PLMForCausalLM") class PLMModel(TextModel): model_arch = gguf.MODEL_ARCH.PLM @@ -6935,7 +6891,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter def prepare_tensors(self): super().prepare_tensors() - @ModelBase.register("T5WithLMHeadModel") @ModelBase.register("T5ForConditionalGeneration") @ModelBase.register("MT5ForConditionalGeneration") @@ -7077,7 +7032,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter return [(self.map_tensor_name(name), data_torch)] - @ModelBase.register("T5EncoderModel") class T5EncoderModel(TextModel): model_arch = gguf.MODEL_ARCH.T5ENCODER @@ 
-7213,7 +7167,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter return [(self.map_tensor_name(name), data_torch)] - @ModelBase.register("JAISLMHeadModel") class JaisModel(TextModel): model_arch = gguf.MODEL_ARCH.JAIS @@ -7296,7 +7249,6 @@ def prepare_tensors(self): super().prepare_tensors() self.gguf_writer.add_max_alibi_bias(self.max_alibi_bias) - @ModelBase.register("Glm4ForCausalLM", "Glm4vForConditionalGeneration") class Glm4Model(TextModel): model_arch = gguf.MODEL_ARCH.GLM4 @@ -7335,7 +7287,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter name = name.replace("language_model.", "") # for Glm4v return super().modify_tensors(data_torch, name, bid) - @ModelBase.register("Glm4MoeForCausalLM") class Glm4MoeModel(TextModel): model_arch = gguf.MODEL_ARCH.GLM4_MOE @@ -7468,7 +7419,6 @@ def prepare_tensors(self): if len(experts) > 0: raise ValueError(f"Unprocessed experts: {experts}") - @ModelBase.register("GlmForCausalLM", "ChatGLMModel", "ChatGLMForConditionalGeneration") class ChatGLMModel(TextModel): model_arch = gguf.MODEL_ARCH.CHATGLM @@ -7623,7 +7573,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter name = name.removeprefix("transformer.") return [(self.map_tensor_name(name), data_torch)] - @ModelBase.register("NemotronForCausalLM") class NemotronModel(TextModel): model_arch = gguf.MODEL_ARCH.NEMOTRON @@ -7664,7 +7613,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter return [(self.map_tensor_name(name), data_torch)] - @ModelBase.register("ExaoneForCausalLM") class ExaoneModel(TextModel): model_arch = gguf.MODEL_ARCH.EXAONE @@ -7734,7 +7682,6 @@ def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]: yield (self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FREQS), torch.tensor(rope_factors, dtype=torch.float32)) - @ModelBase.register("Exaone4ForCausalLM") class Exaone4Model(TextModel): model_arch = gguf.MODEL_ARCH.EXAONE4 @@ -7803,7 +7750,6 @@ def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]: yield (self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FREQS), torch.tensor(rope_factors, dtype=torch.float32)) - @ModelBase.register("GraniteForCausalLM") class GraniteModel(LlamaModel): """Conversion for IBM's GraniteForCausalLM""" @@ -7837,7 +7783,6 @@ def set_gguf_parameters(self): self.gguf_writer.add_logit_scale(logits_scale) logger.info("gguf: (granite) logits_scale = %s", logits_scale) - @ModelBase.register("GraniteMoeForCausalLM", "GraniteMoeSharedForCausalLM") class GraniteMoeModel(GraniteModel): """Conversion for IBM's GraniteMoeForCausalLM""" @@ -7891,7 +7836,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter return super().modify_tensors(data_torch, name, bid) - @ModelBase.register("GraniteMoeHybridForCausalLM", "BambaForCausalLM") class GraniteHybridModel(Mamba2Model, GraniteMoeModel): """GraniteHybrid is a hybrid SSM + Attention model that uses Mamba2 SSM @@ -8031,7 +7975,6 @@ def set_vocab(self): self.hparams["pad_vocab_size_multiple"] = 8 Mamba2Model.set_vocab(self) - @ModelBase.register("NemotronHForCausalLM") class NemotronHModel(GraniteHybridModel): """Hybrid mamba2/attention model from NVIDIA""" @@ -8080,7 +8023,6 @@ def set_vocab(self): # config, so we need to explicitly override it here. 
self.gguf_writer.add_add_bos_token(True) - @ModelBase.register("BailingMoeForCausalLM") class BailingMoeModel(TextModel): model_arch = gguf.MODEL_ARCH.BAILINGMOE @@ -8187,7 +8129,6 @@ def prepare_tensors(self): if len(experts) > 0: raise ValueError(f"Unprocessed experts: {experts}") - @ModelBase.register("BailingMoeV2ForCausalLM") class BailingMoeV2Model(TextModel): model_arch = gguf.MODEL_ARCH.BAILINGMOE2 @@ -8284,7 +8225,6 @@ def prepare_tensors(self): if len(experts) > 0: raise ValueError(f"Unprocessed experts: {experts}") - @ModelBase.register("GroveMoeForCausalLM", "modeling_grove_moe.GroveMoeForCausalLM") class GroveMoeModel(TextModel): model_arch = gguf.MODEL_ARCH.GROVEMOE @@ -8399,7 +8339,6 @@ def prepare_tensors(self): if len(experts) > 0: raise ValueError(f"Unprocessed experts: {experts}") - @ModelBase.register("ChameleonForConditionalGeneration") @ModelBase.register("ChameleonForCausalLM") # obsolete class ChameleonModel(TextModel): @@ -8441,7 +8380,6 @@ def _reverse_hf_permute(data_torch, n_heads, hidden_dim): data_torch = data_torch.repeat_interleave(n_heads, 0) return data_torch - @ModelBase.register("UltravoxModel") class UltravoxModel(TextModel): model_arch = gguf.MODEL_ARCH.LLAMA # dummy @@ -8450,7 +8388,6 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) raise NotImplementedError("Ultravox does not have text decoder. Instead, it uses Llama or other models for text. If you want to get the audio encoder, please use --mmproj argument") - @ModelBase.register("Qwen2AudioForConditionalGeneration") class WhisperEncoderModel(MmprojModel): has_vision_encoder = False # no vision encoder @@ -8491,7 +8428,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter return [(self.map_tensor_name(name), data_torch)] - @ModelBase.register("UltravoxModel") class UltravoxWhisperEncoderModel(WhisperEncoderModel): has_vision_encoder = False # no vision encoder @@ -8502,7 +8438,6 @@ def set_gguf_parameters(self): self.gguf_writer.add_clip_projector_type(gguf.VisionProjectorType.ULTRAVOX) self.gguf_writer.add_audio_stack_factor(self.global_config["stack_factor"]) - @ModelBase.register("VoxtralForConditionalGeneration") class VoxtralWhisperEncoderModel(WhisperEncoderModel): has_vision_encoder = False # no vision encoder @@ -8513,7 +8448,6 @@ def set_gguf_parameters(self): self.gguf_writer.add_clip_projector_type(gguf.VisionProjectorType.VOXTRAL) self.gguf_writer.add_audio_stack_factor(4) # == intermediate_size // hidden_size - @ModelBase.register("FalconH1ForCausalLM") class FalconH1Model(Mamba2Model): model_arch = gguf.MODEL_ARCH.FALCON_H1 @@ -8620,7 +8554,6 @@ def set_gguf_parameters(self): # Add any other Falcon Mamba2 specific configuration self.gguf_writer.add_rope_freq_base(self.find_hparam(["rope_theta"])) - @ModelBase.register("HunYuanMoEV1ForCausalLM") class HunYuanMoEModel(TextModel): model_arch = gguf.MODEL_ARCH.HUNYUAN_MOE @@ -8761,7 +8694,6 @@ def prepare_tensors(self): if len(experts) > 0: raise ValueError(f"Unprocessed experts: {experts}") - @ModelBase.register("LLaDAMoEModel", "LLaDAMoEModelLM") class LLaDAMoEModel(TextModel): model_arch = gguf.MODEL_ARCH.LLADA_MOE @@ -8831,7 +8763,6 @@ def prepare_tensors(self): if len(experts) > 0: raise ValueError(f"Unprocessed experts: {experts}") - @ModelBase.register("HunYuanDenseV1ForCausalLM") class HunYuanModel(TextModel): model_arch = gguf.MODEL_ARCH.HUNYUAN_DENSE @@ -8923,7 +8854,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter return 
[(self.map_tensor_name(name), data_torch)] - @ModelBase.register("SmolLM3ForCausalLM") class SmolLM3Model(LlamaModel): model_arch = gguf.MODEL_ARCH.SMOLLM3 @@ -8938,7 +8868,6 @@ def set_vocab(self): chat_template = tokenizer.chat_template.replace("[:]", "") self.gguf_writer.add_chat_template(chat_template) - @ModelBase.register("GptOssForCausalLM") class GptOssModel(TextModel): model_arch = gguf.MODEL_ARCH.GPT_OSS @@ -9062,7 +8991,6 @@ def set_gguf_parameters(self): self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"]) self.gguf_writer.add_rope_scaling_orig_ctx_len(rope_scaling.get("original_max_position_embeddings", 4096)) - @ModelBase.register("Lfm2ForCausalLM", "LFM2ForCausalLM") class LFM2Model(TextModel): model_arch = gguf.MODEL_ARCH.LFM2 @@ -9111,7 +9039,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter return [(self.map_tensor_name(name), data_torch)] - @ModelBase.register("Lfm2MoeForCausalLM") class LFM2MoeModel(TextModel): model_arch = gguf.MODEL_ARCH.LFM2MOE @@ -9180,7 +9107,6 @@ def prepare_tensors(self): super().prepare_tensors() assert not self._experts_cache - @ModelBase.register("Lfm2VlForConditionalGeneration") class LFM2VLModel(MmprojModel): def __init__(self, *args, **kwargs): @@ -9215,7 +9141,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter return [] # skip other tensors - @ModelBase.register("SmallThinkerForCausalLM") class SmallThinkerModel(TextModel): model_arch = gguf.MODEL_ARCH.SMALLTHINKER @@ -9451,8 +9376,9 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter ###### CONVERSION LOGIC ###### - # tree of lazy tensors + + class LazyTorchTensor(gguf.LazyBase): _tensor_type = torch.Tensor # to keep the type-checker happy
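
For reference, below is a minimal standalone sketch (not part of the patch) of the normalization that `_clean_chat_template_to_mtmd` performs, useful for previewing the cleaned template before conversion. The marker strings and the helper name `preview_cleaned_template` are illustrative assumptions; the patch itself reads the same `tokenizer_config.json` and writes the cleaned template into the GGUF metadata instead of printing it.

#!/usr/bin/env python3
# preview_cleaned_template.py -- illustrative only; mirrors the string replacements
# done by TextModel._clean_chat_template_to_mtmd above (marker names are assumed).
from __future__ import annotations

import json
import sys
from pathlib import Path

# assumed Gemma vision/audio delimiters -> llama.cpp MTMD media placeholder
MARKERS = {
    "<start_of_image>": "<__media__>",
    "<end_of_image>": "",
    "<start_of_audio>": "<__media__>",
    "<end_of_audio>": "",
}


def preview_cleaned_template(model_dir: str) -> str | None:
    cfg_path = Path(model_dir) / "tokenizer_config.json"
    if not cfg_path.is_file():
        return None
    chat_template = json.loads(cfg_path.read_text(encoding="utf-8")).get("chat_template")
    if not isinstance(chat_template, str):
        return None
    for src, dst in MARKERS.items():
        chat_template = chat_template.replace(src, dst)
    return chat_template


if __name__ == "__main__":
    # usage: python preview_cleaned_template.py /path/to/hf/model
    print(preview_cleaned_template(sys.argv[1]))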