1 change: 1 addition & 0 deletions .gitignore
@@ -82,6 +82,7 @@ models/*
models-mnt
!models/.editorconfig
!models/ggml-vocab-*.gguf*
!models/templates

# Zig
zig-out/
76 changes: 71 additions & 5 deletions convert_hf_to_gguf.py
@@ -1900,6 +1900,7 @@ def prepare_tensors(self):
"MixtralForCausalLM",
"VLlama3ForCausalLM",
"LlavaForConditionalGeneration",
"VoxtralForConditionalGeneration",
"LlamaModel")
class LlamaModel(TextModel):
model_arch = gguf.MODEL_ARCH.LLAMA
@@ -1912,6 +1913,11 @@ def __init__(self, *args, **kwargs):
self.hparams["num_attention_heads"] = self.hparams.get("num_attention_heads", 32)

def set_vocab(self):
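        # Mistral-style checkpoints ship tekken.json instead of tokenizer.json;
        # route those through the dedicated tekken path below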
path_tekken_json = self.dir_model / "tekken.json"
path_tokenizer_json = self.dir_model / "tokenizer.json"
if path_tekken_json.is_file() and not path_tokenizer_json.is_file():
return self.set_vocab_tekken()

try:
self._set_vocab_sentencepiece()
except FileNotFoundError:
@@ -1944,6 +1950,52 @@ def set_vocab(self):
if self.hparams.get("vocab_size", 32000) == 49152:
self.gguf_writer.add_add_bos_token(False)

def set_vocab_tekken(self):
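        # export a Mistral tekken.json vocabulary directly into GGUF metadata via gguf-py's MistralVocab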
vocab = gguf.vocab.MistralVocab(self.dir_model)
self.gguf_writer.add_tokenizer_model(vocab.gguf_tokenizer_model)

tokens = []
scores = []
toktypes = []

for text, score, toktype in vocab.all_tokens():
tokens.append(text)
scores.append(score)
toktypes.append(toktype)

assert len(tokens) == vocab.vocab_size, (
f"token count ({len(tokens)}) != vocab size ({vocab.vocab_size})"
)

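        # tekken is BPE-based, so the merge table must be exported as well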
if vocab.tokenizer_type == gguf.vocab.MistralTokenizerType.tekken:
self.gguf_writer.add_tokenizer_pre("tekken")
self.gguf_writer.add_token_merges(
vocab.extract_vocab_merges_from_model()
)

logger.info(
f"Setting bos, eos, unk and pad token IDs to {vocab.bos_id}, {vocab.eos_id}, {vocab.unk_id}, {vocab.pad_id}."
)

self.gguf_writer.add_bos_token_id(vocab.bos_id)
self.gguf_writer.add_eos_token_id(vocab.eos_id)
self.gguf_writer.add_unk_token_id(vocab.unk_id)
self.gguf_writer.add_pad_token_id(vocab.pad_id)

self.gguf_writer.add_token_list(tokens)
self.gguf_writer.add_token_scores(scores)
self.gguf_writer.add_token_types(toktypes)
self.gguf_writer.add_vocab_size(vocab.vocab_size)

self.gguf_writer.add_add_bos_token(True)
self.gguf_writer.add_add_eos_token(False)

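        # tekken.json carries no chat template, so embed a known-compatible Jinja template from the repo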
script_dir = Path(__file__).parent
template_path = script_dir / "models/templates/unsloth-mistral-Devstral-Small-2507.jinja"
with open(template_path, "r", encoding="utf-8") as f:
template = f.read()
self.gguf_writer.add_chat_template(template)

def set_gguf_parameters(self):
super().set_gguf_parameters()
hparams = self.hparams
@@ -1971,12 +2023,13 @@ def permute(weights: Tensor, n_head: int, n_head_kv: int | None):
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
n_head = self.hparams["num_attention_heads"]
n_kv_head = self.hparams.get("num_key_value_heads")
is_vision_tensor = "vision_tower" in name \
is_multimodal_tensor = "vision_tower" in name \
or "vision_model" in name \
or "audio_tower" in name \
or "model.connector" in name \
or "multi_modal_projector" in name

if is_vision_tensor:
if is_multimodal_tensor:
            return [] # skip multimodal tensors
elif self.hf_arch == "LlamaModel":
name = "model." + name
@@ -7231,9 +7284,10 @@ class WhisperEncoderModel(MmprojModel):

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.hparams["hidden_size"] = self.hparams["d_model"]
self.hparams["intermediate_size"] = self.hparams["encoder_ffn_dim"]
self.hparams["num_attention_heads"] = self.hparams["encoder_attention_heads"]
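        # only remap the vanilla Whisper names (d_model, encoder_ffn_dim, encoder_attention_heads) when HF-style keys are absent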
if "hidden_size" not in self.hparams and "intermediate_size" not in self.hparams:
self.hparams["hidden_size"] = self.hparams["d_model"]
self.hparams["intermediate_size"] = self.hparams["encoder_ffn_dim"]
self.hparams["num_attention_heads"] = self.hparams["encoder_attention_heads"]

def set_gguf_parameters(self):
super().set_gguf_parameters()
@@ -7272,9 +7326,21 @@ class UltravoxWhisperEncoderModel(WhisperEncoderModel):

def set_gguf_parameters(self):
super().set_gguf_parameters()
self.gguf_writer.add_clip_projector_type(gguf.VisionProjectorType.ULTRAVOX)
self.gguf_writer.add_audio_stack_factor(self.global_config["stack_factor"])


@ModelBase.register("VoxtralForConditionalGeneration")
class VoxtralWhisperEncoderModel(WhisperEncoderModel):
has_vision_encoder = False # no vision encoder
has_audio_encoder = True

def set_gguf_parameters(self):
super().set_gguf_parameters()
self.gguf_writer.add_clip_projector_type(gguf.VisionProjectorType.VOXTRAL)
self.gguf_writer.add_audio_stack_factor(4) # == intermediate_size // hidden_size


@ModelBase.register("FalconH1ForCausalLM")
class FalconH1Model(Mamba2Model):
model_arch = gguf.MODEL_ARCH.FALCON_H1
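The new VoxtralWhisperEncoderModel above hard-codes an audio stack factor of 4. As a rough illustration of what that factor means downstream (a sketch with Whisper-large-style sizes and Ultravox-style frame stacking; the names and the reshape are illustrative, not llama.cpp code):

import numpy as np

hidden_size = 1280    # Whisper encoder width (d_model)
stack_factor = 4      # == intermediate_size // hidden_size for Voxtral
n_frames = 100        # encoder frames for some clip; assumed divisible by 4

frames = np.zeros((n_frames, hidden_size), dtype=np.float32)

# stacking concatenates each group of 4 consecutive frames, shortening the
# sequence while widening the embedding to match intermediate_size
stacked = frames.reshape(n_frames // stack_factor, hidden_size * stack_factor)
assert stacked.shape == (25, 5120)  # 5120 == 4 * 1280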
3 changes: 3 additions & 0 deletions docs/multimodal.md
@@ -97,6 +97,9 @@ NOTE: some models may require large context window, for example: `-c 8192`
# Qwen2-Audio and SeaLLM-Audio
# note: no pre-quantized GGUF for these models, as they give very poor results
# ref: https://github.com/ggml-org/llama.cpp/pull/13760

# Mistral's Voxtral
(tool_name) -hf ggml-org/Voxtral-Mini-3B-2507-GGUF
```

**Mixed modalities**:
1 change: 1 addition & 0 deletions gguf-py/gguf/constants.py
@@ -2704,6 +2704,7 @@ class VisionProjectorType:
INTERNVL = "internvl"
QWEN2A = "qwen2a" # audio
QWEN25O = "qwen2.5o" # omni
VOXTRAL = "voxtral"


# Items here are (block size, type size)