Skip to content

Commit b828887

Browse files
committed
also support Devstral conversion
1 parent 97119dd commit b828887

File tree

2 files changed

+56
-60
lines changed

2 files changed

+56
-60
lines changed

convert_hf_to_gguf.py

Lines changed: 55 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -1900,6 +1900,7 @@ def prepare_tensors(self):
19001900
"MixtralForCausalLM",
19011901
"VLlama3ForCausalLM",
19021902
"LlavaForConditionalGeneration",
1903+
"VoxtralForConditionalGeneration",
19031904
"LlamaModel")
19041905
class LlamaModel(TextModel):
19051906
model_arch = gguf.MODEL_ARCH.LLAMA
@@ -1912,6 +1913,11 @@ def __init__(self, *args, **kwargs):
19121913
self.hparams["num_attention_heads"] = self.hparams.get("num_attention_heads", 32)
19131914

19141915
def set_vocab(self):
1916+
path_tekken_json = self.dir_model / "tekken.json"
1917+
path_tokenizer_json = self.dir_model / "tokenizer.json"
1918+
if path_tekken_json.is_file() and not path_tokenizer_json.is_file():
1919+
return self.set_vocab_tekken()
1920+
19151921
try:
19161922
self._set_vocab_sentencepiece()
19171923
except FileNotFoundError:
@@ -1944,6 +1950,52 @@ def set_vocab(self):
19441950
if self.hparams.get("vocab_size", 32000) == 49152:
19451951
self.gguf_writer.add_add_bos_token(False)
19461952

1953+
def set_vocab_tekken(self):
    """Build the GGUF vocabulary from a Mistral ``tekken.json`` tokenizer.

    Loads the tokenizer via ``gguf.vocab.MistralVocab``, writes the token
    list, scores, types, special-token IDs and BPE merges (tekken only) to
    ``self.gguf_writer``, then embeds the Devstral chat template shipped
    alongside this script.

    Raises:
        ValueError: if the number of extracted tokens does not match the
            tokenizer's reported vocab size.
        OSError: if the bundled chat-template file cannot be read.
    """
    vocab = gguf.vocab.MistralVocab(self.dir_model)
    self.gguf_writer.add_tokenizer_model(vocab.gguf_tokenizer_model)

    tokens = []
    scores = []
    toktypes = []

    for text, score, toktype in vocab.all_tokens():
        tokens.append(text)
        scores.append(score)
        toktypes.append(toktype)

    # Raise a real exception instead of `assert` so the check survives
    # `python -O` — a truncated vocab must never be written silently.
    if len(tokens) != vocab.vocab_size:
        raise ValueError(
            f"token count ({len(tokens)}) != vocab size ({vocab.vocab_size})"
        )

    if vocab.tokenizer_type == gguf.vocab.MistralTokenizerType.tekken:
        self.gguf_writer.add_tokenizer_pre("tekken")
        self.gguf_writer.add_token_merges(
            vocab.extract_vocab_merges_from_model()
        )

    logger.info(
        f"Setting bos, eos, unk and pad token IDs to {vocab.bos_id}, {vocab.eos_id}, {vocab.unk_id}, {vocab.pad_id}."
    )

    self.gguf_writer.add_bos_token_id(vocab.bos_id)
    self.gguf_writer.add_eos_token_id(vocab.eos_id)
    self.gguf_writer.add_unk_token_id(vocab.unk_id)
    self.gguf_writer.add_pad_token_id(vocab.pad_id)

    self.gguf_writer.add_token_list(tokens)
    self.gguf_writer.add_token_scores(scores)
    self.gguf_writer.add_token_types(toktypes)
    self.gguf_writer.add_vocab_size(vocab.vocab_size)

    self.gguf_writer.add_add_bos_token(True)
    self.gguf_writer.add_add_eos_token(False)

    # NOTE(review): the template is hard-coded to the Devstral jinja file;
    # assumes every tekken-converted model wants this template — confirm.
    script_dir = Path(__file__).parent
    template_path = script_dir / "models/templates/unsloth-mistral-Devstral-Small-2507.jinja"
    template = template_path.read_text(encoding="utf-8")
    self.gguf_writer.add_chat_template(template)
19471999
def set_gguf_parameters(self):
19482000
super().set_gguf_parameters()
19492001
hparams = self.hparams
@@ -1971,12 +2023,13 @@ def permute(weights: Tensor, n_head: int, n_head_kv: int | None):
19712023
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
19722024
n_head = self.hparams["num_attention_heads"]
19732025
n_kv_head = self.hparams.get("num_key_value_heads")
1974-
is_vision_tensor = "vision_tower" in name \
2026+
is_multimodal_tensor = "vision_tower" in name \
19752027
or "vision_model" in name \
2028+
or "audio_tower" in name \
19762029
or "model.connector" in name \
19772030
or "multi_modal_projector" in name
19782031

1979-
if is_vision_tensor:
2032+
if is_multimodal_tensor:
19802033
return [] # skip vision tensors
19812034
elif self.hf_arch == "LlamaModel":
19822035
name = "model." + name
@@ -2260,63 +2313,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None):
22602313
return super().modify_tensors(data_torch, name, bid)
22612314

22622315

2263-
@ModelBase.register("VoxtralForConditionalGeneration")
class VoxtralModel(LlamaModel):
    """Converter for Voxtral checkpoints.

    The text tower maps onto the LLAMA architecture; audio-tower and
    projector tensors are dropped during conversion.
    """

    model_arch = gguf.MODEL_ARCH.LLAMA

    def set_vocab(self):
        """Write the Mistral (tekken) vocabulary, special-token IDs and
        chat template to the GGUF writer."""
        vocab = gguf.vocab.MistralVocab(self.dir_model)
        self.gguf_writer.add_tokenizer_model(vocab.gguf_tokenizer_model)

        tokens = []
        scores = []
        toktypes = []

        for text, score, toktype in vocab.all_tokens():
            tokens.append(text)
            scores.append(score)
            toktypes.append(toktype)

        # Sanity check: the extracted token list must cover the full vocab.
        assert len(tokens) == vocab.vocab_size, (
            f"token count ({len(tokens)}) != vocab size ({vocab.vocab_size})"
        )

        # Tekken tokenizers are BPE-based, so merges must be exported too.
        if vocab.tokenizer_type == gguf.vocab.MistralTokenizerType.tekken:
            self.gguf_writer.add_tokenizer_pre("tekken")
            self.gguf_writer.add_token_merges(
                vocab.extract_vocab_merges_from_model()
            )

        logger.info(
            f"Setting bos, eos, unk and pad token IDs to {vocab.bos_id}, {vocab.eos_id}, {vocab.unk_id}, {vocab.pad_id}."
        )

        self.gguf_writer.add_bos_token_id(vocab.bos_id)
        self.gguf_writer.add_eos_token_id(vocab.eos_id)
        self.gguf_writer.add_unk_token_id(vocab.unk_id)
        self.gguf_writer.add_pad_token_id(vocab.pad_id)

        self.gguf_writer.add_token_list(tokens)
        self.gguf_writer.add_token_scores(scores)
        self.gguf_writer.add_token_types(toktypes)
        self.gguf_writer.add_vocab_size(vocab.vocab_size)

        self.gguf_writer.add_add_bos_token(True)
        self.gguf_writer.add_add_eos_token(False)

        # Embed the Devstral chat template shipped next to this script.
        script_dir = Path(__file__).parent
        template_path = script_dir / "models/templates/unsloth-mistral-Devstral-Small-2507.jinja"
        with open(template_path, "r", encoding="utf-8") as f:
            template = f.read()
        self.gguf_writer.add_chat_template(template)

    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None):
        """Strip the "language_model." prefix and drop audio/projector
        tensors — only the text model is converted here."""
        name = name.replace("language_model.", "")
        if "multi_modal_projector" in name or "audio_tower" in name:
            return []
        return super().modify_tensors(data_torch, name, bid)
2318-
2319-
23202316
@ModelBase.register("DeciLMForCausalLM")
23212317
class DeciModel(TextModel):
23222318
model_arch = gguf.MODEL_ARCH.DECI

gguf-py/gguf/vocab.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
from mistral_common.tokens.tokenizers.tekken import Tekkenizer
1919
# from mistral_common.tokens.tokenizers.utils import (
2020
# _filter_valid_tokenizer_files,
21-
# )
21+
# ) # FIXME: this function is removed in newer versions of mistral_common
2222
from mistral_common.tokens.tokenizers.sentencepiece import (
2323
SentencePieceTokenizer,
2424
)

0 commit comments

Comments (0)