
Commit 07144a8

support fp8 models; upload 1 model.
1 parent 0ee543a

3 files changed: +40 -1 lines changed

convert.py

Lines changed: 23 additions & 1 deletion
@@ -273,6 +273,7 @@ class TokenizerType(Enum):
 g_tokenizer_type = TokenizerType.BPE1
 
 g_special_tokens: Dict = {}
+g_do_dequantization: bool = False
 
 def pad_to_len(l: list, to_len: int, v = 0) -> list:
     assert len(l) <= to_len
@@ -605,7 +606,22 @@ def format_time(t) -> str:
         remain = (total - i - 1) * per_item
         print_progress_bar(i + 1, total, prefix=desc, suffix=f"({i}/{total}) {format_time(per_item)}/it rem: {format_time(remain)}")
 
+def dequantize(state_dict: dict) -> dict:
+    r = {}
+    for k in state_dict.keys():
+        t: torch.Tensor = state_dict[k]
+        if k.endswith('.weight_scale_inv'):
+            k = k.replace('.weight_scale_inv', '.weight')
+            assert k in state_dict
+            r[k] = state_dict[k].float() * t
+            continue
+        if k not in r:
+            r[k] = t
+    return r
+
 def dump_state_dict(f, weight_names, model_files, ggml_type, config, state_dict_pp, loader_fun = None):
+    global g_do_dequantization
+
     tensor_info = []
     converted_names = []
@@ -618,6 +634,10 @@ def dump_state_dict(f, weight_names, model_files, ggml_type, config, state_dict_pp, loader_fun = None):
 
     for state_dict in loader_fun(model_files):
         this_round = []
+
+        if g_do_dequantization:
+            state_dict = dequantize(state_dict)
+
         state_dict = state_dict_pp(config, state_dict)
 
         for x in state_dict:
@@ -8462,7 +8482,7 @@ def load_some_model(path: Path, fallback_files: list[Path] = []) -> List[Path]:
     return [path]
 
 def main():
-    global g_lora
+    global g_lora, g_do_dequantization
 
     parser = argparse.ArgumentParser("chatllm-convert")
     parser.add_argument("-i", "--model_name_or_path", type=str)
@@ -8499,6 +8519,8 @@ def main():
     else:
         config = AttributeDict({})
 
+    g_do_dequantization = config.quantization_config is not None
+
     if arch == '':
         if config.architectures is None:
             if "model_type" in config:

docs/models.md

Lines changed: 1 addition & 0 deletions
@@ -399,6 +399,7 @@ Please use `--format completion` for these models.
   [3B-Reasoning-2512](https://huggingface.co/mistralai/Ministral-3-3B-Reasoning-2512/tree/039f888eb54340b5e9870721f3c249fbc809b8e8),
   [8B-Instruct-2512](https://huggingface.co/mistralai/Ministral-3-8B-Instruct-2512-BF16/tree/bde2b3370dbf8ad77ceab25a5a43bc9013cda350),
   [8B-Reasoning-2512](https://huggingface.co/mistralai/Ministral-3-8B-Reasoning-2512/tree/f511871f6402ba68dadfb42a94a7a7e13499fd65)
+* [x] Devstral-Small-2: [24B-Instruct-2512](https://huggingface.co/mistralai/Devstral-Small-2-24B-Instruct-2512/tree/8d27a0d2120f1563c11dc91d494e99f9678ecf79)
 
 * Qwen (`Qwen2AudioForConditionalGeneration`, `Qwen2_5_VLForConditionalGeneration`)
   * [x] Qwen2-Audio: [7B-Instruct](https://huggingface.co/Qwen/Qwen2-Audio-7B-Instruct/tree/0a095220c30b7b31434169c3086508ef3ea5bf0a)

scripts/models.json

Lines changed: 16 additions & 0 deletions
@@ -285,6 +285,22 @@
             }
         }
     },
+    "devstral-small-2": {
+        "brief": "Devstral Small 2 excels at using tools to explore codebases, editing multiple files, and powering software engineering agents.",
+        "default": "24b-2512",
+        "license": "Apache License Version 2.0",
+        "variants": {
+            "24b-2512": {
+                "default": "q4_1",
+                "quantized": {
+                    "q4_1": {
+                        "size": 15015421232,
+                        "url": "chatllm_quantized_ministral-3/devstral-small-2-24b-2512-q4_1.bin"
+                    }
+                }
+            }
+        }
+    },
     "mistral0.1": {
         "brief": "The Mistral-7B-Instruct-v0.1 Large Language Model (LLM) is an instruct fine-tuned version of the Mistral-7B-v0.1 generative text model using a variety of publicly available conversation datasets.",
         "default": "7b",
