
Commit 967a2e1

saood06, ngxson, and compilade authored
Fix missing rope_freqs with convert_hf_to_gguf (#402)
* lora : fix llama conversion script with ROPE_FREQS

* convert : refactor rope_freqs generation

  This should also fix vocab-only conversion for Phi-3.

* convert : adapt MiniCPM3 to separate rope_freqs insertion

  MiniCPM3's tokenizer is treated as a SentencePiece tokenizer to avoid having to run its custom Python code which mixes tokenization in the same file as tool calls.

* gguf-py : add long and short RoPE factors to tensor mappings

  Empty, but the key names are used to populate the mappings.

---------

Co-authored-by: Xuan Son Nguyen <[email protected]>
Co-authored-by: Francis Couture-Harpin <[email protected]>
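The core of the refactor: model classes now yield any generated tensors (such as rope_freqs) from a generate_extra_tensors() hook, and the base prepare_tensors() chains them with the regular checkpoint tensors. A minimal sketch of that pattern, using illustrative class and tensor names rather than the actual converter classes:

# Minimal sketch of the generate_extra_tensors() pattern (names are illustrative).
from itertools import chain
from typing import Iterable

import torch
from torch import Tensor


class ExampleModel:
    def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
        # Models that need generated tensors (e.g. rope_freqs) yield them here
        # instead of calling gguf_writer.add_tensor() directly.
        yield ("rope_freqs.weight", torch.tensor([1.0, 1.0], dtype=torch.float32))

    def get_tensors(self) -> Iterable[tuple[str, Tensor]]:
        # Stand-in for reading tensors from the HF checkpoint.
        yield ("token_embd.weight", torch.zeros(4, 8))

    def prepare_tensors(self) -> None:
        # Generated tensors are iterated first, then the checkpoint tensors,
        # so both go through the same downstream handling.
        for name, data in chain(self.generate_extra_tensors(), self.get_tensors()):
            print(f"{name}: shape={tuple(data.shape)}")


ExampleModel().prepare_tensors()

Because the LoRA converter subclasses the same base, convert_lora_to_gguf.py simply overrides the hook to return an empty iterable, since rope_freqs must never be emitted for adapters (see its diff below).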
1 parent e5a4a3c commit 967a2e1

File tree

4 files changed: +27 -5 lines changed


convert_hf_to_gguf.py

Lines changed: 18 additions & 5 deletions
@@ -14,6 +14,7 @@
 from pathlib import Path
 from hashlib import sha256
 from typing import TYPE_CHECKING, Any, Callable, ContextManager, Iterable, Iterator, Literal, Sequence, TypeVar, cast
+from itertools import chain

 import math
 import numpy as np
@@ -256,10 +257,14 @@ def tensor_force_quant(self, name: str, new_name: str, bid: int | None, n_dims:

         return False

+    # some models need extra generated tensors (like rope_freqs)
+    def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
+        return ()
+
     def prepare_tensors(self):
         max_name_len = max(len(s) for _, s in self.tensor_map.mapping.values()) + len(".weight,")

-        for name, data_torch in self.get_tensors():
+        for name, data_torch in chain(self.generate_extra_tensors(), self.get_tensors()):
             # we don't need these
             if name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")):
                 continue
@@ -1559,7 +1564,7 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter

         return [(self.map_tensor_name(name), data_torch)]

-    def prepare_tensors(self):
+    def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
         if rope_scaling := self.find_hparam(["rope_scaling"], optional=True):
             if rope_scaling.get("rope_type", '').lower() == "llama3":
                 base = self.hparams.get("rope_theta", 10000.0)
@@ -1586,8 +1591,9 @@ def prepare_tensors(self):
                         smooth = (old_context_len / wavelen - low_freq_factor) / (high_freq_factor - low_freq_factor)
                         rope_factors.append(1 / ((1 - smooth) / factor + smooth))

-                self.gguf_writer.add_tensor(self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FREQS), np.array(rope_factors, dtype=np.float32))
+                yield (self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FREQS), torch.tensor(rope_factors, dtype=torch.float32))

+    def prepare_tensors(self):
         super().prepare_tensors()

         if self._experts is not None:
@@ -2307,6 +2313,13 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_file_type(self.ftype)
         self.gguf_writer.add_sliding_window(self.find_hparam(["sliding_window"]))

+    def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
+        n_embd = self.find_hparam(["hidden_size", "n_embd"])
+        n_head = self.find_hparam(["num_attention_heads", "n_head"])
+        max_pos_embds = self.find_hparam(["n_positions", "max_position_embeddings"])
+        orig_max_pos_embds = self.find_hparam(["original_max_position_embeddings"])
+        rope_dims = n_embd // n_head
+
         # write rope scaling for long context (128k) model
         rope_scaling = self.find_hparam(['rope_scaling'], True)
         if rope_scaling is None:
@@ -2336,8 +2349,8 @@ def set_gguf_parameters(self):
         if len(long_factors) != len(short_factors) or len(long_factors) != rope_dims / 2:
             raise ValueError(f'The length of rope long and short factors must be {rope_dims / 2}')

-        self.gguf_writer.add_tensor(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ROPE_FACTORS_LONG] + ".weight", np.array(long_factors, dtype=np.float32))
-        self.gguf_writer.add_tensor(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ROPE_FACTORS_SHORT] + ".weight", np.array(short_factors, dtype=np.float32))
+        yield (self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FACTORS_LONG), torch.tensor(long_factors, dtype=torch.float32))
+        yield (self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FACTORS_SHORT), torch.tensor(short_factors, dtype=torch.float32))


 @Model.register("PlamoForCausalLM")

convert_lora_to_gguf.py

Lines changed: 4 additions & 0 deletions
@@ -331,6 +331,10 @@ def set_gguf_parameters(self):
             self.gguf_writer.add_float32(gguf.Keys.Adapter.LORA_ALPHA, self.lora_alpha)
             super().set_gguf_parameters()

+        def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
+            # Never add extra tensors (e.g. rope_freqs) for LoRA adapters
+            return ()
+
         def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
             tensor_map: dict[str, PartialLoraTensor] = {}

gguf-py/gguf/constants.py

Lines changed: 2 additions & 0 deletions
@@ -806,6 +806,8 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
         MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FACTORS_LONG,
+        MODEL_TENSOR.ROPE_FACTORS_SHORT,
         MODEL_TENSOR.ATTN_NORM,
         MODEL_TENSOR.ATTN_QKV,
         MODEL_TENSOR.ATTN_Q,

gguf-py/gguf/tensor_mapping.py

Lines changed: 3 additions & 0 deletions
@@ -82,6 +82,9 @@ class TensorNameMap:
             "rope.freqs",  # llama-pth
             "rotary_pos_emb.inv_freq",  # chatglm
         ),
+
+        MODEL_TENSOR.ROPE_FACTORS_LONG: (),
+        MODEL_TENSOR.ROPE_FACTORS_SHORT: (),
     }

     block_mappings_cfg: dict[MODEL_TENSOR, tuple[str, ...]] = {
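The two new entries above are deliberately empty: as the commit message notes, no source-model tensor maps onto them, but the key names themselves populate the mapping so the converters can emit the generated factors under the proper GGUF names. A rough illustration of the name lookup involved, assuming the gguf-py package is installed (gguf.TENSOR_NAMES is the same lookup the removed Phi-3 code used by hand; the mapping internals are outside this diff):

import gguf

# The enum keys added in constants.py resolve to GGUF-side tensor names; the
# old Phi-3 code appended ".weight" manually, while the new code goes through
# Model.format_tensor_name() instead.
long_name = gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ROPE_FACTORS_LONG]
short_name = gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ROPE_FACTORS_SHORT]
print(long_name + ".weight", short_name + ".weight")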

0 commit comments
