
Commit 4c56b7c

Merge branch 'upstream' into concedo_experimental
# Conflicts:
#	README.md
#	examples/gbnf-validator/gbnf-validator.cpp
#	examples/llava/clip.cpp
#	examples/run/README.md
#	examples/run/run.cpp
#	examples/server/README.md
#	ggml/src/ggml-cpu/CMakeLists.txt
#	src/llama.cpp
#	tests/test-grammar-integration.cpp
#	tests/test-llama-grammar.cpp
2 parents b7d3274 + e34c5af commit 4c56b7c

File tree: 19 files changed, +550 −226 lines


common/arg.cpp

Lines changed: 13 additions & 1 deletion
@@ -627,7 +627,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         [](common_params & params) {
             params.ctx_shift = false;
         }
-    ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_IMATRIX}).set_env("LLAMA_ARG_NO_CONTEXT_SHIFT"));
+    ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_IMATRIX, LLAMA_EXAMPLE_PERPLEXITY}).set_env("LLAMA_ARG_NO_CONTEXT_SHIFT"));
     add_opt(common_arg(
         {"--chunks"}, "N",
         string_format("max number of chunks to process (default: %d, -1 = all)", params.n_chunks),
@@ -2207,5 +2207,17 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         }
     ).set_examples({LLAMA_EXAMPLE_TTS, LLAMA_EXAMPLE_SERVER}));

+    // model-specific
+    add_opt(common_arg(
+        {"--tts-oute-default"},
+        string_format("use default OuteTTS models (note: can download weights from the internet)"),
+        [](common_params & params) {
+            params.hf_repo = "OuteAI/OuteTTS-0.2-500M-GGUF";
+            params.hf_file = "OuteTTS-0.2-500M-Q8_0.gguf";
+            params.vocoder.hf_repo = "ggml-org/WavTokenizer";
+            params.vocoder.hf_file = "WavTokenizer-Large-75-F16.gguf";
+        }
+    ).set_examples({LLAMA_EXAMPLE_TTS}));
+
     return ctx_arg;
 }
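A note on the new flag: --tts-oute-default fills in both the OuteTTS language model and the WavTokenizer vocoder it needs, so the TTS example can fetch everything from Hugging Face with one switch. A minimal Python sketch of the same defaults pattern (hypothetical names mirroring the C++ fields; the actual implementation is the lambda above):

from dataclasses import dataclass, field

@dataclass
class VocoderParams:
    hf_repo: str = ""
    hf_file: str = ""

@dataclass
class CommonParams:
    hf_repo: str = ""
    hf_file: str = ""
    vocoder: VocoderParams = field(default_factory=VocoderParams)

def apply_tts_oute_defaults(params: CommonParams) -> None:
    # One switch sets both model downloads, as the C++ lambda does.
    params.hf_repo = "OuteAI/OuteTTS-0.2-500M-GGUF"
    params.hf_file = "OuteTTS-0.2-500M-Q8_0.gguf"
    params.vocoder.hf_repo = "ggml-org/WavTokenizer"
    params.vocoder.hf_file = "WavTokenizer-Large-75-F16.gguf"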

convert_hf_to_gguf.py

Lines changed: 63 additions & 2 deletions
@@ -2200,6 +2200,15 @@ class Phi3MiniModel(Model):
     model_arch = gguf.MODEL_ARCH.PHI3

     def set_vocab(self):
+        # Phi-4 model uses GPT2Tokenizer
+        tokenizer_config_file = self.dir_model / 'tokenizer_config.json'
+        if tokenizer_config_file.is_file():
+            with open(tokenizer_config_file, "r", encoding="utf-8") as f:
+                tokenizer_config_json = json.load(f)
+            tokenizer_class = tokenizer_config_json['tokenizer_class']
+            if tokenizer_class == 'GPT2Tokenizer':
+                return self._set_vocab_gpt2()
+
         from sentencepiece import SentencePieceProcessor

         tokenizer_path = self.dir_model / 'tokenizer.model'
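This set_vocab change teaches the Phi-3 converter to recognize Phi-4 checkpoints, which declare GPT2Tokenizer in tokenizer_config.json rather than shipping a SentencePiece tokenizer.model. A self-contained sketch of that detection step (illustrative helper, not part of convert_hf_to_gguf.py):

import json
from pathlib import Path

def detect_tokenizer_class(dir_model: Path) -> str | None:
    # Phi-4 ships tokenizer_config.json with "tokenizer_class": "GPT2Tokenizer";
    # older Phi-3 checkpoints lack this hint and use SentencePiece instead.
    cfg = dir_model / "tokenizer_config.json"
    if not cfg.is_file():
        return None
    with open(cfg, "r", encoding="utf-8") as f:
        return json.load(f).get("tokenizer_class")

# hypothetical model directory, for illustration only
if detect_tokenizer_class(Path("phi-4")) == "GPT2Tokenizer":
    print("take the GPT-2 style BPE vocab path")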
@@ -2316,7 +2325,11 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_rope_dimension_count(rope_dims)
         self.gguf_writer.add_rope_freq_base(self.find_hparam(["rope_theta"]))
         self.gguf_writer.add_file_type(self.ftype)
-        self.gguf_writer.add_sliding_window(self.find_hparam(["sliding_window"]))
+        sliding_window = self.hparams.get("sliding_window")
+        # use zero value of sliding_window to distinguish Phi-4 from other PHI3 models
+        if sliding_window is None:
+            sliding_window = 0
+        self.gguf_writer.add_sliding_window(sliding_window)

     def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
         n_embd = self.find_hparam(["hidden_size", "n_embd"])
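Why the sentinel: Phi-4's config omits sliding_window entirely, so the old find_hparam lookup would fail on it; storing 0 avoids that and lets downstream code tell Phi-4 apart from sliding-window Phi-3 variants. A one-function sketch of the fallback:

def resolve_sliding_window(hparams: dict) -> int:
    # None (key absent, as in Phi-4) becomes the 0 sentinel;
    # real window sizes pass through unchanged.
    sliding_window = hparams.get("sliding_window")
    return 0 if sliding_window is None else sliding_window

assert resolve_sliding_window({}) == 0
assert resolve_sliding_window({"sliding_window": 2047}) == 2047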
@@ -2615,7 +2628,7 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         return [(self.map_tensor_name(name), data_torch)]


-@Model.register("BertModel", "CamembertModel", "RobertaModel")
+@Model.register("BertModel", "CamembertModel")
 class BertModel(Model):
     model_arch = gguf.MODEL_ARCH.BERT


@@ -2688,6 +2701,51 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         return [(self.map_tensor_name(name), data_torch)]


+@Model.register("RobertaModel")
+class RobertaModel(BertModel):
+    model_arch = gguf.MODEL_ARCH.BERT
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        # we need the pad_token_id to know how to chop down position_embd matrix
+        if (pad_token_id := self.hparams.get("pad_token_id")) is not None:
+            self._position_offset = 1 + pad_token_id
+            if "max_position_embeddings" in self.hparams:
+                self.hparams["max_position_embeddings"] -= self._position_offset
+        else:
+            self._position_offset = None
+
+    def set_vocab(self):
+        """Support BPE tokenizers for roberta models"""
+        bpe_tok_path = self.dir_model / "tokenizer.json"
+        if bpe_tok_path.exists():
+            self._set_vocab_gpt2()
+            self.gguf_writer.add_add_bos_token(True)
+            self.gguf_writer.add_add_eos_token(True)
+
+            # we need this to validate the size of the token_type embeddings
+            # though currently we are passing all zeros to the token_type embeddings
+            # "Sequence A" or "Sequence B"
+            self.gguf_writer.add_token_type_count(self.hparams.get("type_vocab_size", 1))
+
+        else:
+            return super().set_vocab()
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        # if name starts with "roberta.", remove the prefix
+        # e.g. https://huggingface.co/BAAI/bge-reranker-v2-m3/tree/main
+        if name.startswith("roberta."):
+            name = name[8:]
+
+        # position embeddings start at pad_token_id + 1, so just chop down the weight tensor
+        if name == "embeddings.position_embeddings.weight":
+            if self._position_offset is not None:
+                data_torch = data_torch[self._position_offset:, :]
+
+        return super().modify_tensors(data_torch, name, bid)
+
+
 @Model.register("NomicBertModel")
 class NomicBertModel(BertModel):
     model_arch = gguf.MODEL_ARCH.NOMIC_BERT
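The position-embedding trim is the subtle part of the new RobertaModel class: RoBERTa reserves position ids 0 through pad_token_id for padding, so usable positions start at pad_token_id + 1 and the leading rows of the weight matrix can be dropped. A toy illustration, assuming roberta-base style values (pad_token_id = 1, max_position_embeddings = 514):

import torch

pad_token_id = 1                        # roberta-base convention
max_pos = 514                           # 512 usable positions + 2 reserved rows
position_embd = torch.randn(max_pos, 8)

offset = pad_token_id + 1
trimmed = position_embd[offset:, :]     # what modify_tensors keeps
assert trimmed.shape[0] == max_pos - offset   # 512 usable positions remain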
@@ -3007,6 +3065,9 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         if new_name.endswith("time_mix_w2.weight"):
             data_torch = data_torch.permute(0, 2, 1)

+        if new_name.endswith("time_mix_decay.weight") or "lerp" in new_name:
+            data_torch = data_torch.squeeze()
+
         rescale_every_n_layers = self.hparams["rescale_every"]
         if rescale_every_n_layers > 0:
             if new_name.endswith("time_mix_output.weight") or new_name.endswith("channel_mix_value.weight"):
(binary file changed, 428 bytes; contents not shown)
