Skip to content

Commit 0fa2a81

Browse files
authored
Merge branch 'ggml-org:master' into master
2 parents d7b5465 + be48528 commit 0fa2a81

File tree

23 files changed

+758
-617
lines changed

23 files changed

+758
-617
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -240,7 +240,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
240240
<details>
241241
<summary>Infrastructure</summary>
242242

243-
- [Paddler](https://github.com/distantmagic/paddler) - Stateful load balancer custom-tailored for llama.cpp
243+
- [Paddler](https://github.com/intentee/paddler) - Open-source LLMOps platform for hosting and scaling AI in your own infrastructure
244244
- [GPUStack](https://github.com/gpustack/gpustack) - Manage GPU clusters for running LLMs
245245
- [llama_cpp_canister](https://github.com/onicai/llama_cpp_canister) - llama.cpp as a smart contract on the Internet Computer, using WebAssembly
246246
- [llama-swap](https://github.com/mostlygeek/llama-swap) - transparent proxy that adds automatic model switching with llama-server

common/arg.cpp

Lines changed: 1 addition & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -2949,11 +2949,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
29492949
"- deepseek: puts thoughts in `message.reasoning_content` (except in streaming mode, which behaves as `none`)\n"
29502950
"(default: auto)",
29512951
[](common_params & params, const std::string & value) {
2952-
/**/ if (value == "deepseek") { params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK; }
2953-
else if (value == "deepseek-legacy") { params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY; }
2954-
else if (value == "none") { params.reasoning_format = COMMON_REASONING_FORMAT_NONE; }
2955-
else if (value == "auto") { params.reasoning_format = COMMON_REASONING_FORMAT_AUTO; }
2956-
else { throw std::invalid_argument("invalid value"); }
2952+
params.reasoning_format = common_reasoning_format_from_name(value);
29572953
}
29582954
).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_MAIN}).set_env("LLAMA_ARG_THINK"));
29592955
add_opt(common_arg(

common/chat.cpp

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -552,6 +552,17 @@ common_chat_templates_ptr common_chat_templates_init(
552552
default_template_src = CHATML_TEMPLATE_SRC;
553553
}
554554
}
555+
556+
// TODO @ngxson : this is a temporary hack to prevent chat template from throwing an error
557+
// Ref: https://github.com/ggml-org/llama.cpp/pull/15230#issuecomment-3173959633
558+
if (default_template_src.find("<|channel|>") != std::string::npos
559+
// search for the error message and patch it
560+
&& default_template_src.find("in message.content or") != std::string::npos) {
561+
string_replace_all(default_template_src,
562+
"{%- if \"<|channel|>analysis<|message|>\" in message.content or \"<|channel|>final<|message|>\" in message.content %}",
563+
"{%- if false %}");
564+
}
565+
555566
std::string token_bos = bos_token_override;
556567
std::string token_eos = eos_token_override;
557568
bool add_bos = false;
@@ -625,6 +636,19 @@ const char * common_reasoning_format_name(common_reasoning_format format) {
625636
}
626637
}
627638

639+
common_reasoning_format common_reasoning_format_from_name(const std::string & format) {
640+
if (format == "none") {
641+
return COMMON_REASONING_FORMAT_NONE;
642+
} else if (format == "auto") {
643+
return COMMON_REASONING_FORMAT_AUTO;
644+
} else if (format == "deepseek") {
645+
return COMMON_REASONING_FORMAT_DEEPSEEK;
646+
} else if (format == "deepseek-legacy") {
647+
return COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY;
648+
}
649+
throw std::runtime_error("Unknown reasoning format: " + format);
650+
}
651+
628652
static std::string wrap_code_as_arguments(common_chat_msg_parser & builder, const std::string & code) {
629653
std::string arguments;
630654
if (builder.is_partial()) {

common/chat.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -191,6 +191,7 @@ std::string common_chat_format_example(
191191

192192
const char* common_chat_format_name(common_chat_format format);
193193
const char* common_reasoning_format_name(common_reasoning_format format);
194+
common_reasoning_format common_reasoning_format_from_name(const std::string & format);
194195
common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_syntax & syntax);
195196

196197
common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice);

convert_hf_to_gguf.py

Lines changed: 255 additions & 99 deletions
Large diffs are not rendered by default.

convert_lora_to_gguf.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -340,7 +340,7 @@ def load_hparams_from_hf(hf_model_id: str) -> dict[str, Any]:
340340
sys.exit(1)
341341
else:
342342
logger.info(f"Loading base model: {dir_base_model.name}")
343-
hparams = ModelBase.load_hparams(dir_base_model)
343+
hparams = ModelBase.load_hparams(dir_base_model, False)
344344

345345
with torch.inference_mode():
346346
try:

docs/multimodal/minicpmo2.6.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@ If there are differences in usage, please refer to the official build [documenta
1313

1414
Clone llama.cpp:
1515
```bash
16-
git clone https://github.com/ggerganov/llama.cpp
16+
git clone https://github.com/ggml-org/llama.cpp
1717
cd llama.cpp
1818
```
1919

docs/multimodal/minicpmv2.6.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@ If there are differences in usage, please refer to the official build [documenta
1212

1313
Clone llama.cpp:
1414
```bash
15-
git clone https://github.com/ggerganov/llama.cpp
15+
git clone https://github.com/ggml-org/llama.cpp
1616
cd llama.cpp
1717
```
1818

0 commit comments

Comments (0)