Commit f17d4d3: Merge branch 'master' into musa

2 parents: 140d6a0 + 27e8a23

104 files changed: +10755 additions, -3980 deletions

.github/workflows/build.yml (2 additions, 2 deletions)

```diff
@@ -10,10 +10,10 @@ on:
   push:
     branches:
       - master
-    paths: ['.github/workflows/build.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal']
+    paths: ['.github/workflows/build.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal', '**/*.comp']
   pull_request:
     types: [opened, synchronize, reopened]
-    paths: ['.github/workflows/build.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal']
+    paths: ['.github/workflows/build.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal', '**/*.comp']
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
```

.github/workflows/server.yml (24 additions, 1 deletion)

```diff
@@ -81,13 +81,36 @@ jobs:
         with:
           node-version: '22.11.0'
 
+      - name: WebUI - Install dependencies
+        id: webui_lint
+        run: |
+          cd examples/server/webui
+          npm ci
+
+      - name: WebUI - Check code format
+        id: webui_format
+        run: |
+          git config --global --add safe.directory $(realpath .)
+          cd examples/server/webui
+          git status
+
+          npm run format
+          git status
+          modified_files="$(git status -s)"
+          echo "Modified files: ${modified_files}"
+          if [ -n "${modified_files}" ]; then
+            echo "Files do not follow coding style. To fix: npm run format"
+            echo "${modified_files}"
+            exit 1
+          fi
+
       - name: Verify bundled index.html
         id: verify_server_index_html
         run: |
           git config --global --add safe.directory $(realpath .)
           cd examples/server/webui
           git status
-          npm ci
+
           npm run build
           git status
           modified_files="$(git status -s)"
```
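To reproduce the new check locally (a sketch of the same steps, assuming Node.js 22 and a clean working tree): run `npm ci` followed by `npm run format` inside `examples/server/webui`, then confirm that `git status -s` prints nothing; any output means the sources are not formatted. Note that `npm ci` moved out of the "Verify bundled index.html" step into the new install step, so the later `npm run build` relies on the dependencies installed earlier in the same job.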

CMakeLists.txt (1 addition, 1 deletion)

```diff
@@ -235,4 +235,4 @@ configure_file(cmake/llama.pc.in
         @ONLY)
 
 install(FILES "${CMAKE_CURRENT_BINARY_DIR}/llama.pc"
-        DESTINATION lib/pkgconfig)
+        DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
```

README.md (2 additions, 0 deletions)

```diff
@@ -189,6 +189,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
 - [ramalama](https://github.com/containers/ramalama) (MIT)
 - [semperai/amica](https://github.com/semperai/amica) (MIT)
 - [withcatai/catai](https://github.com/withcatai/catai) (MIT)
+- [Autopen](https://github.com/blackhole89/autopen) (GPL)
 
 </details>
 
@@ -234,6 +235,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
 | [HIP](docs/build.md#hip) | AMD GPU |
 | [Vulkan](docs/build.md#vulkan) | GPU |
 | [CANN](docs/build.md#cann) | Ascend NPU |
+| [OpenCL](docs/backend/OPENCL.md) | Adreno GPU |
 
 ## Building the project
```
cmake/llama.pc.in (5 additions, 5 deletions)

```diff
@@ -1,10 +1,10 @@
 prefix=@CMAKE_INSTALL_PREFIX@
-exec_prefix=${prefix}
-libdir=${exec_prefix}/lib
-includedir=${prefix}/include
+exec_prefix=@CMAKE_INSTALL_PREFIX@
+libdir=@CMAKE_INSTALL_FULL_LIBDIR@
+includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@
 
 Name: llama
 Description: Port of Facebook's LLaMA model in C/C++
-Version: @PROJECT_VERSION@
-Libs: -L${libdir} -lggml -lggml-base -lllama
+Version: @LLAMA_INSTALL_VERSION@
+Libs: -L${libdir} -lggml -lggml-base -lllama
 Cflags: -I${includedir}
```
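With `libdir` and `includedir` now taken from `CMAKE_INSTALL_FULL_LIBDIR` and `CMAKE_INSTALL_FULL_INCLUDEDIR`, the generated `llama.pc` tracks non-default install layouts such as `lib64`. A minimal consumer sketch, assuming an installed `llama.pc` is visible on `PKG_CONFIG_PATH` and the program is compiled with `c++ main.cpp $(pkg-config --cflags --libs llama)`:

```cpp
// main.cpp: smoke-tests the include and link flags exported by llama.pc.
// llama_print_system_info() is declared in llama.h and needs no model.
#include <cstdio>

#include "llama.h"

int main() {
    std::printf("%s\n", llama_print_system_info());
    return 0;
}
```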

common/arg.cpp (50 additions, 1 deletion)

```diff
@@ -674,7 +674,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
     ));
     add_opt(common_arg(
         {"--no-context-shift"},
-        string_format("disables context shift on inifinite text generation (default: %s)", params.ctx_shift ? "disabled" : "enabled"),
+        string_format("disables context shift on infinite text generation (default: %s)", params.ctx_shift ? "disabled" : "enabled"),
         [](common_params & params) {
             params.ctx_shift = false;
         }
@@ -946,6 +946,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.sampling.min_p = std::stof(value);
         }
     ).set_sparam());
+    add_opt(common_arg(
+        {"--top-nsigma"}, "N",
+        string_format("top-n-sigma sampling (default: %.1f, -1.0 = disabled)", params.sampling.top_n_sigma),
+        [](common_params & params, const std::string & value) {
+            params.sampling.top_n_sigma = std::stof(value);
+        }
+    ).set_examples({LLAMA_EXAMPLE_MAIN}).set_sparam());
     add_opt(common_arg(
         {"--xtc-probability"}, "N",
         string_format("xtc probability (default: %.1f, 0.0 = disabled)", (double)params.sampling.xtc_probability),
```
```diff
@@ -2324,5 +2331,47 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         }
     ).set_examples({LLAMA_EXAMPLE_TTS}));
 
+    add_opt(common_arg(
+        {"--embd-bge-small-en-default"},
+        string_format("use default bge-small-en-v1.5 model (note: can download weights from the internet)"),
+        [](common_params & params) {
+            params.hf_repo = "ggml-org/bge-small-en-v1.5-Q8_0-GGUF";
+            params.hf_file = "bge-small-en-v1.5-q8_0.gguf";
+            params.pooling_type = LLAMA_POOLING_TYPE_NONE;
+            params.embd_normalize = 2;
+            params.n_ctx = 512;
+            params.verbose_prompt = true;
+            params.embedding = true;
+        }
+    ).set_examples({LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_SERVER}));
+
+    add_opt(common_arg(
+        {"--embd-e5-small-en-default"},
+        string_format("use default e5-small-v2 model (note: can download weights from the internet)"),
+        [](common_params & params) {
+            params.hf_repo = "ggml-org/e5-small-v2-Q8_0-GGUF";
+            params.hf_file = "e5-small-v2-q8_0.gguf";
+            params.pooling_type = LLAMA_POOLING_TYPE_NONE;
+            params.embd_normalize = 2;
+            params.n_ctx = 512;
+            params.verbose_prompt = true;
+            params.embedding = true;
+        }
+    ).set_examples({LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_SERVER}));
+
+    add_opt(common_arg(
+        {"--embd-gte-small-default"},
+        string_format("use default gte-small model (note: can download weights from the internet)"),
+        [](common_params & params) {
+            params.hf_repo = "ggml-org/gte-small-Q8_0-GGUF";
+            params.hf_file = "gte-small-q8_0.gguf";
+            params.pooling_type = LLAMA_POOLING_TYPE_NONE;
+            params.embd_normalize = 2;
+            params.n_ctx = 512;
+            params.verbose_prompt = true;
+            params.embedding = true;
+        }
+    ).set_examples({LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_SERVER}));
+
     return ctx_arg;
 }
```
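Each of the three `--embd-*-default` flags is a preset bundle: it points `hf_repo` and `hf_file` at a Q8_0 GGUF under the ggml-org Hugging Face organization, disables pooling, selects Euclidean normalization (`embd_normalize = 2`), caps the context at 512 tokens, and turns on embedding mode. So something like `llama-server --embd-bge-small-en-default` should bring up an embedding endpoint with no further flags, downloading the weights on first use as the help text warns.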

common/chat-template.hpp (21 additions, 7 deletions)

```diff
@@ -249,16 +249,30 @@ class chat_template {
                 inputs.add_generation_prompt = false;
                 full = apply(inputs);
             }
-
-            if (full.find(prefix) != 0) {
-                if (prefix.rfind(eos_token_) == prefix.size() - eos_token_.size()) {
-                    prefix = prefix.substr(0, prefix.size() - eos_token_.size());
+            auto eos_pos_last = full.rfind(eos_token_);
+            if (eos_pos_last == prefix.size() - eos_token_.size() ||
+                (full[full.size() - 1] == '\n' && (eos_pos_last == full.size() - eos_token_.size() - 1))) {
+                full = full.substr(0, eos_pos_last);
+            }
+            size_t common_prefix_length = 0;
+            for (size_t i = 0; i < prefix.size() && i < full.size(); ++i) {
+                if (prefix[i] != full[i]) {
+                    break;
                 }
+                if (prefix[i] == '<') {
+                    // DeepSeek R1's template (as of 20250209) adds a trailing <think> if add_generation_prompt,
+                    // but it removes thinking tags for past messages.
+                    // The prefix and full strings diverge at <think> vs. <|tool▁calls▁begin|>, we avoid consuming the leading <.
+                    continue;
+                }
+                common_prefix_length = i + 1;
             }
-            if (full.find(prefix) != 0) {
+            auto example = full.substr(common_prefix_length);
+            if (example.find("tool_name") == std::string::npos && example.find("some_value") == std::string::npos) {
                 fprintf(stderr, "Failed to infer a tool call example (possible template bug)\n");
+            } else {
+                tool_call_example_ = example;
             }
-            tool_call_example_ = full.substr(prefix.size());
         }
     } catch (const std::exception & e) {
         fprintf(stderr, "Failed to generate tool call example: %s\n", e.what());
@@ -363,7 +377,7 @@ class chat_template {
         if (polyfill_tools) {
             adjusted_messages = add_system(inputs.messages,
                 "You can call any of the following tools to satisfy the user's requests: " + minja::Value(inputs.tools).dump(2, /* to_json= */ true) +
-                (!polyfill_tool_call_example || tool_call_example_.empty() ? "" : "\n\nExample tool call syntax:\n\n" + tool_call_example_));
+                (!polyfill_tool_call_example || tool_call_example_.empty() ? "" : "\n\nExample tool call syntax:\n\n" + tool_call_example_ + "\n\n"));
         } else {
             adjusted_messages = inputs.messages;
         }
```
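The old `full.find(prefix) != 0` check required `full` to start with `prefix` verbatim; the replacement walks the two renderings character by character and takes everything past the longest common prefix as the tool-call example, refusing to advance past a `<` so a divergence inside a tag is not split mid-token. A condensed sketch of just that scan, as a hypothetical free function outside the real template machinery:

```cpp
#include <cstddef>
#include <string>

// prefix: the chat rendered without a tool call; full: the same chat with one.
// Returns the suffix of full after their common prefix, i.e. the example syntax.
std::string infer_tool_call_example(const std::string & prefix, const std::string & full) {
    std::size_t common_prefix_length = 0;
    for (std::size_t i = 0; i < prefix.size() && i < full.size(); ++i) {
        if (prefix[i] != full[i]) {
            break;
        }
        if (prefix[i] == '<') {
            // Do not consume a matching '<': templates such as DeepSeek R1
            // diverge at <think> vs. <|tool▁calls▁begin|>, and cutting there
            // would strip the leading '<' from the extracted example.
            continue;
        }
        common_prefix_length = i + 1;
    }
    return full.substr(common_prefix_length);
}
```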

common/common.h (7 additions, 6 deletions)

```diff
@@ -140,6 +140,7 @@ struct common_params_sampling {
     int32_t dry_allowed_length = 2; // tokens extending repetitions beyond this receive penalty
     int32_t dry_penalty_last_n = -1; // how many tokens to scan for repetitions (0 = disable penalty, -1 = context size)
     int32_t mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
+    float top_n_sigma = -1.00f; // -1.0 = disabled
     float mirostat_tau = 5.00f; // target entropy
     float mirostat_eta = 0.10f; // learning rate
     bool ignore_eos = false;
@@ -424,13 +425,13 @@ bool set_process_priority(enum ggml_sched_priority prio);
 //
 
 #ifdef __GNUC__
-#ifdef __MINGW32__
-#define LLAMA_COMMON_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
+#    if defined(__MINGW32__) && !defined(__clang__)
+#        define LLAMA_COMMON_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
+#    else
+#        define LLAMA_COMMON_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
+#    endif
 #else
-#define LLAMA_COMMON_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
-#endif
-#else
-#define LLAMA_COMMON_ATTRIBUTE_FORMAT(...)
+#    define LLAMA_COMMON_ATTRIBUTE_FORMAT(...)
 #endif
 
 LLAMA_COMMON_ATTRIBUTE_FORMAT(1, 2)
```
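The reworked macro keeps the `gnu_printf` archetype for MinGW GCC but falls back to plain `printf` when the compiler is clang (which, to my understanding, does not accept `gnu_printf`). Either way the attribute lets the compiler type-check variadic arguments against the format string; a small illustration with a hypothetical logging function:

```cpp
// Hypothetical declaration using the macro: the (1, 2) indices say the format
// string is parameter 1 and the checked variadic arguments start at parameter 2.
LLAMA_COMMON_ATTRIBUTE_FORMAT(1, 2)
void log_printf(const char * fmt, ...);

void demo() {
    log_printf("decoded %d tokens\n", 42);    // OK: %d matches an int
    // log_printf("decoded %s tokens\n", 42); // would warn: %s expects char *
}
```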

common/llguidance.cpp (3 additions, 3 deletions)

```diff
@@ -254,10 +254,10 @@ llama_sampler * llama_sampler_init_llg(const llama_vocab * vocab, const char * g
         };
     }
 
-    return new llama_sampler{
+    return llama_sampler_init(
         /* .iface = */ &llama_sampler_llg_i,
-        /* .ctx   = */ ctx,
-    };
+        /* .ctx   = */ ctx
+    );
 }
 
 #else
```
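A note on the change: constructing the sampler through `llama_sampler_init` instead of aggregate-initializing `llama_sampler` keeps this code decoupled from the struct's layout and lets the library own the allocation; that reading is inferred from the call's shape, since the diff does not show the function's definition.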

common/log.cpp (1 addition, 0 deletions)

```diff
@@ -1,5 +1,6 @@
 #include "log.h"
 
+#include <chrono>
 #include <condition_variable>
 #include <cstdarg>
 #include <cstdio>
```
