
Commit a024747

Merge branch 'master' into llamacli-tools

2 parents a726ada + fef0cbe


93 files changed, +9515 −3765 lines

.github/workflows/build.yml

Lines changed: 2 additions & 2 deletions
@@ -10,10 +10,10 @@ on:
   push:
     branches:
       - master
-    paths: ['.github/workflows/build.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal']
+    paths: ['.github/workflows/build.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal', '**/*.comp']
   pull_request:
     types: [opened, synchronize, reopened]
-    paths: ['.github/workflows/build.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal']
+    paths: ['.github/workflows/build.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal', '**/*.comp']

 concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}

.github/workflows/server.yml

Lines changed: 24 additions & 1 deletion
@@ -81,13 +81,36 @@ jobs:
         with:
           node-version: '22.11.0'

+      - name: WebUI - Install dependencies
+        id: webui_lint
+        run: |
+          cd examples/server/webui
+          npm ci
+
+      - name: WebUI - Check code format
+        id: webui_format
+        run: |
+          git config --global --add safe.directory $(realpath .)
+          cd examples/server/webui
+          git status
+
+          npm run format
+          git status
+          modified_files="$(git status -s)"
+          echo "Modified files: ${modified_files}"
+          if [ -n "${modified_files}" ]; then
+            echo "Files do not follow coding style. To fix: npm run format"
+            echo "${modified_files}"
+            exit 1
+          fi
+
       - name: Verify bundled index.html
         id: verify_server_index_html
         run: |
           git config --global --add safe.directory $(realpath .)
           cd examples/server/webui
           git status
-          npm ci
+
           npm run build
           git status
           modified_files="$(git status -s)"
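The new webui_format step can be reproduced locally before pushing: run npm ci and then npm run format inside examples/server/webui, and if git status -s prints nothing afterwards the CI check will pass (these are the same commands the step above executes).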

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
@@ -233,4 +233,4 @@ configure_file(cmake/llama.pc.in
         @ONLY)

 install(FILES "${CMAKE_CURRENT_BINARY_DIR}/llama.pc"
-        DESTINATION lib/pkgconfig)
+        DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)

README.md

Lines changed: 2 additions & 0 deletions
@@ -189,6 +189,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
 - [ramalama](https://github.com/containers/ramalama) (MIT)
 - [semperai/amica](https://github.com/semperai/amica) (MIT)
 - [withcatai/catai](https://github.com/withcatai/catai) (MIT)
+- [Autopen](https://github.com/blackhole89/autopen) (GPL)

 </details>

@@ -234,6 +235,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
 | [HIP](docs/build.md#hip) | AMD GPU |
 | [Vulkan](docs/build.md#vulkan) | GPU |
 | [CANN](docs/build.md#cann) | Ascend NPU |
+| [OpenCL](docs/backend/OPENCL.md) | Adreno GPU |

 ## Building the project

cmake/llama.pc.in

Lines changed: 5 additions & 5 deletions
@@ -1,10 +1,10 @@
 prefix=@CMAKE_INSTALL_PREFIX@
-exec_prefix=${prefix}
-libdir=${exec_prefix}/lib
-includedir=${prefix}/include
+exec_prefix=@CMAKE_INSTALL_PREFIX@
+libdir=@CMAKE_INSTALL_FULL_LIBDIR@
+includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@

 Name: llama
 Description: Port of Facebook's LLaMA model in C/C++
-Version: @PROJECT_VERSION@
-Libs: -L${libdir} -lggml -lggml-base -lllama
+Version: @LLAMA_INSTALL_VERSION@
+Libs: -L${libdir} -lggml -lggml-base -lllama
 Cflags: -I${includedir}
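With libdir and includedir now resolving to the real install locations, downstream builds can locate the library through pkg-config. A minimal consumer sketch (a hypothetical main.cpp, assuming llama.cpp has been installed so that llama.pc is visible to pkg-config; build with: g++ main.cpp $(pkg-config --cflags --libs llama)):

    #include "llama.h"   // found via the Cflags reported by pkg-config
    #include <cstdio>

    int main() {
        // llama_print_system_info() is part of the public llama.h API and needs
        // no model; it just confirms the library resolves, links and runs.
        std::printf("%s\n", llama_print_system_info());
        return 0;
    }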

common/arg.cpp

Lines changed: 43 additions & 1 deletion
@@ -674,7 +674,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
     ));
     add_opt(common_arg(
         {"--no-context-shift"},
-        string_format("disables context shift on inifinite text generation (default: %s)", params.ctx_shift ? "disabled" : "enabled"),
+        string_format("disables context shift on infinite text generation (default: %s)", params.ctx_shift ? "disabled" : "enabled"),
         [](common_params & params) {
             params.ctx_shift = false;
         }
@@ -2341,5 +2341,47 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         }
     ).set_examples({LLAMA_EXAMPLE_TTS}));

+    add_opt(common_arg(
+        {"--embd-bge-small-en-default"},
+        string_format("use default bge-small-en-v1.5 model (note: can download weights from the internet)"),
+        [](common_params & params) {
+            params.hf_repo = "ggml-org/bge-small-en-v1.5-Q8_0-GGUF";
+            params.hf_file = "bge-small-en-v1.5-q8_0.gguf";
+            params.pooling_type = LLAMA_POOLING_TYPE_NONE;
+            params.embd_normalize = 2;
+            params.n_ctx = 512;
+            params.verbose_prompt = true;
+            params.embedding = true;
+        }
+    ).set_examples({LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_SERVER}));
+
+    add_opt(common_arg(
+        {"--embd-e5-small-en-default"},
+        string_format("use default e5-small-v2 model (note: can download weights from the internet)"),
+        [](common_params & params) {
+            params.hf_repo = "ggml-org/e5-small-v2-Q8_0-GGUF";
+            params.hf_file = "e5-small-v2-q8_0.gguf";
+            params.pooling_type = LLAMA_POOLING_TYPE_NONE;
+            params.embd_normalize = 2;
+            params.n_ctx = 512;
+            params.verbose_prompt = true;
+            params.embedding = true;
+        }
+    ).set_examples({LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_SERVER}));
+
+    add_opt(common_arg(
+        {"--embd-gte-small-default"},
+        string_format("use default gte-small model (note: can download weights from the internet)"),
+        [](common_params & params) {
+            params.hf_repo = "ggml-org/gte-small-Q8_0-GGUF";
+            params.hf_file = "gte-small-q8_0.gguf";
+            params.pooling_type = LLAMA_POOLING_TYPE_NONE;
+            params.embd_normalize = 2;
+            params.n_ctx = 512;
+            params.verbose_prompt = true;
+            params.embedding = true;
+        }
+    ).set_examples({LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_SERVER}));
+
     return ctx_arg;
 }
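In practice, each of these presets points hf_repo/hf_file at a ggml-org quantized checkpoint, forces embedding mode, and caps the context at 512 tokens, so an embedding endpoint can be started with a single flag, for example llama-server --embd-gte-small-default or llama-embedding --embd-bge-small-en-default; the weights are downloaded from the listed Hugging Face repositories if not already present, hence the note in the help text.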

common/chat-template.hpp

Lines changed: 21 additions & 7 deletions
@@ -249,16 +249,30 @@ class chat_template {
                 inputs.add_generation_prompt = false;
                 full = apply(inputs);
             }
-
-            if (full.find(prefix) != 0) {
-                if (prefix.rfind(eos_token_) == prefix.size() - eos_token_.size()) {
-                    prefix = prefix.substr(0, prefix.size() - eos_token_.size());
+            auto eos_pos_last = full.rfind(eos_token_);
+            if (eos_pos_last == prefix.size() - eos_token_.size() ||
+                (full[full.size() - 1] == '\n' && (eos_pos_last == full.size() - eos_token_.size() - 1))) {
+                full = full.substr(0, eos_pos_last);
+            }
+            size_t common_prefix_length = 0;
+            for (size_t i = 0; i < prefix.size() && i < full.size(); ++i) {
+                if (prefix[i] != full[i]) {
+                    break;
                 }
+                if (prefix[i] == '<') {
+                    // DeepSeek R1's template (as of 20250209) adds a trailing <think> if add_generation_prompt,
+                    // but it removes thinking tags for past messages.
+                    // The prefix and full strings diverge at <think> vs. <|tool▁calls▁begin|>, we avoid consuming the leading <.
+                    continue;
+                }
+                common_prefix_length = i + 1;
             }
-            if (full.find(prefix) != 0) {
+            auto example = full.substr(common_prefix_length);
+            if (example.find("tool_name") == std::string::npos && example.find("some_value") == std::string::npos) {
                 fprintf(stderr, "Failed to infer a tool call example (possible template bug)\n");
+            } else {
+                tool_call_example_ = example;
             }
-            tool_call_example_ = full.substr(prefix.size());
         }
     } catch (const std::exception & e) {
         fprintf(stderr, "Failed to generate tool call example: %s\n", e.what());
@@ -363,7 +377,7 @@ class chat_template {
         if (polyfill_tools) {
             adjusted_messages = add_system(inputs.messages,
                 "You can call any of the following tools to satisfy the user's requests: " + minja::Value(inputs.tools).dump(2, /* to_json= */ true) +
-                (!polyfill_tool_call_example || tool_call_example_.empty() ? "" : "\n\nExample tool call syntax:\n\n" + tool_call_example_));
+                (!polyfill_tool_call_example || tool_call_example_.empty() ? "" : "\n\nExample tool call syntax:\n\n" + tool_call_example_ + "\n\n"));
         } else {
             adjusted_messages = inputs.messages;
         }
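The prefix-matching change above can be illustrated in isolation. A standalone sketch (not library code; the strings are hypothetical stand-ins for the rendered templates) of how the longest common prefix is computed while refusing to end on a '<', so the inferred example keeps its opening tag:

    #include <cstdio>
    #include <string>

    // Longest common prefix of `prefix` and `full`, except that a matching '<'
    // is never counted as the last character of the prefix.
    static size_t common_prefix_len(const std::string & prefix, const std::string & full) {
        size_t len = 0;
        for (size_t i = 0; i < prefix.size() && i < full.size(); ++i) {
            if (prefix[i] != full[i]) {
                break;
            }
            if (prefix[i] == '<') {
                continue; // leave the '<' with the divergent suffix
            }
            len = i + 1;
        }
        return len;
    }

    int main() {
        std::string prefix = "Assistant:<think>";                                // generation prompt only
        std::string full   = "Assistant:<|tool_call_begin|>{\"tool_name\": 1}";  // prompt plus example tool call
        // prints: <|tool_call_begin|>{"tool_name": 1}  (the leading '<' stays with the example)
        std::printf("%s\n", full.substr(common_prefix_len(prefix, full)).c_str());
        return 0;
    }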

common/common.h

Lines changed: 6 additions & 6 deletions
@@ -454,13 +454,13 @@ bool set_process_priority(enum ggml_sched_priority prio);
 //

 #ifdef __GNUC__
-#ifdef __MINGW32__
-#define LLAMA_COMMON_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
+#    if defined(__MINGW32__) && !defined(__clang__)
+#        define LLAMA_COMMON_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
+#    else
+#        define LLAMA_COMMON_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
+#    endif
 #else
-#define LLAMA_COMMON_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
-#endif
-#else
-#define LLAMA_COMMON_ATTRIBUTE_FORMAT(...)
+#    define LLAMA_COMMON_ATTRIBUTE_FORMAT(...)
 #endif

 LLAMA_COMMON_ATTRIBUTE_FORMAT(1, 2)
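For context, the macro lets -Wformat diagnostics apply to printf-style helpers. A small sketch (assuming the common/ headers are on the include path; log_status is a hypothetical helper, not code from this commit):

    #include "common.h"   // provides LLAMA_COMMON_ATTRIBUTE_FORMAT
    #include <cstdarg>
    #include <cstdio>

    // The (2, 3) pair says argument 2 is the format string and the
    // variadic arguments start at position 3.
    LLAMA_COMMON_ATTRIBUTE_FORMAT(2, 3)
    static void log_status(int level, const char * fmt, ...) {
        va_list args;
        va_start(args, fmt);
        std::fprintf(stderr, "[%d] ", level);
        std::vfprintf(stderr, fmt, args);
        va_end(args);
    }

    int main() {
        log_status(1, "loaded %d tensors\n", 291);        // OK
        // log_status(1, "loaded %d tensors\n", "many");  // -Wformat: %d expects int, got const char *
        return 0;
    }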

common/llguidance.cpp

Lines changed: 3 additions & 3 deletions
@@ -254,10 +254,10 @@ llama_sampler * llama_sampler_init_llg(const llama_vocab * vocab, const char * g
         };
     }

-    return new llama_sampler{
+    return llama_sampler_init(
         /* .iface = */ &llama_sampler_llg_i,
-        /* .ctx = */ ctx,
-    };
+        /* .ctx = */ ctx
+    );
 }

 #else

common/log.h

Lines changed: 2 additions & 1 deletion
@@ -2,6 +2,7 @@

 #include "ggml.h" // for ggml_log_level

+#define LOG_CLR_TO_EOL  "\033[K\r"
 #define LOG_COL_DEFAULT "\033[0m"
 #define LOG_COL_BOLD    "\033[1m"
 #define LOG_COL_RED     "\033[31m"
@@ -14,7 +15,7 @@

 #ifndef __GNUC__
 #    define LOG_ATTRIBUTE_FORMAT(...)
-#elif defined(__MINGW32__)
+#elif defined(__MINGW32__) && !defined(__clang__)
 #    define LOG_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
 #else
 #    define LOG_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
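The new LOG_CLR_TO_EOL escape ("\033[K" clears from the cursor to the end of the line, "\r" returns the cursor to column 0) is the usual building block for redrawing a status line in place. A minimal standalone sketch using the same escape sequence (not code from this commit):

    #include <cstdio>

    int main() {
        for (int pct = 0; pct <= 100; pct += 25) {
            // clear the rest of the line, return to column 0, then redraw the
            // status; the fixed-width %3d ensures the new text fully overwrites.
            std::printf("\033[K\rdecoding: %3d%%", pct);
            std::fflush(stdout);
        }
        std::printf("\n");
        return 0;
    }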
