Skip to content

Commit 1a43247

Browse files
committed
Merge branch 'master' into imatrix
2 parents 61a21a4 + f5e96b3 commit 1a43247

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

70 files changed

+30660
-856
lines changed

.github/workflows/build.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -342,7 +342,7 @@ jobs:
342342
cd build
343343
export GGML_VK_VISIBLE_DEVICES=0
344344
# This is using llvmpipe and runs slower than other backends
345-
ctest -L main --verbose --timeout 3600
345+
ctest -L main --verbose --timeout 4200
346346
347347
ubuntu-22-cmake-hip:
348348
runs-on: ubuntu-22.04
Lines changed: 40 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,40 @@
1+
name: Update Operations Documentation
2+
3+
on:
4+
push:
5+
paths:
6+
- 'docs/ops/**'
7+
- 'scripts/create_ops_docs.py'
8+
pull_request:
9+
paths:
10+
- 'docs/ops/**'
11+
- 'scripts/create_ops_docs.py'
12+
13+
jobs:
14+
update-ops-docs:
15+
runs-on: ubuntu-latest
16+
17+
steps:
18+
- name: Checkout repository
19+
uses: actions/checkout@v4
20+
21+
- name: Set up Python
22+
uses: actions/setup-python@v5
23+
with:
24+
python-version: '3.x'
25+
26+
- name: Generate operations documentation to temporary file
27+
run: |
28+
mkdir -p /tmp/ops_check
29+
./scripts/create_ops_docs.py /tmp/ops_check/ops.md
30+
31+
- name: Check if docs/ops.md matches generated version
32+
run: |
33+
if ! diff -q docs/ops.md /tmp/ops_check/ops.md; then
34+
echo "Operations documentation (docs/ops.md) is not up to date with the backend CSV files."
35+
echo "To fix: run ./scripts/create_ops_docs.py and commit the updated docs/ops.md along with your changes"
36+
echo "Differences found:"
37+
diff docs/ops.md /tmp/ops_check/ops.md || true
38+
exit 1
39+
fi
40+
echo "Operations documentation is up to date."

README.md

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -6,9 +6,9 @@
66
[![Release](https://img.shields.io/github/v/release/ggml-org/llama.cpp)](https://github.com/ggml-org/llama.cpp/releases)
77
[![Server](https://github.com/ggml-org/llama.cpp/actions/workflows/server.yml/badge.svg)](https://github.com/ggml-org/llama.cpp/actions/workflows/server.yml)
88

9-
[Roadmap](https://github.com/users/ggerganov/projects/7) / [Manifesto](https://github.com/ggml-org/llama.cpp/discussions/205) / [ggml](https://github.com/ggml-org/ggml)
9+
[Manifesto](https://github.com/ggml-org/llama.cpp/discussions/205) / [ggml](https://github.com/ggml-org/ggml) / [ops](https://github.com/ggml-org/llama.cpp/blob/master/docs/ops.md)
1010

11-
Inference of Meta's [LLaMA](https://arxiv.org/abs/2302.13971) model (and others) in pure C/C++
11+
LLM inference in C/C++
1212

1313
## Recent API changes
1414

@@ -17,10 +17,9 @@ Inference of Meta's [LLaMA](https://arxiv.org/abs/2302.13971) model (and others)
1717

1818
## Hot topics
1919

20-
- 🔥 Multimodal support arrived in `llama-server`: [#12898](https://github.com/ggml-org/llama.cpp/pull/12898) | [documentation](./docs/multimodal.md)
21-
- A new binary `llama-mtmd-cli` is introduced to replace `llava-cli`, `minicpmv-cli`, `gemma3-cli` ([#13012](https://github.com/ggml-org/llama.cpp/pull/13012)) and `qwen2vl-cli` ([#13141](https://github.com/ggml-org/llama.cpp/pull/13141)), `libllava` will be deprecated
20+
- Hot PRs: [All](https://github.com/ggml-org/llama.cpp/pulls?q=is%3Apr+label%3Ahot+) | [Open](https://github.com/ggml-org/llama.cpp/pulls?q=is%3Apr+label%3Ahot+is%3Aopen)
21+
- Multimodal support arrived in `llama-server`: [#12898](https://github.com/ggml-org/llama.cpp/pull/12898) | [documentation](./docs/multimodal.md)
2222
- VS Code extension for FIM completions: https://github.com/ggml-org/llama.vscode
23-
- Universal [tool call support](./docs/function-calling.md) in `llama-server` https://github.com/ggml-org/llama.cpp/pull/9639
2423
- Vim/Neovim plugin for FIM completions: https://github.com/ggml-org/llama.vim
2524
- Introducing GGUF-my-LoRA https://github.com/ggml-org/llama.cpp/discussions/10123
2625
- Hugging Face Inference Endpoints now support GGUF out of the box! https://github.com/ggml-org/llama.cpp/discussions/9669

common/CMakeLists.txt

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -86,8 +86,7 @@ if (LLAMA_CURL)
8686
endif()
8787
target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_CURL)
8888
include_directories(${CURL_INCLUDE_DIRS})
89-
find_library(CURL_LIBRARY curl REQUIRED)
90-
set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} ${CURL_LIBRARY})
89+
set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} ${CURL_LIBRARIES})
9190
endif ()
9291

9392
if (LLAMA_LLGUIDANCE)
@@ -112,13 +111,13 @@ if (LLAMA_LLGUIDANCE)
112111

113112
ExternalProject_Add(llguidance_ext
114113
GIT_REPOSITORY https://github.com/guidance-ai/llguidance
115-
# v0.7.20 (+ fix to build on GCC 15):
116-
GIT_TAG b5b8b64dba11c4e4ee6b1d1450d3a3ae279891e8
114+
# v1.0.1:
115+
GIT_TAG d795912fedc7d393de740177ea9ea761e7905774
117116
PREFIX ${CMAKE_BINARY_DIR}/llguidance
118117
SOURCE_DIR ${LLGUIDANCE_SRC}
119118
BUILD_IN_SOURCE TRUE
120119
CONFIGURE_COMMAND ""
121-
BUILD_COMMAND cargo build --release
120+
BUILD_COMMAND cargo build --release --package llguidance
122121
INSTALL_COMMAND ""
123122
BUILD_BYPRODUCTS ${LLGUIDANCE_PATH}/${LLGUIDANCE_LIB_NAME} ${LLGUIDANCE_PATH}/llguidance.h
124123
UPDATE_COMMAND ""

common/arg.cpp

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2741,6 +2741,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
27412741
params.public_path = value;
27422742
}
27432743
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_STATIC_PATH"));
2744+
add_opt(common_arg(
2745+
{"--api-prefix"}, "PREFIX",
2746+
string_format("prefix path the server serves from, without the trailing slash (default: %s)", params.api_prefix.c_str()),
2747+
[](common_params & params, const std::string & value) {
2748+
params.api_prefix = value;
2749+
}
2750+
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_API_PREFIX"));
27442751
add_opt(common_arg(
27452752
{"--no-webui"},
27462753
string_format("Disable the Web UI (default: %s)", params.webui ? "enabled" : "disabled"),

common/common.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -370,6 +370,7 @@ struct common_params {
370370

371371
std::string hostname = "127.0.0.1";
372372
std::string public_path = ""; // NOLINT
373+
std::string api_prefix = ""; // NOLINT
373374
std::string chat_template = ""; // NOLINT
374375
bool use_jinja = false; // NOLINT
375376
bool enable_chat_template = true;

0 commit comments

Comments
 (0)