Skip to content
This repository was archived by the owner on Sep 28, 2025. It is now read-only.

Commit 46afb6f

Browse files
committed
Changed:
- `llama.cpp` revision `6152129d05870cb38162c422c6ba80434e021e9f` Fixed: - Fixed build process, json patches. - Reverted server code to previous version due to bug.
1 parent 63ca880 commit 46afb6f

14 files changed

+1434
-239
lines changed

BUILD.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ poetry run clean
1818
poetry run cibuildwheel --output-dir wheelhouse --platform linux --arch x86_64 .
1919

2020
# aarch64
21-
docker run --rm --privileged linuxkit/binfmt:v0.8
21+
docker run --rm --privileged linuxkit/binfmt:v1.0.0
2222
poetry run cibuildwheel --output-dir wheelhouse --platform linux --arch aarch64 .
2323

2424
# pyodide, pyscript, wasm (NOTE: cannot be published to PyPI)

CHANGELOG.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,19 @@
11
# CHANGELOG
22

3+
## v0.4.17
4+
5+
Changed:
6+
- `llama.cpp` revision `6152129d05870cb38162c422c6ba80434e021e9f`
7+
8+
Fixed:
9+
- Fixed build process, json patches.
10+
- Reverted server code to the previous version due to a bug.
11+
312
## v0.4.16
413

14+
Added:
15+
- Dynamically load/unload models while executing prompts in parallel.
16+
517
Changed:
618
- `llama.cpp` revision `adc5dd92e8aea98f5e7ac84f6e1bc15de35130b5`
719

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@ NOTE: Currently supported operating system is **Linux** (`manylinux_2_28` and `m
2020

2121
## News
2222

23-
- **Jan 14 2025, v0.4.14+**: Modular llama.cpp build using `cmake` build system. Deprecated `make` build system.
23+
- **Jan 15 2025, v0.4.15**: Dynamically load/unload models while executing prompts in parallel.
24+
- **Jan 14 2025, v0.4.14**: Modular llama.cpp build using `cmake` build system. Deprecated `make` build system.
2425
- **Jan 1 2025, v0.3.1**: OpenAI compatible API, **text** and **vision** models. Added support for **Qwen2-VL** models. Hot-swap of models on demand in server/API.
2526
- **Dec 9 2024, v0.2.0**: Low-level and high-level APIs: llama, llava, clip and ggml API.
2627
- **Nov 27 2024, v0.1.22**: Support for Multimodal models such as **llava** and **minicpmv**.

examples/demo_openai_load_models.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@
77

88

99
client = OpenAI(
10-
base_url = 'http://localhost:11434/v1',
10+
# base_url = 'http://localhost:11434/v1',
11+
base_url = 'http://openai.tangledlabs.com/v1',
1112
api_key='llama-cpp-cffi',
1213
)
1314

@@ -49,7 +50,7 @@ def demo_text_chat_completions_stream():
4950
# llama-cpp-cffi
5051
extra_body=dict( # type: ignore
5152
n_ctx=4 * 1024,
52-
gpu_layers=99,
53+
gpu_layers=5,
5354
predict=512,
5455
),
5556
)
@@ -98,7 +99,7 @@ def func(model):
9899
# llama-cpp-cffi
99100
extra_body=dict( # type: ignore
100101
n_ctx=4 * 1024,
101-
gpu_layers=99,
102+
gpu_layers=5,
102103
predict=512,
103104
),
104105
)

json_hpp_7.patch

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
--- llama.cpp-master/common/json.hpp 2025-01-13 19:24:51.610146960 +0100
2+
+++ llama.cpp/common/json.hpp 2025-01-22 18:47:26.842856380 +0100
3+
@@ -18,6 +18,8 @@
4+
#ifndef INCLUDE_NLOHMANN_JSON_HPP_
5+
#define INCLUDE_NLOHMANN_JSON_HPP_
6+
7+
+#ifdef __cplusplus
8+
+
9+
#include <algorithm> // all_of, find, for_each
10+
#include <cstddef> // nullptr_t, ptrdiff_t, size_t
11+
#include <functional> // hash, less
12+
@@ -24761,6 +24763,6 @@
13+
#undef JSON_HEDLEY_WARN_UNUSED_RESULT_MSG
14+
#undef JSON_HEDLEY_FALL_THROUGH
15+
16+
-
17+
+#endif // __cplusplus
18+
19+
#endif // INCLUDE_NLOHMANN_JSON_HPP_

json_schema_to_grammar_cpp_7.patch

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
--- llama.cpp-master/common/json-schema-to-grammar.cpp 2025-01-22 18:26:47.628379203 +0100
2+
+++ llama.cpp/common/json-schema-to-grammar.cpp 2025-01-22 18:33:43.484435269 +0100
3+
@@ -13,6 +13,18 @@
4+
5+
using json = nlohmann::ordered_json;
6+
7+
+char * llama_json_schema_to_grammar(const char * c_value) {
8+
+ std::string value(c_value);
9+
+ std::string grammar = json_schema_to_grammar(json::parse(value));
10+
+
11+
+ // Allocate memory for the result string, including space for the null terminator
12+
+ char* result = new char[grammar.length() + 1];
13+
+ std::strcpy(result, grammar.c_str());
14+
+
15+
+ // The caller is now responsible for deleting this memory
16+
+ return result;
17+
+}
18+
+
19+
static std::string build_repetition(const std::string & item_rule, int min_items, int max_items, const std::string & separator_rule = "") {
20+
auto has_max = max_items != std::numeric_limits<int>::max();
21+

json_schema_to_grammar_h_7.patch

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
--- llama.cpp-master/common/json-schema-to-grammar.h 2025-01-22 18:26:47.628379203 +0100
2+
+++ llama.cpp/common/json-schema-to-grammar.h 2025-01-22 18:52:19.832711144 +0100
3+
@@ -5,6 +5,27 @@
4+
#define JSON_ASSERT GGML_ASSERT
5+
#include "json.hpp"
6+
7+
+#ifdef LLAMA_SHARED
8+
+# if defined(_WIN32) && !defined(__MINGW32__)
9+
+# ifdef LLAMA_BUILD
10+
+# define LLAMA_API __declspec(dllexport)
11+
+# else
12+
+# define LLAMA_API __declspec(dllimport)
13+
+# endif
14+
+# else
15+
+# define LLAMA_API __attribute__ ((visibility ("default")))
16+
+# endif
17+
+#else
18+
+# define LLAMA_API
19+
+#endif
20+
+
21+
+#ifdef __cplusplus
22+
+extern "C" {
23+
+#endif
24+
+LLAMA_API char * llama_json_schema_to_grammar(const char * c_value);
25+
+#ifdef __cplusplus
26+
+}
27+
+
28+
std::string json_schema_to_grammar(const nlohmann::ordered_json & schema);
29+
30+
struct llama_grammar_builder {
31+
@@ -14,3 +35,5 @@
32+
};
33+
34+
std::string build_grammar(const std::function<void(const llama_grammar_builder &)> & cb);
35+
+
36+
+#endif

0 commit comments

Comments
 (0)