Skip to content

Commit bbba3f2

Browse files
committed
Merge branch 'master' into 16061-mobile-message-actions-ui
2 parents a3648ca + 1eeb523 commit bbba3f2

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

54 files changed

+2257
-1825
lines changed

common/arg.cpp

Lines changed: 266 additions & 202 deletions
Large diffs are not rendered by default.

common/chat.cpp

Lines changed: 27 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1741,10 +1741,12 @@ static void common_chat_parse_gpt_oss(common_chat_msg_parser & builder) {
17411741
static common_chat_params common_chat_params_init_firefunction_v2(const common_chat_template & tmpl, const struct templates_params & inputs) {
17421742
LOG_DBG("%s\n", __func__);
17431743
common_chat_params data;
1744-
data.prompt = apply(tmpl, inputs, /* messages_override =*/ std::nullopt, /* tools_override= */ json(), json {
1744+
const std::optional<json> tools_override = json();
1745+
const std::optional<json> additional_context = json {
17451746
{"datetime", format_time(inputs.now, "%b %d %Y %H:%M:%S GMT")},
17461747
{"functions", json(inputs.tools.empty() ? "" : inputs.tools.dump(2))},
1747-
});
1748+
};
1749+
data.prompt = apply(tmpl, inputs, /* messages_override =*/ std::nullopt, tools_override, additional_context);
17481750
if (inputs.tools.is_array() && !inputs.tools.empty()) {
17491751
data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
17501752
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
@@ -2230,15 +2232,28 @@ static common_chat_params common_chat_params_init_granite(const common_chat_temp
22302232

22312233
static void common_chat_parse_granite(common_chat_msg_parser & builder) {
22322234
// Parse thinking tags
2235+
static const common_regex start_think_regex(regex_escape("<think>"));
2236+
static const common_regex end_think_regex(regex_escape("</think>"));
2237+
// Granite models output partial tokens such as "<" and "<think".
2238+
// By leveraging try_consume_regex()/try_find_regex() throwing
2239+
// common_chat_msg_partial_exception for these partial tokens,
2240+
// processing is interrupted and the tokens are not passed to add_content().
2241+
if (auto res = builder.try_consume_regex(start_think_regex)) {
2242+
// Restore position for try_parse_reasoning()
2243+
builder.move_to(res->groups[0].begin);
2244+
builder.try_find_regex(end_think_regex, std::string::npos, false);
2245+
// Restore position for try_parse_reasoning()
2246+
builder.move_to(res->groups[0].begin);
2247+
}
22332248
builder.try_parse_reasoning("<think>", "</think>");
22342249

2235-
// Parse response tags using regex
2236-
static const common_regex response_regex("<response>([\\s\\S]*?)</response>");
2237-
if (auto res = builder.try_find_regex(response_regex)) {
2238-
// Extract the content between the tags (capture group 1)
2239-
auto content = builder.str(res->groups[1]);
2240-
builder.add_content(content);
2241-
builder.move_to(res->groups[0].end);
2250+
// Parse response tags
2251+
static const common_regex start_response_regex(regex_escape("<response>"));
2252+
static const common_regex end_response_regex(regex_escape("</response>"));
2253+
// Granite models output partial tokens such as "<" and "<response".
2254+
// Same hack as reasoning parsing.
2255+
if (builder.try_consume_regex(start_response_regex)) {
2256+
builder.try_find_regex(end_response_regex);
22422257
}
22432258

22442259
if (!builder.syntax().parse_tool_calls) {
@@ -2252,13 +2267,10 @@ static void common_chat_parse_granite(common_chat_msg_parser & builder) {
22522267
builder.move_to(res->groups[0].end);
22532268

22542269
// Expect JSON array of tool calls
2255-
auto tool_calls_data = builder.consume_json();
2256-
if (tool_calls_data.json.is_array()) {
2257-
if (!builder.add_tool_calls(tool_calls_data.json)) {
2258-
builder.add_content("<|tool_call|>" + tool_calls_data.json.dump());
2270+
if (auto tool_call = builder.try_consume_json_with_dumped_args({{{"arguments"}}})) {
2271+
if (!builder.add_tool_calls(tool_call->value) || tool_call->is_partial) {
2272+
throw common_chat_msg_partial_exception("incomplete tool call");
22592273
}
2260-
} else {
2261-
builder.add_content("<|tool_call|>" + tool_calls_data.json.dump());
22622274
}
22632275
} else {
22642276
builder.add_content(builder.consume_rest());

ggml/CMakeLists.txt

Lines changed: 37 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,41 @@
11
cmake_minimum_required(VERSION 3.14) # for add_link_options and implicit target directories.
22
project("ggml" C CXX ASM)
3+
4+
### GGML Version
5+
set(GGML_VERSION_MAJOR 0)
6+
set(GGML_VERSION_MINOR 9)
7+
set(GGML_VERSION_PATCH 0)
8+
set(GGML_VERSION_DEV "-dev") # "-dev" for development, "" for releases
9+
set(GGML_VERSION_BASE "${GGML_VERSION_MAJOR}.${GGML_VERSION_MINOR}.${GGML_VERSION_PATCH}")
10+
11+
find_program(GIT_EXE NAMES git git.exe NO_CMAKE_FIND_ROOT_PATH)
12+
if(GIT_EXE)
13+
# Get current git commit hash
14+
execute_process(COMMAND ${GIT_EXE} rev-parse --short HEAD
15+
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
16+
OUTPUT_VARIABLE GGML_BUILD_COMMIT
17+
OUTPUT_STRIP_TRAILING_WHITESPACE
18+
ERROR_QUIET
19+
)
20+
21+
# Check if the working directory is dirty (i.e., has uncommitted changes)
22+
execute_process(COMMAND ${GIT_EXE} diff-index --quiet HEAD -- .
23+
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
24+
RESULT_VARIABLE GGML_GIT_DIRTY
25+
ERROR_QUIET
26+
)
27+
endif()
28+
29+
# Build the version string with optional -dev suffix and dirty flag
30+
set(GGML_VERSION "${GGML_VERSION_BASE}${GGML_VERSION_DEV}")
31+
if(GGML_GIT_DIRTY AND NOT GGML_GIT_DIRTY EQUAL 0)
32+
set(GGML_VERSION "${GGML_VERSION}-dirty")
33+
endif()
34+
35+
if(NOT GGML_BUILD_COMMIT)
36+
set(GGML_BUILD_COMMIT "unknown")
37+
endif()
38+
339
include(CheckIncludeFileCXX)
440

541
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
@@ -300,26 +336,6 @@ endif()
300336
# Create CMake package
301337
#
302338

303-
# Generate version info based on git commit.
304-
305-
if(NOT DEFINED GGML_BUILD_NUMBER)
306-
find_program(GIT_EXE NAMES git git.exe REQUIRED NO_CMAKE_FIND_ROOT_PATH)
307-
execute_process(COMMAND ${GIT_EXE} rev-list --count HEAD
308-
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
309-
OUTPUT_VARIABLE GGML_BUILD_NUMBER
310-
OUTPUT_STRIP_TRAILING_WHITESPACE
311-
)
312-
313-
if(GGML_BUILD_NUMBER EQUAL 1)
314-
message(WARNING "GGML build version fixed at 1 likely due to a shallow clone.")
315-
endif()
316-
317-
execute_process(COMMAND ${GIT_EXE} rev-parse --short HEAD
318-
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
319-
OUTPUT_VARIABLE GGML_BUILD_COMMIT
320-
OUTPUT_STRIP_TRAILING_WHITESPACE
321-
)
322-
endif()
323339

324340

325341
# Capture variables prefixed with GGML_.
@@ -348,7 +364,7 @@ set(GGML_VARIABLES_EXPANDED ${variable_set_statements})
348364

349365
# Create the CMake package and set install location.
350366

351-
set(GGML_INSTALL_VERSION 0.0.${GGML_BUILD_NUMBER})
367+
set(GGML_INSTALL_VERSION ${GGML_VERSION})
352368
set(GGML_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location of header files")
353369
set(GGML_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} CACHE PATH "Location of library files")
354370
set(GGML_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location of binary files")

ggml/src/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,9 @@ message(STATUS "GGML_SYSTEM_ARCH: ${GGML_SYSTEM_ARCH}")
114114

115115
if (NOT MSVC)
116116
if (GGML_STATIC)
117+
if (UNIX AND NOT APPLE)
118+
set(CMAKE_FIND_LIBRARY_SUFFIXES ".a;.so")
119+
endif()
117120
add_link_options(-static)
118121
if (MINGW)
119122
add_link_options(-static-libgcc -static-libstdc++)

ggml/src/ggml-backend-impl.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ extern "C" {
116116
void (*event_wait) (ggml_backend_t backend, ggml_backend_event_t event);
117117

118118
// (optional) sort/optimize the nodes in the graph
119-
void (*optimize_graph) (ggml_backend_t backend, struct ggml_cgraph * cgraph);
119+
void (*graph_optimize) (ggml_backend_t backend, struct ggml_cgraph * cgraph);
120120
};
121121

122122
struct ggml_backend {

ggml/src/ggml-backend.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -463,10 +463,10 @@ void ggml_backend_event_wait(ggml_backend_t backend, ggml_backend_event_t event)
463463
backend->iface.event_wait(backend, event);
464464
}
465465

466-
static void ggml_backend_optimize_graph(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
466+
static void ggml_backend_graph_optimize(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
467467
GGML_ASSERT(backend);
468-
if (backend->iface.optimize_graph != NULL) {
469-
backend->iface.optimize_graph(backend, cgraph);
468+
if (backend->iface.graph_optimize != NULL) {
469+
backend->iface.graph_optimize(backend, cgraph);
470470
}
471471
}
472472

@@ -1307,7 +1307,7 @@ void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgra
13071307

13081308
// Optimize this split of the graph. This needs to happen before we make graph_copy,
13091309
// so they are in sync.
1310-
ggml_backend_optimize_graph(sched->backends[split->backend_id], &split->graph);
1310+
ggml_backend_graph_optimize(sched->backends[split->backend_id], &split->graph);
13111311

13121312
// add inputs to the graph copy so that they are allocated by ggml-alloc at the start of the split
13131313
for (int j = 0; j < split->n_inputs; j++) {

ggml/src/ggml-blas/ggml-blas.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -270,7 +270,7 @@ static struct ggml_backend_i blas_backend_i = {
270270
/* .graph_compute = */ ggml_backend_blas_graph_compute,
271271
/* .event_record = */ NULL,
272272
/* .event_wait = */ NULL,
273-
/* .optimize_graph = */ NULL,
273+
/* .graph_optimize = */ NULL,
274274
};
275275

276276
static ggml_guid_t ggml_backend_blas_guid(void) {

ggml/src/ggml-cann/ggml-cann.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2756,7 +2756,7 @@ static const ggml_backend_i ggml_backend_cann_interface = {
27562756
/* .graph_compute = */ ggml_backend_cann_graph_compute,
27572757
/* .event_record = */ ggml_backend_cann_event_record,
27582758
/* .event_wait = */ ggml_backend_cann_event_wait,
2759-
/* .optimize_graph = */ NULL,
2759+
/* .graph_optimize = */ NULL,
27602760
};
27612761

27622762
/**

ggml/src/ggml-cpu/amx/amx.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
#include "ggml-cpu.h"
88
#include "traits.h"
99

10-
#if defined(__gnu_linux__)
10+
#if defined(__linux__)
1111
#include <sys/syscall.h>
1212
#include <unistd.h>
1313
#endif
@@ -186,14 +186,16 @@ static size_t ggml_backend_amx_buffer_type_get_alloc_size(ggml_backend_buffer_ty
186186
#define XFEATURE_XTILEDATA 18
187187

188188
static bool ggml_amx_init() {
189-
#if defined(__gnu_linux__)
189+
#if defined(__linux__)
190190
if (syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_PERM, XFEATURE_XTILEDATA)) {
191191
fprintf(stderr, "AMX is not ready to be used!\n");
192192
return false;
193193
}
194194
return true;
195195
#elif defined(_WIN32)
196196
return true;
197+
#else
198+
return false;
197199
#endif
198200
}
199201

ggml/src/ggml-cpu/common.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,14 @@ static inline float bf16_to_f32(ggml_bf16_t x) {
2828
return GGML_BF16_TO_FP32(x);
2929
}
3030

31+
static inline float i32_to_f32(int32_t x) {
32+
return x;
33+
}
34+
35+
static inline int32_t f32_to_i32(float x) {
36+
return x;
37+
}
38+
3139
static inline float f32_to_f32(float x) {
3240
return x;
3341
}
@@ -54,6 +62,12 @@ struct type_conversion_table<ggml_bf16_t> {
5462
static constexpr ggml_bf16_t (*from_f32)(float) = f32_to_bf16;
5563
};
5664

65+
template <>
66+
struct type_conversion_table<int32_t> {
67+
static constexpr float (*to_f32)(int32_t) = i32_to_f32;
68+
static constexpr int32_t (*from_f32)(float) = f32_to_i32;
69+
};
70+
5771
static std::pair<int64_t, int64_t> get_thread_range(const struct ggml_compute_params * params, const struct ggml_tensor * src0) {
5872
const int64_t ith = params->ith;
5973
const int64_t nth = params->nth;

0 commit comments

Comments
 (0)