Skip to content

Commit c6a3ae5

Browse files
authored
Merge branch 'ggml-org:master' into mradermacher
2 parents df7397f + 0a8026e commit c6a3ae5

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+5645
-151
lines changed

.github/workflows/build.yml

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -547,6 +547,46 @@ jobs:
547547
# This is using llvmpipe and runs slower than other backends
548548
ctest -L main --verbose --timeout 3600
549549
550+
ubuntu-24-wasm-webgpu:
551+
runs-on: ubuntu-24.04
552+
553+
steps:
554+
- name: Clone
555+
id: checkout
556+
uses: actions/checkout@v4
557+
558+
- name: ccache
559+
uses: ggml-org/ccache-action@v1.2.16
560+
with:
561+
key: ubuntu-latest-wasm-webgpu
562+
evict-old-files: 1d
563+
564+
- name: Install Emscripten
565+
run: |
566+
git clone https://github.com/emscripten-core/emsdk.git
567+
cd emsdk
568+
./emsdk install latest
569+
./emsdk activate latest
570+
571+
- name: Fetch emdawnwebgpu
572+
run: |
573+
DAWN_TAG="v20251027.212519"
574+
EMDAWN_PKG="emdawnwebgpu_pkg-${DAWN_TAG}.zip"
575+
echo "Downloading ${EMDAWN_PKG}"
576+
curl -L -o emdawn.zip \
577+
"https://github.com/google/dawn/releases/download/${DAWN_TAG}/${EMDAWN_PKG}"
578+
unzip emdawn.zip
579+
580+
- name: Build WASM WebGPU
581+
run: |
582+
source emsdk/emsdk_env.sh
583+
emcmake cmake -B build-wasm \
584+
-DGGML_WEBGPU=ON \
585+
-DLLAMA_CURL=OFF \
586+
-DEMDAWNWEBGPU_DIR=emdawnwebgpu_pkg
587+
588+
cmake --build build-wasm --target test-backend-ops -j $(nproc)
589+
550590
ubuntu-22-cmake-hip:
551591
runs-on: ubuntu-22.04
552592
container: rocm/dev-ubuntu-22.04:6.1.2

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,3 +134,5 @@ poetry.toml
134134
# IDE
135135
/*.code-workspace
136136
/.windsurf/
137+
# emscripten
138+
a.out.*

CMakeLists.txt

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,24 @@ endif()
3333

3434
option(LLAMA_USE_SYSTEM_GGML "Use system libggml" OFF)
3535

36+
option(LLAMA_WASM_MEM64 "llama: use 64-bit memory in WASM builds" ON)
37+
3638
if (EMSCRIPTEN)
3739
set(BUILD_SHARED_LIBS_DEFAULT OFF)
3840

39-
option(LLAMA_WASM_SINGLE_FILE "llama: embed WASM inside the generated llama.js" ON)
41+
# Use 64-bit memory to support backend_get_memory queries
42+
# TODO: analyze performance impact, see https://spidermonkey.dev/blog/2025/01/15/is-memory64-actually-worth-using
43+
if (LLAMA_WASM_MEM64)
44+
add_compile_options("-sMEMORY64=1")
45+
add_link_options("-sMEMORY64=1")
46+
endif()
47+
add_link_options("-sALLOW_MEMORY_GROWTH=1")
48+
49+
option(LLAMA_WASM_SINGLE_FILE "llama: embed WASM inside the generated llama.js" OFF)
50+
option(LLAMA_BUILD_HTML "llama: build HTML file" ON)
51+
if (LLAMA_BUILD_HTML)
52+
set(CMAKE_EXECUTABLE_SUFFIX ".html")
53+
endif()
4054
else()
4155
if (MINGW)
4256
set(BUILD_SHARED_LIBS_DEFAULT OFF)

CODEOWNERS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,16 @@
1010
/common/arg.* @ggerganov
1111
/common/base64.hpp.* @ggerganov
1212
/common/build-info.* @ggerganov
13+
/common/chat-peg-parser.* @aldehir
1314
/common/common.* @ggerganov
1415
/common/console.* @ggerganov
1516
/common/http.* @angt
1617
/common/llguidance.* @ggerganov
1718
/common/log.* @ggerganov
19+
/common/peg-parser.* @aldehir
1820
/common/sampling.* @ggerganov
1921
/common/speculative.* @ggerganov
22+
/common/unicode.* @aldehir
2023
/convert_*.py @CISC
2124
/examples/batched.swift/ @ggerganov
2225
/examples/batched/ @ggerganov

common/CMakeLists.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,8 @@ add_library(${TARGET} STATIC
5252
chat-parser.h
5353
chat-parser-xml-toolcall.h
5454
chat-parser-xml-toolcall.cpp
55+
chat-peg-parser.cpp
56+
chat-peg-parser.h
5557
chat.cpp
5658
chat.h
5759
common.cpp
@@ -69,12 +71,16 @@ add_library(${TARGET} STATIC
6971
log.h
7072
ngram-cache.cpp
7173
ngram-cache.h
74+
peg-parser.cpp
75+
peg-parser.h
7276
regex-partial.cpp
7377
regex-partial.h
7478
sampling.cpp
7579
sampling.h
7680
speculative.cpp
7781
speculative.h
82+
unicode.cpp
83+
unicode.h
7884
)
7985

8086
if (BUILD_SHARED_LIBS)

common/arg.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include <thread> // for hardware_concurrency
3131
#include <vector>
3232

33+
#ifndef __EMSCRIPTEN__
3334
#ifdef __linux__
3435
#include <linux/limits.h>
3536
#elif defined(_WIN32)
@@ -41,6 +42,8 @@
4142
#else
4243
#include <sys/syslimits.h>
4344
#endif
45+
#endif
46+
4447
#define LLAMA_MAX_URL_LENGTH 2084 // Maximum URL Length in Chrome: 2083
4548

4649
using json = nlohmann::ordered_json;

common/chat-parser.cpp

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
#include "chat-parser.h"
2+
#include "chat-peg-parser.h"
23
#include "common.h"
34
#include "log.h"
5+
#include "peg-parser.h"
46
#include "regex-partial.h"
57

68
#include <algorithm>
@@ -1483,6 +1485,11 @@ static void common_chat_parse(common_chat_msg_parser & builder) {
14831485
}
14841486

14851487
common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_syntax & syntax) {
1488+
if (syntax.format == COMMON_CHAT_FORMAT_PEG_SIMPLE ||
1489+
syntax.format == COMMON_CHAT_FORMAT_PEG_NATIVE ||
1490+
syntax.format == COMMON_CHAT_FORMAT_PEG_CONSTRUCTED) {
1491+
return common_chat_peg_parse(syntax.parser, input, is_partial, syntax);
1492+
}
14861493
common_chat_msg_parser builder(input, is_partial, syntax);
14871494
try {
14881495
common_chat_parse(builder);
@@ -1500,3 +1507,36 @@ common_chat_msg common_chat_parse(const std::string & input, bool is_partial, co
15001507
}
15011508
return msg;
15021509
}
1510+
1511+
// Parses `input` with the PEG `parser` and converts the resulting AST into an
// assistant message.
//
// The mapper used for the conversion is chosen from `syntax.format`:
// PEG_NATIVE and PEG_CONSTRUCTED get their dedicated mappers, every other
// format goes through the generic one.
//
// Throws std::runtime_error when `parser` is empty or when parsing fails.
common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, const std::string & input, bool is_partial, const common_chat_syntax & syntax) {
    if (parser.empty()) {
        throw std::runtime_error("Failed to parse due to missing parser definition.");
    }

    LOG_DBG("Parsing input with format %s: %s\n", common_chat_format_name(syntax.format), input.c_str());

    common_peg_parse_context parse_ctx(input, is_partial);
    auto parsed = parser.parse(parse_ctx);
    if (parsed.fail()) {
        std::string what = "Failed to parse input at pos ";
        what += std::to_string(parsed.end);
        throw std::runtime_error(what);
    }

    common_chat_msg message;
    message.role = "assistant";

    if (syntax.format == COMMON_CHAT_FORMAT_PEG_NATIVE) {
        common_chat_peg_native_mapper native_mapper(message);
        native_mapper.from_ast(parse_ctx.ast, parsed);
    } else if (syntax.format == COMMON_CHAT_FORMAT_PEG_CONSTRUCTED) {
        common_chat_peg_constructed_mapper constructed_mapper(message);
        constructed_mapper.from_ast(parse_ctx.ast, parsed);
    } else {
        // Any other format falls back to the generic mapper.
        common_chat_peg_mapper generic_mapper(message);
        generic_mapper.from_ast(parse_ctx.ast, parsed);
    }

    // The parsed message is only logged for complete (non-partial) input.
    if (!is_partial) {
        LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat<json>({message}).at(0).dump().c_str());
    }
    return message;
}

common/chat-peg-parser.cpp

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
#include "chat-peg-parser.h"
2+
3+
#include <nlohmann/json.hpp>
4+
5+
using json = nlohmann::json;
6+
7+
// Returns a prefix of `sv` with every trailing whitespace character (as
// classified by std::isspace) dropped. No copy is made: the returned view
// refers to the same underlying characters as the argument.
static std::string_view trim_trailing_space(std::string_view sv) {
    std::size_t keep = sv.size();
    // Cast to unsigned char first: std::isspace is undefined for negative values.
    while (keep > 0 && std::isspace(static_cast<unsigned char>(sv[keep - 1]))) {
        --keep;
    }
    return sv.substr(0, keep);
}
13+
14+
// Hands `result` to the arena's visit() and forwards each node it reports to
// map(), which subclasses extend to build up the chat message.
void common_chat_peg_mapper::from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & result) {
    const auto forward_to_map = [this](const common_peg_ast_node & visited) {
        map(visited);
    };
    arena.visit(result, forward_to_map);
}
19+
20+
void common_chat_peg_mapper::map(const common_peg_ast_node & node) {
21+
bool is_reasoning = node.tag == common_chat_peg_builder::REASONING;
22+
bool is_content = node.tag == common_chat_peg_builder::CONTENT;
23+
24+
if (is_reasoning) {
25+
result.reasoning_content = std::string(trim_trailing_space(node.text));
26+
}
27+
28+
if (is_content) {
29+
result.content = std::string(trim_trailing_space(node.text));
30+
}
31+
}
32+
33+
void common_chat_peg_native_mapper::map(const common_peg_ast_node & node) {
34+
common_chat_peg_mapper::map(node);
35+
36+
bool is_tool_open = node.tag == common_chat_peg_native_builder::TOOL_OPEN;
37+
bool is_tool_name = node.tag == common_chat_peg_native_builder::TOOL_NAME;
38+
bool is_tool_id = node.tag == common_chat_peg_native_builder::TOOL_ID;
39+
bool is_tool_args = node.tag == common_chat_peg_native_builder::TOOL_ARGS;
40+
41+
if (is_tool_open) {
42+
result.tool_calls.emplace_back();
43+
current_tool = &result.tool_calls.back();
44+
}
45+
46+
if (is_tool_id && current_tool) {
47+
current_tool->id = std::string(trim_trailing_space(node.text));
48+
}
49+
50+
if (is_tool_name && current_tool) {
51+
current_tool->name = std::string(trim_trailing_space(node.text));
52+
}
53+
54+
if (is_tool_args && current_tool) {
55+
current_tool->arguments = std::string(trim_trailing_space(node.text));
56+
}
57+
}
58+
59+
void common_chat_peg_constructed_mapper::map(const common_peg_ast_node & node) {
60+
common_chat_peg_mapper::map(node);
61+
62+
bool is_tool_open = node.tag == common_chat_peg_constructed_builder::TOOL_OPEN;
63+
bool is_tool_name = node.tag == common_chat_peg_constructed_builder::TOOL_NAME;
64+
bool is_tool_close = node.tag == common_chat_peg_constructed_builder::TOOL_CLOSE;
65+
bool is_arg_open = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_OPEN;
66+
bool is_arg_close = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_CLOSE;
67+
bool is_arg_name = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_NAME;
68+
bool is_arg_string = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_STRING_VALUE;
69+
bool is_arg_json = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_JSON_VALUE;
70+
71+
if (is_tool_open) {
72+
result.tool_calls.emplace_back();
73+
current_tool = &result.tool_calls.back();
74+
arg_count = 0;
75+
}
76+
77+
if (is_tool_name) {
78+
current_tool->name = std::string(node.text);
79+
current_tool->arguments = "{";
80+
}
81+
82+
if (is_arg_open) {
83+
needs_closing_quote = false;
84+
}
85+
86+
if (is_arg_name && current_tool) {
87+
if (arg_count > 0) {
88+
current_tool->arguments += ",";
89+
}
90+
current_tool->arguments += json(trim_trailing_space(node.text)).dump() + ":";
91+
++arg_count;
92+
}
93+
94+
if (is_arg_string && current_tool) {
95+
// Serialize to JSON, but exclude the end quote
96+
std::string dumped = json(node.text).dump();
97+
current_tool->arguments += dumped.substr(0, dumped.size() - 1);
98+
needs_closing_quote = true;
99+
}
100+
101+
if (is_arg_close && current_tool) {
102+
if (needs_closing_quote) {
103+
current_tool->arguments += "\"";
104+
}
105+
}
106+
107+
if (is_arg_json && current_tool) {
108+
current_tool->arguments += std::string(trim_trailing_space(node.text));
109+
}
110+
111+
if (is_tool_close && current_tool) {
112+
current_tool->arguments += "}";
113+
}
114+
}

0 commit comments

Comments
 (0)