Skip to content

Commit 622d4ed

Browse files
committed
Merge branch 'master' into layla-build
2 parents 0b32b68 + 945501f commit 622d4ed

File tree

244 files changed

+38270
-15646
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

244 files changed

+38270
-15646
lines changed

.github/workflows/build.yml

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,39 @@ jobs:
387387
cd build
388388
ctest -L main --verbose
389389
390+
ubuntu-24-cmake-vulkan-deb:
391+
runs-on: ubuntu-24.04
392+
393+
steps:
394+
- name: Clone
395+
id: checkout
396+
uses: actions/checkout@v4
397+
398+
- name: ccache
399+
uses: ggml-org/[email protected]
400+
with:
401+
key: ubuntu-24-cmake-vulkan-deb
402+
evict-old-files: 1d
403+
404+
- name: Dependencies
405+
id: depends
406+
run: |
407+
sudo apt-get install -y glslc libvulkan-dev libcurl4-openssl-dev
408+
409+
- name: Configure
410+
id: cmake_configure
411+
run: |
412+
cmake -B build \
413+
-DCMAKE_BUILD_TYPE=RelWithDebInfo \
414+
-DGGML_BACKEND_DL=ON \
415+
-DGGML_CPU_ALL_VARIANTS=ON \
416+
-DGGML_VULKAN=ON
417+
418+
- name: Build
419+
id: cmake_build
420+
run: |
421+
cmake --build build -j $(nproc)
422+
390423
ubuntu-24-cmake-vulkan:
391424
runs-on: ubuntu-24.04
392425

@@ -1272,6 +1305,81 @@ jobs:
12721305
cd examples/llama.android
12731306
./gradlew build --no-daemon
12741307
1308+
android-ndk-build:
1309+
runs-on: ubuntu-latest
1310+
1311+
env:
1312+
OPENCL_VERSION: 2025.07.22
1313+
1314+
strategy:
1315+
matrix:
1316+
include:
1317+
- build: 'arm64-cpu'
1318+
defines: '-D ANDROID_ABI=arm64-v8a -D ANDROID_PLATFORM=android-31 -D CMAKE_TOOLCHAIN_FILE=${ANDROID_NDK_ROOT}/build/cmake/android.toolchain.cmake -D GGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv8.5-a+fp16+i8mm -G Ninja -D LLAMA_CURL=OFF -D GGML_OPENMP=OFF'
1319+
- build: 'arm64-snapdragon'
1320+
defines: '--preset arm64-android-snapdragon-release'
1321+
1322+
steps:
1323+
- name: Clone
1324+
id: checkout
1325+
uses: actions/checkout@v4
1326+
1327+
- name: Install OpenCL Headers and Libs
1328+
id: install_opencl
1329+
if: ${{ matrix.build == 'arm64-snapdragon' }}
1330+
run: |
1331+
mkdir opencl
1332+
curl -L -o opencl/clhpp.tar.gz https://github.com/KhronosGroup/OpenCL-CLHPP/archive/refs/tags/v${OPENCL_VERSION}.tar.gz
1333+
curl -L -o opencl/headers.tar.gz https://github.com/KhronosGroup/OpenCL-Headers/archive/refs/tags/v${OPENCL_VERSION}.tar.gz
1334+
curl -L -o opencl/icd-loader.tar.gz https://github.com/KhronosGroup/OpenCL-ICD-Loader/archive/refs/tags/v${OPENCL_VERSION}.tar.gz
1335+
tar -xaf opencl/headers.tar.gz -C opencl
1336+
tar -xaf opencl/clhpp.tar.gz -C opencl
1337+
tar -xaf opencl/icd-loader.tar.gz -C opencl
1338+
sudo cp -r opencl/OpenCL-Headers-${OPENCL_VERSION}/CL ${ANDROID_NDK_ROOT}/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include
1339+
sudo cp -r opencl/OpenCL-CLHPP-${OPENCL_VERSION}/include/CL/* ${ANDROID_NDK_ROOT}/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include/CL
1340+
cd opencl/OpenCL-ICD-Loader-${OPENCL_VERSION}
1341+
cmake -B build -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK_ROOT}/build/cmake/android.toolchain.cmake -DOPENCL_ICD_LOADER_HEADERS_DIR=${ANDROID_NDK_ROOT}/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include -DANDROID_ABI=arm64-v8a -DANDROID_PLATFORM=31 -DANDROID_STL=c++_shared
1342+
cmake --build build
1343+
sudo cp build/libOpenCL.so ${ANDROID_NDK_ROOT}/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/lib/aarch64-linux-android
1344+
rm -rf opencl
1345+
1346+
- name: Install Hexagon SDK
1347+
id: install_hexsdk
1348+
if: ${{ matrix.build == 'arm64-snapdragon' }}
1349+
env:
1350+
HEXSDK_VER: 6.4.0.2
1351+
HEXTLS_VER: 19.0.04
1352+
run: |
1353+
curl -L -o hex-sdk.tar.gz https://github.com/snapdragon-toolchain/hexagon-sdk/releases/download/v$HEXSDK_VER/hexagon-sdk-v$HEXSDK_VER-amd64-lnx.tar.xz
1354+
mkdir hex-sdk
1355+
tar -xaf hex-sdk.tar.gz -C hex-sdk
1356+
ls -l hex-sdk
1357+
sudo mv hex-sdk /opt/hexagon
1358+
echo "HEXAGON_SDK_ROOT=/opt/hexagon/$HEXSDK_VER" >> "$GITHUB_ENV"
1359+
echo "HEXAGON_TOOLS_ROOT=/opt/hexagon/$HEXSDK_VER/tools/HEXAGON_Tools/$HEXTLS_VER" >> "$GITHUB_ENV"
1360+
echo "DEFAULT_HLOS_ARCH=64" >> "$GITHUB_ENV"
1361+
echo "DEFAULT_TOOLS_VARIANT=toolv19" >> "$GITHUB_ENV"
1362+
echo "DEFAULT_NO_QURT_INC=0" >> "$GITHUB_ENV"
1363+
echo "DEFAULT_DSP_ARCH=v73" >> "$GITHUB_ENV"
1364+
1365+
- name: Update CMake presets
1366+
id: update_presets
1367+
if: ${{ matrix.build == 'arm64-snapdragon' }}
1368+
run: |
1369+
cp docs/backend/hexagon/CMakeUserPresets.json .
1370+
1371+
- name: Build
1372+
id: ndk_build
1373+
run: |
1374+
cmake ${{ matrix.defines }} -B build
1375+
cmake --build build
1376+
cmake --install build --prefix pkg-adb/llama.cpp
1377+
1378+
- name: Test
1379+
id: cmake_test
1380+
run: |
1381+
echo "FIXME: test on devices"
1382+
12751383
openEuler-latest-cmake-cann:
12761384
if: ${{ github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'Ascend NPU') }}
12771385
defaults:

.github/workflows/release.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,8 @@ jobs:
134134
include:
135135
- build: 'x64'
136136
os: ubuntu-22.04
137+
- build: 's390x-z15' # z15 because our CI runners are on z15
138+
os: ubuntu-22.04-s390x
137139
# GGML_BACKEND_DL and GGML_CPU_ALL_VARIANTS are not currently supported on arm
138140
# - build: 'arm64'
139141
# os: ubuntu-22.04-arm

.github/workflows/update-ops-docs.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,12 @@ name: Update Operations Documentation
33
on:
44
push:
55
paths:
6+
- 'docs/ops.md'
67
- 'docs/ops/**'
78
- 'scripts/create_ops_docs.py'
89
pull_request:
910
paths:
11+
- 'docs/ops.md'
1012
- 'docs/ops/**'
1113
- 'scripts/create_ops_docs.py'
1214

CODEOWNERS

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@
5555
/ggml/src/ggml-cuda/common.cuh @slaren
5656
/ggml/src/ggml-cuda/fattn* @JohannesGaessler
5757
/ggml/src/ggml-cuda/ggml-cuda.cu @slaren
58-
/ggml/src/ggml-cuda/mmf.* @JohannesGaessler
58+
/ggml/src/ggml-cuda/mmf.* @JohannesGaessler @am17an
5959
/ggml/src/ggml-cuda/mmq.* @JohannesGaessler
6060
/ggml/src/ggml-cuda/mmvf.* @JohannesGaessler
6161
/ggml/src/ggml-cuda/mmvq.* @JohannesGaessler
@@ -65,6 +65,7 @@
6565
/ggml/src/ggml-impl.h @ggerganov @slaren
6666
/ggml/src/ggml-metal/ @ggerganov
6767
/ggml/src/ggml-opencl/ @lhez @max-krasnyansky
68+
/ggml/src/ggml-hexagon/ @max-krasnyansky
6869
/ggml/src/ggml-opt.cpp @JohannesGaessler
6970
/ggml/src/ggml-quants.* @ggerganov
7071
/ggml/src/ggml-rpc/ @rgerganov

README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
8484
- [X] [Mistral 7B](https://huggingface.co/mistralai/Mistral-7B-v0.1)
8585
- [x] [Mixtral MoE](https://huggingface.co/models?search=mistral-ai/Mixtral)
8686
- [x] [DBRX](https://huggingface.co/databricks/dbrx-instruct)
87+
- [x] [Jamba](https://huggingface.co/ai21labs)
8788
- [X] [Falcon](https://huggingface.co/models?search=tiiuae/falcon)
8889
- [X] [Chinese LLaMA / Alpaca](https://github.com/ymcui/Chinese-LLaMA-Alpaca) and [Chinese LLaMA-2 / Alpaca-2](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2)
8990
- [X] [Vigogne (French)](https://github.com/bofenghuang/vigogne)
@@ -138,6 +139,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
138139
- [x] [Ling models](https://huggingface.co/collections/inclusionAI/ling-67c51c85b34a7ea0aba94c32)
139140
- [x] [LFM2 models](https://huggingface.co/collections/LiquidAI/lfm2-686d721927015b2ad73eaa38)
140141
- [x] [Hunyuan models](https://huggingface.co/collections/tencent/hunyuan-dense-model-6890632cda26b19119c9c5e7)
142+
- [x] [BailingMoeV2 (Ring/Ling 2.0) models](https://huggingface.co/collections/inclusionAI/ling-v2-68bf1dd2fc34c306c1fa6f86)
141143

142144
#### Multimodal
143145

@@ -187,6 +189,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
187189
- Swift [srgtuszy/llama-cpp-swift](https://github.com/srgtuszy/llama-cpp-swift)
188190
- Swift [ShenghaiWang/SwiftLlama](https://github.com/ShenghaiWang/SwiftLlama)
189191
- Delphi [Embarcadero/llama-cpp-delphi](https://github.com/Embarcadero/llama-cpp-delphi)
192+
- Go (no CGo needed): [hybridgroup/yzma](https://github.com/hybridgroup/yzma)
190193

191194
</details>
192195

@@ -278,6 +281,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
278281
| [IBM zDNN](docs/backend/zDNN.md) | IBM Z & LinuxONE |
279282
| [WebGPU [In Progress]](docs/build.md#webgpu) | All |
280283
| [RPC](https://github.com/ggml-org/llama.cpp/tree/master/tools/rpc) | All |
284+
| [Hexagon [In Progress]](docs/backend/hexagon/README.md) | Snapdragon |
281285

282286
## Obtaining and quantizing models
283287

ci/run.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ if [ ! -z ${GG_BUILD_ROCM} ]; then
7575
exit 1
7676
fi
7777

78-
CMAKE_EXTRA="${CMAKE_EXTRA} -DAMDGPU_TARGETS=${GG_BUILD_AMDGPU_TARGETS}"
78+
CMAKE_EXTRA="${CMAKE_EXTRA} -DGPU_TARGETS=${GG_BUILD_AMDGPU_TARGETS}"
7979
fi
8080

8181
if [ ! -z ${GG_BUILD_SYCL} ]; then

common/arg.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1760,7 +1760,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
17601760
).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_LOOKUP}));
17611761
add_opt(common_arg(
17621762
{"-t", "--threads"}, "N",
1763-
string_format("number of threads to use during generation (default: %d)", params.cpuparams.n_threads),
1763+
string_format("number of CPU threads to use during generation (default: %d)", params.cpuparams.n_threads),
17641764
[](common_params & params, int value) {
17651765
params.cpuparams.n_threads = value;
17661766
if (params.cpuparams.n_threads <= 0) {
@@ -3435,7 +3435,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
34353435
[](common_params & params) {
34363436
params.use_jinja = true;
34373437
}
3438-
).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_MAIN}).set_env("LLAMA_ARG_JINJA"));
3438+
).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_MTMD}).set_env("LLAMA_ARG_JINJA"));
34393439
add_opt(common_arg(
34403440
{"--reasoning-format"}, "FORMAT",
34413441
"controls whether thought tags are allowed and/or extracted from the response, and in which format they're returned; one of:\n"

common/chat-parser.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -432,7 +432,7 @@ std::optional<common_chat_msg_parser::consume_json_result> common_chat_msg_parse
432432
if (is_arguments_path({})) {
433433
// Entire JSON is the arguments and was parsed fully.
434434
return consume_json_result {
435-
partial->json.dump(),
435+
partial->json.dump(/* indent */ -1, /* indent_char */ ' ', /* ensure_ascii */ true),
436436
/* .is_partial = */ false,
437437
};
438438
}
@@ -444,7 +444,7 @@ std::optional<common_chat_msg_parser::consume_json_result> common_chat_msg_parse
444444
std::vector<std::string> path;
445445
std::function<json(const json &)> remove_unsupported_healings_and_dump_args = [&](const json & j) -> json {
446446
if (is_arguments_path(path)) {
447-
auto arguments = j.dump();
447+
auto arguments = j.dump(/* indent */ -1, /* indent_char */ ' ', /* ensure_ascii */ true);
448448
if (is_partial() && !partial->healing_marker.marker.empty()) {
449449
auto idx = arguments.find(partial->healing_marker.json_dump_marker);
450450
if (idx != std::string::npos) {

common/json-partial.cpp

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include <nlohmann/json.hpp>
66

77
#include <string>
8+
#include <regex>
89

910
using json = nlohmann::ordered_json;
1011

@@ -168,6 +169,47 @@ bool common_json_parse(
168169
}
169170
}
170171

172+
// Matches a potentially partial unicode escape sequence, e.g. \u, \uX, \uXX, \uXXX, \uXXXX
173+
static const std::regex partial_unicode_regex(R"(\\u(?:[0-9a-fA-F](?:[0-9a-fA-F](?:[0-9a-fA-F](?:[0-9a-fA-F])?)?)?)?$)");
174+
175+
auto is_high_surrogate = [&](const std::string & s) {
176+
// Check if this is a partial prefix of a high surrogate (U+D800-U+DBFF)
177+
return s.length() >= 4 &&
178+
s[0] == '\\' && s[1] == 'u' &&
179+
std::tolower(s[2]) == 'd' &&
180+
(s[3] == '8' || s[3] == '9' || std::tolower(s[3]) == 'a' || std::tolower(s[3]) == 'b');
181+
};
182+
183+
// Initialize the unicode marker to a low surrogate to handle the edge case
184+
// where a high surrogate (U+D800-U+DBFF) is immediately followed by a
185+
// backslash (\)
186+
std::string unicode_marker_padding = "udc00";
187+
std::smatch last_unicode_seq;
188+
189+
if (std::regex_search(str, last_unicode_seq, partial_unicode_regex)) {
190+
std::smatch second_last_seq;
191+
std::string prelude = str.substr(0, last_unicode_seq.position());
192+
193+
// Pad the escape sequence with 0s until it forms a complete sequence of 6 characters
194+
unicode_marker_padding = std::string(6 - last_unicode_seq.length(), '0');
195+
196+
if (is_high_surrogate(last_unicode_seq.str())) {
197+
// If the sequence is a partial match for a high surrogate, add a low surrogate (U+DC00-U+DFFF)
198+
unicode_marker_padding += "\\udc00";
199+
} else if (std::regex_search(prelude, second_last_seq, partial_unicode_regex)) {
200+
if (is_high_surrogate(second_last_seq.str())) {
201+
// If this follows a high surrogate, pad it to be a low surrogate
202+
if (last_unicode_seq.length() == 2) {
203+
unicode_marker_padding = "dc00";
204+
} else if (last_unicode_seq.length() == 3) {
205+
unicode_marker_padding = "c00";
206+
} else {
207+
// The original unicode_marker_padding is already padded with 0s
208+
}
209+
}
210+
}
211+
}
212+
171213
const auto & magic_seed = out.healing_marker.marker = healing_marker;//"$llama.cpp.json$";
172214

173215
if (err_loc.stack.back().type == COMMON_JSON_STACK_ELEMENT_KEY) {
@@ -186,6 +228,9 @@ bool common_json_parse(
186228
} else if (str[str.length() - 1] == '\\' && can_parse(str + "\\\"" + closing)) {
187229
// Was inside an object value string after an escape
188230
str += (out.healing_marker.json_dump_marker = "\\" + magic_seed) + "\"" + closing;
231+
} else if (can_parse(str + unicode_marker_padding + "\"" + closing)) {
232+
// Was inside an object value string after a partial unicode escape
233+
str += (out.healing_marker.json_dump_marker = unicode_marker_padding + magic_seed) + "\"" + closing;
189234
} else {
190235
// find last :
191236
auto last_pos = str.find_last_of(':');
@@ -205,6 +250,9 @@ bool common_json_parse(
205250
} else if (str[str.length() - 1] == '\\' && can_parse(str + "\\\"" + closing)) {
206251
// Was inside an array value string after an escape
207252
str += (out.healing_marker.json_dump_marker = "\\" + magic_seed) + "\"" + closing;
253+
} else if (can_parse(str + unicode_marker_padding + "\"" + closing)) {
254+
// Was inside an array value string after a partial unicode escape
255+
str += (out.healing_marker.json_dump_marker = unicode_marker_padding + magic_seed) + "\"" + closing;
208256
} else if (!was_maybe_number() && can_parse(str + ", 1" + closing)) {
209257
// Had just finished a value
210258
str += (out.healing_marker.json_dump_marker = ",\"" + magic_seed) + "\"" + closing;
@@ -230,6 +278,9 @@ bool common_json_parse(
230278
} else if (str[str.length() - 1] == '\\' && can_parse(str + "\\\": 1" + closing)) {
231279
// Was inside an object key string after an escape
232280
str += (out.healing_marker.json_dump_marker = "\\" + magic_seed) + "\": 1" + closing;
281+
} else if (can_parse(str + unicode_marker_padding + "\": 1" + closing)) {
282+
// Was inside an object key string after a partial unicode escape
283+
str += (out.healing_marker.json_dump_marker = unicode_marker_padding + magic_seed) + "\": 1" + closing;
233284
} else {
234285
auto last_pos = str.find_last_of(':');
235286
if (last_pos == std::string::npos) {

0 commit comments

Comments
 (0)