Skip to content

Commit e8952fb

Browse files
committed
Merge branch 'master' into llamacli-reasoning2
2 parents edb8c0f + 945501f commit e8952fb

File tree

189 files changed

+23463
-3510
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

189 files changed

+23463
-3510
lines changed

.github/workflows/build.yml

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1305,6 +1305,81 @@ jobs:
13051305
cd examples/llama.android
13061306
./gradlew build --no-daemon
13071307
1308+
android-ndk-build:
1309+
runs-on: ubuntu-latest
1310+
1311+
env:
1312+
OPENCL_VERSION: 2025.07.22
1313+
1314+
strategy:
1315+
matrix:
1316+
include:
1317+
- build: 'arm64-cpu'
1318+
defines: '-D ANDROID_ABI=arm64-v8a -D ANDROID_PLATFORM=android-31 -D CMAKE_TOOLCHAIN_FILE=${ANDROID_NDK_ROOT}/build/cmake/android.toolchain.cmake -D GGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv8.5-a+fp16+i8mm -G Ninja -D LLAMA_CURL=OFF -D GGML_OPENMP=OFF'
1319+
- build: 'arm64-snapdragon'
1320+
defines: '--preset arm64-android-snapdragon-release'
1321+
1322+
steps:
1323+
- name: Clone
1324+
id: checkout
1325+
uses: actions/checkout@v4
1326+
1327+
- name: Install OpenCL Headers and Libs
1328+
id: install_opencl
1329+
if: ${{ matrix.build == 'arm64-snapdragon' }}
1330+
run: |
1331+
mkdir opencl
1332+
curl -L -o opencl/clhpp.tar.gz https://github.com/KhronosGroup/OpenCL-CLHPP/archive/refs/tags/v${OPENCL_VERSION}.tar.gz
1333+
curl -L -o opencl/headers.tar.gz https://github.com/KhronosGroup/OpenCL-Headers/archive/refs/tags/v${OPENCL_VERSION}.tar.gz
1334+
curl -L -o opencl/icd-loader.tar.gz https://github.com/KhronosGroup/OpenCL-ICD-Loader/archive/refs/tags/v${OPENCL_VERSION}.tar.gz
1335+
tar -xaf opencl/headers.tar.gz -C opencl
1336+
tar -xaf opencl/clhpp.tar.gz -C opencl
1337+
tar -xaf opencl/icd-loader.tar.gz -C opencl
1338+
sudo cp -r opencl/OpenCL-Headers-${OPENCL_VERSION}/CL ${ANDROID_NDK_ROOT}/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include
1339+
sudo cp -r opencl/OpenCL-CLHPP-${OPENCL_VERSION}/include/CL/* ${ANDROID_NDK_ROOT}/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include/CL
1340+
cd opencl/OpenCL-ICD-Loader-${OPENCL_VERSION}
1341+
cmake -B build -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK_ROOT}/build/cmake/android.toolchain.cmake -DOPENCL_ICD_LOADER_HEADERS_DIR=${ANDROID_NDK_ROOT}/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include -DANDROID_ABI=arm64-v8a -DANDROID_PLATFORM=31 -DANDROID_STL=c++_shared
1342+
cmake --build build
1343+
sudo cp build/libOpenCL.so ${ANDROID_NDK_ROOT}/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/lib/aarch64-linux-android
1344+
rm -rf opencl
1345+
1346+
- name: Install Hexagon SDK
1347+
id: install_hexsdk
1348+
if: ${{ matrix.build == 'arm64-snapdragon' }}
1349+
env:
1350+
HEXSDK_VER: 6.4.0.2
1351+
HEXTLS_VER: 19.0.04
1352+
run: |
1353+
curl -L -o hex-sdk.tar.gz https://github.com/snapdragon-toolchain/hexagon-sdk/releases/download/v$HEXSDK_VER/hexagon-sdk-v$HEXSDK_VER-amd64-lnx.tar.xz
1354+
mkdir hex-sdk
1355+
tar -xaf hex-sdk.tar.gz -C hex-sdk
1356+
ls -l hex-sdk
1357+
sudo mv hex-sdk /opt/hexagon
1358+
echo "HEXAGON_SDK_ROOT=/opt/hexagon/$HEXSDK_VER" >> "$GITHUB_ENV"
1359+
echo "HEXAGON_TOOLS_ROOT=/opt/hexagon/$HEXSDK_VER/tools/HEXAGON_Tools/$HEXTLS_VER" >> "$GITHUB_ENV"
1360+
echo "DEFAULT_HLOS_ARCH=64" >> "$GITHUB_ENV"
1361+
echo "DEFAULT_TOOLS_VARIANT=toolv19" >> "$GITHUB_ENV"
1362+
echo "DEFAULT_NO_QURT_INC=0" >> "$GITHUB_ENV"
1363+
echo "DEFAULT_DSP_ARCH=v73" >> "$GITHUB_ENV"
1364+
1365+
- name: Update CMake presets
1366+
id: update_presets
1367+
if: ${{ matrix.build == 'arm64-snapdragon' }}
1368+
run: |
1369+
cp docs/backend/hexagon/CMakeUserPresets.json .
1370+
1371+
- name: Build
1372+
id: ndk_build
1373+
run: |
1374+
cmake ${{ matrix.defines }} -B build
1375+
cmake --build build
1376+
cmake --install build --prefix pkg-adb/llama.cpp
1377+
1378+
- name: Test
1379+
id: cmake_test
1380+
run: |
1381+
echo "FIXME: test on devices"
1382+
13081383
openEuler-latest-cmake-cann:
13091384
if: ${{ github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'Ascend NPU') }}
13101385
defaults:

.github/workflows/release.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,8 @@ jobs:
134134
include:
135135
- build: 'x64'
136136
os: ubuntu-22.04
137+
- build: 's390x-z15' # z15 because our CI runners are on z15
138+
os: ubuntu-22.04-s390x
137139
# GGML_BACKEND_DL and GGML_CPU_ALL_VARIANTS are not currently supported on arm
138140
# - build: 'arm64'
139141
# os: ubuntu-22.04-arm

.github/workflows/update-ops-docs.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,12 @@ name: Update Operations Documentation
33
on:
44
push:
55
paths:
6+
- 'docs/ops.md'
67
- 'docs/ops/**'
78
- 'scripts/create_ops_docs.py'
89
pull_request:
910
paths:
11+
- 'docs/ops.md'
1012
- 'docs/ops/**'
1113
- 'scripts/create_ops_docs.py'
1214

CODEOWNERS

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@
5555
/ggml/src/ggml-cuda/common.cuh @slaren
5656
/ggml/src/ggml-cuda/fattn* @JohannesGaessler
5757
/ggml/src/ggml-cuda/ggml-cuda.cu @slaren
58-
/ggml/src/ggml-cuda/mmf.* @JohannesGaessler
58+
/ggml/src/ggml-cuda/mmf.* @JohannesGaessler @am17an
5959
/ggml/src/ggml-cuda/mmq.* @JohannesGaessler
6060
/ggml/src/ggml-cuda/mmvf.* @JohannesGaessler
6161
/ggml/src/ggml-cuda/mmvq.* @JohannesGaessler
@@ -65,6 +65,7 @@
6565
/ggml/src/ggml-impl.h @ggerganov @slaren
6666
/ggml/src/ggml-metal/ @ggerganov
6767
/ggml/src/ggml-opencl/ @lhez @max-krasnyansky
68+
/ggml/src/ggml-hexagon/ @max-krasnyansky
6869
/ggml/src/ggml-opt.cpp @JohannesGaessler
6970
/ggml/src/ggml-quants.* @ggerganov
7071
/ggml/src/ggml-rpc/ @rgerganov

README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
8484
- [X] [Mistral 7B](https://huggingface.co/mistralai/Mistral-7B-v0.1)
8585
- [x] [Mixtral MoE](https://huggingface.co/models?search=mistral-ai/Mixtral)
8686
- [x] [DBRX](https://huggingface.co/databricks/dbrx-instruct)
87+
- [x] [Jamba](https://huggingface.co/ai21labs)
8788
- [X] [Falcon](https://huggingface.co/models?search=tiiuae/falcon)
8889
- [X] [Chinese LLaMA / Alpaca](https://github.com/ymcui/Chinese-LLaMA-Alpaca) and [Chinese LLaMA-2 / Alpaca-2](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2)
8990
- [X] [Vigogne (French)](https://github.com/bofenghuang/vigogne)
@@ -138,6 +139,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
138139
- [x] [Ling models](https://huggingface.co/collections/inclusionAI/ling-67c51c85b34a7ea0aba94c32)
139140
- [x] [LFM2 models](https://huggingface.co/collections/LiquidAI/lfm2-686d721927015b2ad73eaa38)
140141
- [x] [Hunyuan models](https://huggingface.co/collections/tencent/hunyuan-dense-model-6890632cda26b19119c9c5e7)
142+
- [x] [BailingMoeV2 (Ring/Ling 2.0) models](https://huggingface.co/collections/inclusionAI/ling-v2-68bf1dd2fc34c306c1fa6f86)
141143

142144
#### Multimodal
143145

@@ -187,6 +189,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
187189
- Swift [srgtuszy/llama-cpp-swift](https://github.com/srgtuszy/llama-cpp-swift)
188190
- Swift [ShenghaiWang/SwiftLlama](https://github.com/ShenghaiWang/SwiftLlama)
189191
- Delphi [Embarcadero/llama-cpp-delphi](https://github.com/Embarcadero/llama-cpp-delphi)
192+
- Go (no CGo needed): [hybridgroup/yzma](https://github.com/hybridgroup/yzma)
190193

191194
</details>
192195

@@ -278,6 +281,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
278281
| [IBM zDNN](docs/backend/zDNN.md) | IBM Z & LinuxONE |
279282
| [WebGPU [In Progress]](docs/build.md#webgpu) | All |
280283
| [RPC](https://github.com/ggml-org/llama.cpp/tree/master/tools/rpc) | All |
284+
| [Hexagon [In Progress]](docs/backend/hexagon/README.md) | Snapdragon |
281285

282286
## Obtaining and quantizing models
283287

ci/run.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ if [ ! -z ${GG_BUILD_ROCM} ]; then
7575
exit 1
7676
fi
7777

78-
CMAKE_EXTRA="${CMAKE_EXTRA} -DAMDGPU_TARGETS=${GG_BUILD_AMDGPU_TARGETS}"
78+
CMAKE_EXTRA="${CMAKE_EXTRA} -DGPU_TARGETS=${GG_BUILD_AMDGPU_TARGETS}"
7979
fi
8080

8181
if [ ! -z ${GG_BUILD_SYCL} ]; then

common/arg.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1760,7 +1760,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
17601760
).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_LOOKUP}));
17611761
add_opt(common_arg(
17621762
{"-t", "--threads"}, "N",
1763-
string_format("number of threads to use during generation (default: %d)", params.cpuparams.n_threads),
1763+
string_format("number of CPU threads to use during generation (default: %d)", params.cpuparams.n_threads),
17641764
[](common_params & params, int value) {
17651765
params.cpuparams.n_threads = value;
17661766
if (params.cpuparams.n_threads <= 0) {
@@ -3435,7 +3435,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
34353435
[](common_params & params) {
34363436
params.use_jinja = true;
34373437
}
3438-
).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_MAIN}).set_env("LLAMA_ARG_JINJA"));
3438+
).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_MTMD}).set_env("LLAMA_ARG_JINJA"));
34393439
add_opt(common_arg(
34403440
{"--reasoning-format"}, "FORMAT",
34413441
"controls whether thought tags are allowed and/or extracted from the response, and in which format they're returned; one of:\n"

common/json-schema-to-grammar.cpp

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,9 @@ static std::string build_repetition(const std::string & item_rule, int min_items
4141
return result;
4242
}
4343

44-
static void _build_min_max_int(int min_value, int max_value, std::stringstream & out, int decimals_left = 16, bool top_level = true) {
45-
auto has_min = min_value != std::numeric_limits<int>::min();
46-
auto has_max = max_value != std::numeric_limits<int>::max();
44+
static void _build_min_max_int(int64_t min_value, int64_t max_value, std::stringstream & out, int decimals_left = 16, bool top_level = true) {
45+
auto has_min = min_value != std::numeric_limits<int64_t>::min();
46+
auto has_max = max_value != std::numeric_limits<int64_t>::max();
4747

4848
auto digit_range = [&](char from, char to) {
4949
out << "[";
@@ -159,7 +159,7 @@ static void _build_min_max_int(int min_value, int max_value, std::stringstream &
159159
if (has_min) {
160160
if (min_value < 0) {
161161
out << "\"-\" (";
162-
_build_min_max_int(std::numeric_limits<int>::min(), -min_value, out, decimals_left, /* top_level= */ false);
162+
_build_min_max_int(std::numeric_limits<int64_t>::min(), -min_value, out, decimals_left, /* top_level= */ false);
163163
out << ") | [0] | [1-9] ";
164164
more_digits(0, decimals_left - 1);
165165
} else if (min_value == 0) {
@@ -194,7 +194,7 @@ static void _build_min_max_int(int min_value, int max_value, std::stringstream &
194194
}
195195
digit_range(c, c);
196196
out << " (";
197-
_build_min_max_int(std::stoi(min_s.substr(1)), std::numeric_limits<int>::max(), out, less_decimals, /* top_level= */ false);
197+
_build_min_max_int(std::stoll(min_s.substr(1)), std::numeric_limits<int64_t>::max(), out, less_decimals, /* top_level= */ false);
198198
out << ")";
199199
if (c < '9') {
200200
out << " | ";
@@ -216,7 +216,7 @@ static void _build_min_max_int(int min_value, int max_value, std::stringstream &
216216
_build_min_max_int(0, max_value, out, decimals_left, /* top_level= */ true);
217217
} else {
218218
out << "\"-\" (";
219-
_build_min_max_int(-max_value, std::numeric_limits<int>::max(), out, decimals_left, /* top_level= */ false);
219+
_build_min_max_int(-max_value, std::numeric_limits<int64_t>::max(), out, decimals_left, /* top_level= */ false);
220220
out << ")";
221221
}
222222
return;
@@ -925,17 +925,17 @@ class SchemaConverter {
925925
int max_len = schema.contains("maxLength") ? schema["maxLength"].get<int>() : std::numeric_limits<int>::max();
926926
return _add_rule(rule_name, "\"\\\"\" " + build_repetition(char_rule, min_len, max_len) + " \"\\\"\" space");
927927
} else if (schema_type == "integer" && (schema.contains("minimum") || schema.contains("exclusiveMinimum") || schema.contains("maximum") || schema.contains("exclusiveMaximum"))) {
928-
int min_value = std::numeric_limits<int>::min();
929-
int max_value = std::numeric_limits<int>::max();
928+
int64_t min_value = std::numeric_limits<int64_t>::min();
929+
int64_t max_value = std::numeric_limits<int64_t>::max();
930930
if (schema.contains("minimum")) {
931-
min_value = schema["minimum"].get<int>();
931+
min_value = schema["minimum"].get<int64_t>();
932932
} else if (schema.contains("exclusiveMinimum")) {
933-
min_value = schema["exclusiveMinimum"].get<int>() + 1;
933+
min_value = schema["exclusiveMinimum"].get<int64_t>() + 1;
934934
}
935935
if (schema.contains("maximum")) {
936-
max_value = schema["maximum"].get<int>();
936+
max_value = schema["maximum"].get<int64_t>();
937937
} else if (schema.contains("exclusiveMaximum")) {
938-
max_value = schema["exclusiveMaximum"].get<int>() - 1;
938+
max_value = schema["exclusiveMaximum"].get<int64_t>() - 1;
939939
}
940940
std::stringstream out;
941941
out << "(";

0 commit comments

Comments
 (0)