
Commit 5ff37fa

Merge remote-tracking branch 'origin/master' into Mamba2SSD
* origin/master: (169 commits)
  opencl: support imrope (ggml-org#16914)
  fix: Viewing multiple PDF attachments (ggml-org#16974)
  model-conversion : pass config to from_pretrained (ggml-org#16963)
  server : add props.model_alias (ggml-org#16943)
  ggml: CUDA: add head size 72 for flash-attn (ggml-org#16962)
  mtmd: add --image-min/max-tokens (ggml-org#16921)
  mtmd: pad mask for qwen2.5vl (ggml-org#16954)
  ggml : LoongArch fixes (ggml-org#16958)
  sync: minja (glm 4.6 & minmax m2 templates) (ggml-org#16949)
  SYCL: optimized repeat_back kernel (3× fewer asm instructions, 2× faster) (ggml-org#16869)
  feat(webui): improve LaTeX rendering with currency detection (ggml-org#16508)
  test-backend-ops : fix segfault in moe-expert-reduce test in support mode and coverage (ggml-org#16936)
  ci : disable failing riscv cross build (ggml-org#16952)
  model: add Janus Pro for image understanding (ggml-org#16906)
  clip : use FA (ggml-org#16837)
  server : support unified cache across slots (ggml-org#16736)
  common : move gpt-oss reasoning processing to init params (ggml-org#16937)
  docs: remove llama_sampler_accept reference in sampling sample usage (ggml-org#16920)
  CUDA: add FLOOR, CEIL, ROUND, TRUNC unary ops (ggml-org#16917)
  devops: fix failing s390x docker build (ggml-org#16918)
  ...
2 parents: 36244fe + c5023da

File tree

405 files changed: +46327 additions, −18699 deletions


.devops/s390x.Dockerfile

Lines changed: 4 additions & 1 deletion

@@ -24,8 +24,9 @@ RUN --mount=type=cache,target=/root/.ccache \
         -DCMAKE_C_COMPILER_LAUNCHER=ccache \
         -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
         -DLLAMA_BUILD_TESTS=OFF \
-        -DGGML_BACKEND_DL=OFF \
         -DGGML_NATIVE=OFF \
+        -DGGML_BACKEND_DL=ON \
+        -DGGML_CPU_ALL_VARIANTS=ON \
         -DGGML_BLAS=ON \
         -DGGML_BLAS_VENDOR=OpenBLAS && \
     cmake --build build --config Release -j $(nproc) && \
@@ -103,6 +104,7 @@ FROM base AS light
 WORKDIR /llama.cpp/bin
 
 # Copy llama.cpp binaries and libraries
+COPY --from=collector /llama.cpp/bin/*.so /llama.cpp/bin
 COPY --from=collector /llama.cpp/bin/llama-cli /llama.cpp/bin
 
 ENTRYPOINT [ "/llama.cpp/bin/llama-cli" ]
@@ -116,6 +118,7 @@ ENV LLAMA_ARG_HOST=0.0.0.0
 WORKDIR /llama.cpp/bin
 
 # Copy llama.cpp binaries and libraries
+COPY --from=collector /llama.cpp/bin/*.so /llama.cpp/bin
 COPY --from=collector /llama.cpp/bin/llama-server /llama.cpp/bin
 
 EXPOSE 8080
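With GGML_BACKEND_DL=ON the backends are built as shared objects that are loaded at runtime, which is why both image stages now copy the *.so files next to each binary. A minimal sketch of the configure step this Dockerfile performs (flags taken from the hunk above):

```sh
# Sketch: dynamic backend loading plus all CPU variants, as configured above.
# The resulting *.so backend plugins must ship alongside llama-cli/llama-server.
cmake -B build \
    -DGGML_BACKEND_DL=ON \
    -DGGML_CPU_ALL_VARIANTS=ON \
    -DGGML_NATIVE=OFF \
    -DGGML_BLAS=ON \
    -DGGML_BLAS_VENDOR=OpenBLAS
cmake --build build --config Release -j "$(nproc)"
```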

.github/workflows/build-linux-cross.yml

Lines changed: 37 additions & 37 deletions

@@ -4,49 +4,49 @@ on:
   workflow_call:
 
 jobs:
-  ubuntu-24-riscv64-cpu-cross:
-    runs-on: ubuntu-24.04
+  # ubuntu-24-riscv64-cpu-cross:
+  #   runs-on: ubuntu-24.04
 
-    steps:
-      - uses: actions/checkout@v4
-      - name: Setup Riscv
-        run: |
-          sudo dpkg --add-architecture riscv64
+  #   steps:
+  #     - uses: actions/checkout@v4
+  #     - name: Setup Riscv
+  #       run: |
+  #         sudo dpkg --add-architecture riscv64
 
-          # Add arch-specific repositories for non-amd64 architectures
-          cat << EOF | sudo tee /etc/apt/sources.list.d/riscv64-ports.list
-          deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
-          deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
-          deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
-          deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
-          EOF
+  #         # Add arch-specific repositories for non-amd64 architectures
+  #         cat << EOF | sudo tee /etc/apt/sources.list.d/riscv64-ports.list
+  #         deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
+  #         deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
+  #         deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
+  #         deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
+  #         EOF
 
-          sudo apt-get update || true ;# Prevent failure due to missing URLs.
+  #         sudo apt-get update || true ;# Prevent failure due to missing URLs.
 
-          sudo apt-get install -y --no-install-recommends \
-            build-essential \
-            gcc-14-riscv64-linux-gnu \
-            g++-14-riscv64-linux-gnu
+  #         sudo apt-get install -y --no-install-recommends \
+  #           build-essential \
+  #           gcc-14-riscv64-linux-gnu \
+  #           g++-14-riscv64-linux-gnu
 
-      - name: Build
-        run: |
-          cmake -B build -DLLAMA_CURL=OFF \
-            -DCMAKE_BUILD_TYPE=Release \
-            -DGGML_OPENMP=OFF \
-            -DLLAMA_BUILD_EXAMPLES=ON \
-            -DLLAMA_BUILD_TOOLS=ON \
-            -DLLAMA_BUILD_TESTS=OFF \
-            -DCMAKE_SYSTEM_NAME=Linux \
-            -DCMAKE_SYSTEM_PROCESSOR=riscv64 \
-            -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \
-            -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 \
-            -DCMAKE_POSITION_INDEPENDENT_CODE=ON \
-            -DCMAKE_FIND_ROOT_PATH=/usr/lib/riscv64-linux-gnu \
-            -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
-            -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
-            -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
+  #     - name: Build
+  #       run: |
+  #         cmake -B build -DLLAMA_CURL=OFF \
+  #           -DCMAKE_BUILD_TYPE=Release \
+  #           -DGGML_OPENMP=OFF \
+  #           -DLLAMA_BUILD_EXAMPLES=ON \
+  #           -DLLAMA_BUILD_TOOLS=ON \
+  #           -DLLAMA_BUILD_TESTS=OFF \
+  #           -DCMAKE_SYSTEM_NAME=Linux \
+  #           -DCMAKE_SYSTEM_PROCESSOR=riscv64 \
+  #           -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \
+  #           -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 \
+  #           -DCMAKE_POSITION_INDEPENDENT_CODE=ON \
+  #           -DCMAKE_FIND_ROOT_PATH=/usr/lib/riscv64-linux-gnu \
+  #           -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
+  #           -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
+  #           -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
 
-          cmake --build build --config Release -j $(nproc)
+  #         cmake --build build --config Release -j $(nproc)
 
   # ubuntu-24-riscv64-vulkan-cross:
   #   runs-on: ubuntu-24.04
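The job is only commented out, not removed (see "ci : disable failing riscv cross build (ggml-org#16952)" in the merge list above), so the same configuration can still be run by hand. A sketch, assuming an Ubuntu 24.04 host with the riscv64 cross toolchain from the old job installed:

```sh
# Manual run of the disabled cross-build, with the same flags as the commented job.
sudo apt-get install -y --no-install-recommends build-essential \
    gcc-14-riscv64-linux-gnu g++-14-riscv64-linux-gnu
cmake -B build -DLLAMA_CURL=OFF \
    -DCMAKE_BUILD_TYPE=Release \
    -DGGML_OPENMP=OFF \
    -DCMAKE_SYSTEM_NAME=Linux \
    -DCMAKE_SYSTEM_PROCESSOR=riscv64 \
    -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \
    -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 \
    -DCMAKE_FIND_ROOT_PATH=/usr/lib/riscv64-linux-gnu \
    -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
    -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
    -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
cmake --build build --config Release -j "$(nproc)"
```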

.github/workflows/build.yml

Lines changed: 75 additions & 0 deletions

@@ -1305,6 +1305,81 @@ jobs:
       cd examples/llama.android
       ./gradlew build --no-daemon
 
+  android-ndk-build:
+    runs-on: ubuntu-latest
+
+    env:
+      OPENCL_VERSION: 2025.07.22
+
+    strategy:
+      matrix:
+        include:
+          - build: 'arm64-cpu'
+            defines: '-D ANDROID_ABI=arm64-v8a -D ANDROID_PLATFORM=android-31 -D CMAKE_TOOLCHAIN_FILE=${ANDROID_NDK_ROOT}/build/cmake/android.toolchain.cmake -D GGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv8.5-a+fp16+i8mm -G Ninja -D LLAMA_CURL=OFF -D GGML_OPENMP=OFF'
+          - build: 'arm64-snapdragon'
+            defines: '--preset arm64-android-snapdragon-release'
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: Install OpenCL Headers and Libs
+        id: install_opencl
+        if: ${{ matrix.build == 'arm64-snapdragon' }}
+        run: |
+          mkdir opencl
+          curl -L -o opencl/clhpp.tar.gz https://github.com/KhronosGroup/OpenCL-CLHPP/archive/refs/tags/v${OPENCL_VERSION}.tar.gz
+          curl -L -o opencl/headers.tar.gz https://github.com/KhronosGroup/OpenCL-Headers/archive/refs/tags/v${OPENCL_VERSION}.tar.gz
+          curl -L -o opencl/icd-loader.tar.gz https://github.com/KhronosGroup/OpenCL-ICD-Loader/archive/refs/tags/v${OPENCL_VERSION}.tar.gz
+          tar -xaf opencl/headers.tar.gz -C opencl
+          tar -xaf opencl/clhpp.tar.gz -C opencl
+          tar -xaf opencl/icd-loader.tar.gz -C opencl
+          sudo cp -r opencl/OpenCL-Headers-${OPENCL_VERSION}/CL ${ANDROID_NDK_ROOT}/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include
+          sudo cp -r opencl/OpenCL-CLHPP-${OPENCL_VERSION}/include/CL/* ${ANDROID_NDK_ROOT}/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include/CL
+          cd opencl/OpenCL-ICD-Loader-${OPENCL_VERSION}
+          cmake -B build -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK_ROOT}/build/cmake/android.toolchain.cmake -DOPENCL_ICD_LOADER_HEADERS_DIR=${ANDROID_NDK_ROOT}/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include -DANDROID_ABI=arm64-v8a -DANDROID_PLATFORM=31 -DANDROID_STL=c++_shared
+          cmake --build build
+          sudo cp build/libOpenCL.so ${ANDROID_NDK_ROOT}/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/lib/aarch64-linux-android
+          rm -rf opencl
+
+      - name: Install Hexagon SDK
+        id: install_hexsdk
+        if: ${{ matrix.build == 'arm64-snapdragon' }}
+        env:
+          HEXSDK_VER: 6.4.0.2
+          HEXTLS_VER: 19.0.04
+        run: |
+          curl -L -o hex-sdk.tar.gz https://github.com/snapdragon-toolchain/hexagon-sdk/releases/download/v$HEXSDK_VER/hexagon-sdk-v$HEXSDK_VER-amd64-lnx.tar.xz
+          mkdir hex-sdk
+          tar -xaf hex-sdk.tar.gz -C hex-sdk
+          ls -l hex-sdk
+          sudo mv hex-sdk /opt/hexagon
+          echo "HEXAGON_SDK_ROOT=/opt/hexagon/$HEXSDK_VER" >> "$GITHUB_ENV"
+          echo "HEXAGON_TOOLS_ROOT=/opt/hexagon/$HEXSDK_VER/tools/HEXAGON_Tools/$HEXTLS_VER" >> "$GITHUB_ENV"
+          echo "DEFAULT_HLOS_ARCH=64" >> "$GITHUB_ENV"
+          echo "DEFAULT_TOOLS_VARIANT=toolv19" >> "$GITHUB_ENV"
+          echo "DEFAULT_NO_QURT_INC=0" >> "$GITHUB_ENV"
+          echo "DEFAULT_DSP_ARCH=v73" >> "$GITHUB_ENV"
+
+      - name: Update CMake presets
+        id: update_presets
+        if: ${{ matrix.build == 'arm64-snapdragon' }}
+        run: |
+          cp docs/backend/hexagon/CMakeUserPresets.json .
+
+      - name: Build
+        id: ndk_build
+        run: |
+          cmake ${{ matrix.defines }} -B build
+          cmake --build build
+          cmake --install build --prefix pkg-adb/llama.cpp
+
+      - name: Test
+        id: cmake_test
+        run: |
+          echo "FIXME: test on devices"
+
   openEuler-latest-cmake-cann:
     if: ${{ github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'Ascend NPU') }}
     defaults:
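For the 'arm64-snapdragon' leg, the build is driven entirely by a CMake preset copied out of the docs tree. A hedged local repro sketch, assuming ANDROID_NDK_ROOT, the OpenCL headers/libOpenCL.so in the NDK sysroot, and the HEXAGON_* environment variables are set up as the workflow steps above do:

```sh
# Local equivalent of the 'arm64-snapdragon' matrix leg.
cp docs/backend/hexagon/CMakeUserPresets.json .
cmake --preset arm64-android-snapdragon-release -B build
cmake --build build
cmake --install build --prefix pkg-adb/llama.cpp   # staged for pushing to a device via adb
```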

.github/workflows/docker.yml

Lines changed: 1 addition & 1 deletion

@@ -40,7 +40,7 @@ jobs:
           # https://github.com/ggml-org/llama.cpp/issues/11888
           #- { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, free_disk_space: false }
           - { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false, runs_on: "ubuntu-22.04" }
-          - { tag: "cuda", dockerfile: ".devops/cuda.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false, runs_on: "ubuntu-22.04" }
+          - { tag: "cuda", dockerfile: ".devops/cuda.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true, runs_on: "ubuntu-22.04" }
           - { tag: "musa", dockerfile: ".devops/musa.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true, runs_on: "ubuntu-22.04" }
           - { tag: "intel", dockerfile: ".devops/intel.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true, runs_on: "ubuntu-22.04" }
           - { tag: "vulkan", dockerfile: ".devops/vulkan.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false, runs_on: "ubuntu-22.04" }

.github/workflows/release.yml

Lines changed: 2 additions & 0 deletions

@@ -134,6 +134,8 @@ jobs:
       include:
         - build: 'x64'
           os: ubuntu-22.04
+        - build: 's390x'
+          os: ubuntu-24.04-s390x
         # GGML_BACKEND_DL and GGML_CPU_ALL_VARIANTS are not currently supported on arm
         # - build: 'arm64'
         #   os: ubuntu-22.04-arm

.github/workflows/update-ops-docs.yml

Lines changed: 2 additions & 0 deletions

@@ -3,10 +3,12 @@ name: Update Operations Documentation
 on:
   push:
     paths:
+      - 'docs/ops.md'
       - 'docs/ops/**'
       - 'scripts/create_ops_docs.py'
   pull_request:
     paths:
+      - 'docs/ops.md'
       - 'docs/ops/**'
       - 'scripts/create_ops_docs.py'
 

CODEOWNERS

Lines changed: 3 additions & 1 deletion

@@ -55,7 +55,7 @@
 /ggml/src/ggml-cuda/common.cuh @slaren
 /ggml/src/ggml-cuda/fattn* @JohannesGaessler
 /ggml/src/ggml-cuda/ggml-cuda.cu @slaren
-/ggml/src/ggml-cuda/mmf.* @JohannesGaessler
+/ggml/src/ggml-cuda/mmf.* @JohannesGaessler @am17an
 /ggml/src/ggml-cuda/mmq.* @JohannesGaessler
 /ggml/src/ggml-cuda/mmvf.* @JohannesGaessler
 /ggml/src/ggml-cuda/mmvq.* @JohannesGaessler
@@ -65,6 +65,7 @@
 /ggml/src/ggml-impl.h @ggerganov @slaren
 /ggml/src/ggml-metal/ @ggerganov
 /ggml/src/ggml-opencl/ @lhez @max-krasnyansky
+/ggml/src/ggml-hexagon/ @max-krasnyansky @lhez
 /ggml/src/ggml-opt.cpp @JohannesGaessler
 /ggml/src/ggml-quants.* @ggerganov
 /ggml/src/ggml-rpc/ @rgerganov
@@ -88,6 +89,7 @@
 /src/llama-model-loader.* @slaren
 /src/llama-model.* @CISC
 /src/llama-vocab.* @CISC
+/src/models/ @CISC
 /tests/ @ggerganov
 /tests/test-backend-ops.cpp @slaren
 /tests/test-thread-safety.cpp @slaren

README.md

Lines changed: 4 additions & 0 deletions

@@ -84,6 +84,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
 - [X] [Mistral 7B](https://huggingface.co/mistralai/Mistral-7B-v0.1)
 - [x] [Mixtral MoE](https://huggingface.co/models?search=mistral-ai/Mixtral)
 - [x] [DBRX](https://huggingface.co/databricks/dbrx-instruct)
+- [x] [Jamba](https://huggingface.co/ai21labs)
 - [X] [Falcon](https://huggingface.co/models?search=tiiuae/falcon)
 - [X] [Chinese LLaMA / Alpaca](https://github.com/ymcui/Chinese-LLaMA-Alpaca) and [Chinese LLaMA-2 / Alpaca-2](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2)
 - [X] [Vigogne (French)](https://github.com/bofenghuang/vigogne)
@@ -138,6 +139,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
 - [x] [Ling models](https://huggingface.co/collections/inclusionAI/ling-67c51c85b34a7ea0aba94c32)
 - [x] [LFM2 models](https://huggingface.co/collections/LiquidAI/lfm2-686d721927015b2ad73eaa38)
 - [x] [Hunyuan models](https://huggingface.co/collections/tencent/hunyuan-dense-model-6890632cda26b19119c9c5e7)
+- [x] [BailingMoeV2 (Ring/Ling 2.0) models](https://huggingface.co/collections/inclusionAI/ling-v2-68bf1dd2fc34c306c1fa6f86)
 
 #### Multimodal
 
@@ -187,6 +189,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
 - Swift [srgtuszy/llama-cpp-swift](https://github.com/srgtuszy/llama-cpp-swift)
 - Swift [ShenghaiWang/SwiftLlama](https://github.com/ShenghaiWang/SwiftLlama)
 - Delphi [Embarcadero/llama-cpp-delphi](https://github.com/Embarcadero/llama-cpp-delphi)
+- Go (no CGo needed): [hybridgroup/yzma](https://github.com/hybridgroup/yzma)
 
 </details>
 
@@ -278,6 +281,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
 | [IBM zDNN](docs/backend/zDNN.md) | IBM Z & LinuxONE |
 | [WebGPU [In Progress]](docs/build.md#webgpu) | All |
 | [RPC](https://github.com/ggml-org/llama.cpp/tree/master/tools/rpc) | All |
+| [Hexagon [In Progress]](docs/backend/hexagon/README.md) | Snapdragon |
 
 ## Obtaining and quantizing models
ci/run.sh

Lines changed: 1 addition & 1 deletion

@@ -75,7 +75,7 @@ if [ ! -z ${GG_BUILD_ROCM} ]; then
         exit 1
     fi
 
-    CMAKE_EXTRA="${CMAKE_EXTRA} -DAMDGPU_TARGETS=${GG_BUILD_AMDGPU_TARGETS}"
+    CMAKE_EXTRA="${CMAKE_EXTRA} -DGPU_TARGETS=${GG_BUILD_AMDGPU_TARGETS}"
 fi
 
 if [ ! -z ${GG_BUILD_SYCL} ]; then
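Only the CMake cache variable changes here (AMDGPU_TARGETS is superseded by GPU_TARGETS in newer HIP/ROCm builds); the GG_BUILD_AMDGPU_TARGETS environment variable that callers set keeps its old name. A usage sketch, assuming the script's usual two-argument invocation and a placeholder gfx target:

```sh
# Caller side is unchanged: the env var keeps its old name; only the
# -D option it is forwarded to was renamed (AMDGPU_TARGETS -> GPU_TARGETS).
GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS="gfx1100" \
    bash ./ci/run.sh ./tmp/results ./tmp/mnt
```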

common/arg.cpp

Lines changed: 19 additions & 5 deletions

@@ -1760,7 +1760,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
     ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_LOOKUP}));
     add_opt(common_arg(
         {"-t", "--threads"}, "N",
-        string_format("number of threads to use during generation (default: %d)", params.cpuparams.n_threads),
+        string_format("number of CPU threads to use during generation (default: %d)", params.cpuparams.n_threads),
         [](common_params & params, int value) {
             params.cpuparams.n_threads = value;
             if (params.cpuparams.n_threads <= 0) {
@@ -2030,7 +2030,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
                 params.system_prompt.pop_back();
             }
         }
-    ).set_examples({LLAMA_EXAMPLE_MAIN}));
+    ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_DIFFUSION}));
     add_opt(common_arg(
         {"--in-file"}, "FNAME",
         "an input file (repeat to specify multiple files)",
@@ -2768,6 +2768,20 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.image.emplace_back(value);
         }
     ).set_examples({LLAMA_EXAMPLE_MTMD}));
+    add_opt(common_arg(
+        {"--image-min-tokens"}, "N",
+        "minimum number of tokens each image can take, only used by vision models with dynamic resolution (default: read from model)",
+        [](common_params & params, int value) {
+            params.image_min_tokens = value;
+        }
+    ).set_examples(mmproj_examples).set_env("LLAMA_ARG_IMAGE_MIN_TOKENS"));
+    add_opt(common_arg(
+        {"--image-max-tokens"}, "N",
+        "maximum number of tokens each image can take, only used by vision models with dynamic resolution (default: read from model)",
+        [](common_params & params, int value) {
+            params.image_max_tokens = value;
+        }
+    ).set_examples(mmproj_examples).set_env("LLAMA_ARG_IMAGE_MAX_TOKENS"));
     if (llama_supports_rpc()) {
         add_opt(common_arg(
             {"--rpc"}, "SERVERS",
@@ -3203,7 +3217,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
     ).set_examples({LLAMA_EXAMPLE_IMATRIX}));
     add_opt(common_arg(
         {"--parse-special"},
-        string_format("prase special tokens (chat, tool, etc) (default: %s)", params.parse_special ? "true" : "false"),
+        string_format("parse special tokens (chat, tool, etc) (default: %s)", params.parse_special ? "true" : "false"),
         [](common_params & params) {
             params.parse_special = true;
         }
@@ -3248,7 +3262,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
     ).set_examples({LLAMA_EXAMPLE_EMBEDDING}));
     add_opt(common_arg(
         {"--embd-output-format"}, "FORMAT",
-        "empty = default, \"array\" = [[],[]...], \"json\" = openai style, \"json+\" = same \"json\" + cosine similarity matrix",
+        "empty = default, \"array\" = [[],[]...], \"json\" = openai style, \"json+\" = same \"json\" + cosine similarity matrix, \"raw\" = plain whitespace-delimited output (one embedding per line)",
         [](common_params & params, const std::string & value) {
             params.embd_out = value;
         }
@@ -3435,7 +3449,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         [](common_params & params) {
             params.use_jinja = true;
         }
-    ).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_MAIN}).set_env("LLAMA_ARG_JINJA"));
+    ).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_MTMD}).set_env("LLAMA_ARG_JINJA"));
     add_opt(common_arg(
         {"--reasoning-format"}, "FORMAT",
         "controls whether thought tags are allowed and/or extracted from the response, and in which format they're returned; one of:\n"
