Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
83 commits
Select commit Hold shift + click to select a range
2c9467c
first commit of secure rpc
struct Sep 13, 2025
6dbf19e
Merge branch 'ggml-org:master' into master_secure_ggml_rpc
struct Sep 19, 2025
f20a7a4
chat: Fix streaming parser for granite models (#15682)
shun095 Sep 19, 2025
40fdcd8
llama-bench: add --devices and --list-devices support (#16039)
ssweens Sep 19, 2025
c0e18f5
server: fix SSE and OpenAI compatibility for error messages when stre…
BenjaminBruenau Sep 20, 2025
b22dc26
vulkan: use vec dot for matrix matrix multiplications (#16056)
0cc4m Sep 20, 2025
bdae4cc
CUDA : conditionally add cuda architectures (ggml/1341)
gjasny Sep 10, 2025
199ae3f
ggml : introduce semantic versioning (ggml/1336)
danbev Sep 16, 2025
a019606
sync : ggml
ggerganov Sep 20, 2025
9e6228f
vulkan: fix validation error about VK_PIPELINE_CREATE_CAPTURE_STATIST…
jeffbolznv Sep 21, 2025
1872d71
vulkan: optimize UMA buffer operations and fix driver hangs (#16059)
giuseppe Sep 21, 2025
d3c3138
ci : migrate ggml ci to self-hosted runners (#16116)
ggerganov Sep 21, 2025
bb0c2d5
ci : add label for the RISC-V runner (#16150)
ggerganov Sep 21, 2025
86b1356
opencl: initial `q8_0` mv support (#15732)
lhez Sep 21, 2025
d10b97b
opencl: fix concat crash on win arm64 with Adreno (#15944)
lhez Sep 21, 2025
838e509
vulkan: vec dot matrix multiplication fix (#16151)
0cc4m Sep 22, 2025
618613e
ci : adjust params for less runtime (#16167)
ggerganov Sep 22, 2025
2c4874b
vulkan: add RTE variants of exp shader (#16165)
jeffbolznv Sep 22, 2025
cb11841
ci : use smaller model (#16168)
ggerganov Sep 22, 2025
4d55235
ci : remove vulkaninfo calls (#16169)
ggerganov Sep 22, 2025
9519b44
contrib : update roles (#16113)
ggerganov Sep 22, 2025
1a4caf9
codeowners : claim responsibility for ci, models, gguf-py and convert…
CISC Sep 22, 2025
def127e
Vulkan: add conv_transpose_2d operation (#16022)
relent95 Sep 22, 2025
329780f
codeowners : update ownership for @ngxson and @allozuar (#16128)
ngxson Sep 22, 2025
1b23dd5
ggml : add ggml_op_is_empty (#16122)
ggerganov Sep 22, 2025
d0a69b1
ggml : extend ggml_can_fuse to work with non-sequential nodes (#16123)
ggerganov Sep 22, 2025
3823c14
common : remove unused local variables (#16140)
haiyuewa Sep 22, 2025
b4a3a10
embedding : fix typos in README (#16171)
GideonSerf Sep 22, 2025
4653d80
webui : fix handling incomplete chunks (#16107)
Bramas Sep 22, 2025
76985ed
common : enable `--offline` mode without curl support (#16137)
angt Sep 22, 2025
f092de7
codeowners : update + cleanup (#16174)
ggerganov Sep 22, 2025
714a0c1
ggml : implement set_rows with i32 index (#16159)
CISC Sep 22, 2025
223baeb
clang-tidy : disable warning about performance enum size (#16127)
haiyuewa Sep 22, 2025
f7ee0a5
feat: Add conversion support in GraniteHybrid for non-hybrid (all att…
gabe-l-hart Sep 22, 2025
f707bdc
ggml-cpu : fix typo in gemm comments [no ci] (#16189)
danbev Sep 23, 2025
2e2c1a7
devops: add s390x containers (#15915)
taronaeo Sep 23, 2025
1a3b912
codeowners : add @danbev to model-conversion example [no ci] (#16190)
danbev Sep 23, 2025
a787bdb
zdnn: refactor codebase + add docs (#16178)
taronaeo Sep 23, 2025
2cd9691
ggml : fix uninitialized is_on_grid in quantize_row_iq3_xxs_impl (#15…
CISC Sep 23, 2025
a3cedc6
ggml-cpu: Respect cpumask settings (#16164)
wishstudio Sep 23, 2025
de11e0c
ci : enable Vulkan workflow on Mac (#16194)
ggerganov Sep 23, 2025
8f54904
ci : disable AMD workflows + update NVIDIA workflows (#16200)
ggerganov Sep 23, 2025
e7e34fc
model-conversion : fix the make targets in the README.md (#16209)
DamonFool Sep 24, 2025
367caa3
codeowners : use slash prefix for root files [no ci] (#16210)
danbev Sep 24, 2025
ba522d1
model-conversion : run-org-model.py fails to run on mac m1 (#16213)
DamonFool Sep 24, 2025
c9386e6
codeowners : match all requirements files (#16214)
CISC Sep 24, 2025
c99060b
common : add missing chrono header for common.cpp (#16211)
uilianries Sep 24, 2025
ca6c6b3
model-conversion : make causal-verify-logits fails with model names c…
DamonFool Sep 24, 2025
ad8c587
model : add label for LiquidAI LFM2-2.6B model (#16204)
tdakhran Sep 24, 2025
c5ae529
ggml : split graph allocations according to backend max buffer size (…
Acly Sep 24, 2025
e7a8fb7
llama: print memory breakdown on exit (#15860)
JohannesGaessler Sep 24, 2025
cc34c05
codeowners: add ownership of zdnn backend [no ci] (#16229)
taronaeo Sep 24, 2025
97ca887
devops: fix s390x docker release failure (#16231)
taronaeo Sep 25, 2025
4daa3b0
ci: run the x64 and arm ci on the github machines instead (#16183)
netrunnereve Sep 25, 2025
0694345
codeowners: add ownership of zdnn backend [no ci] (#16232)
taronaeo Sep 25, 2025
390efeb
no conflicts
struct Sep 26, 2025
8b57b66
metal : restore im2col perf (#16219)
ggerganov Sep 25, 2025
45898a9
metal : relax reorder conditions (#16216)
ggerganov Sep 25, 2025
6492522
metal : fuse NORM + MUL + ADD, support non-multiples of 4 (#16220)
ggerganov Sep 25, 2025
e5819d6
llama : add support for qwen3 reranker (#15824)
iamlemec Sep 25, 2025
3e53442
docs: fix typo [no ci] (#16244)
JohannesGaessler Sep 25, 2025
a8f4288
ggml : fix loongarch lsx compilation error (#15864)
junchao-loongson Sep 25, 2025
d967664
server : add support for external server for tests (#16243)
danbev Sep 25, 2025
9fdf159
model-conversion : add embedding prompt file support (#15871)
danbev Sep 25, 2025
be08d72
CUDA: add a fused top-K MoE kernel (#16130)
am17an Sep 25, 2025
a1627b1
readme : update bindings (#16144)
romantal Sep 25, 2025
a326244
vendors: update miniaudio version (#16212)
taronaeo Sep 25, 2025
e09bd6c
model : add GroveMoE support (#15510)
CISC Sep 25, 2025
5c89ea5
musa: fix build warnings (#15611)
yeahdongcn Sep 26, 2025
780009f
musa: upgrade musa sdk to 4.3.0 (#16240)
yeahdongcn Sep 26, 2025
f0887ca
codeowners : add danbev as owner of build-xcframework.sh [no ci] (#16…
danbev Sep 26, 2025
a4823d7
ci : create git tags for released docker images (#16008)
rgerganov Sep 26, 2025
0a8d61f
ggml-cpu: implement MXFP4 SIMD for s390x (#16193)
taronaeo Sep 26, 2025
78b5579
build : fix build-ios-device (#16257)
angt Sep 26, 2025
17d4a49
common : use cpp-httplib as a cURL alternative for downloads (#16185)
angt Sep 26, 2025
b159348
metal : report OOM errors (#16274)
ggerganov Sep 26, 2025
df220f7
mtmd : fix uninitialized variable in bicubic_resize (#16275)
AlekseiNikiforovIBM Sep 26, 2025
4aa9134
codeowners : add rgerganov as owner of RPC [no ci] (#16279)
rgerganov Sep 26, 2025
8484427
Always show message actions for mobile UI + improvements for user mes…
allozaur Sep 26, 2025
d67e0f3
webui: switch to hash-based routing (alternative of #16079) (#16157)
isaac-mcfadyen Sep 26, 2025
335b884
Allow viewing conversations even when llama server is down (#16255)
allozaur Sep 26, 2025
d71dc6c
Enhance text file detection logic for file attachments (#16199)
allozaur Sep 26, 2025
1cc4705
devops: add s390x & ppc64le CI (#15925)
taronaeo Sep 26, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .clang-tidy
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ Checks: >
clang-analyzer-*,
-clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling,
performance-*,
-performance-enum-size,
portability-*,
-portability-simd-intrinsics,
misc-*,
Expand Down
6 changes: 3 additions & 3 deletions .devops/musa.Dockerfile
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG MUSA_VERSION=rc4.2.0
ARG MUSA_VERSION=rc4.3.0
# Target the MUSA build image
ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}-amd64
ARG BASE_MUSA_DEV_CONTAINER=sh-harbor.mthreads.com/haive/mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}-amd64

ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}-amd64
ARG BASE_MUSA_RUN_CONTAINER=sh-harbor.mthreads.com/haive/mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}-amd64

FROM ${BASE_MUSA_DEV_CONTAINER} AS build

Expand Down
123 changes: 123 additions & 0 deletions .devops/s390x.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
ARG GCC_VERSION=15.2.0
ARG UBUNTU_VERSION=24.04

### Build Llama.cpp stage
# Compiles llama.cpp with Ninja + ccache against OpenBLAS, installs it under
# /opt/llama.cpp, then places the Python tooling next to the installed files.
FROM gcc:${GCC_VERSION} AS build

# NOTE(review): /var/lib/apt/lists is a cache mount in this RUN, so the trailing
# `rm -rf` presumably empties the shared cache rather than trimming an image
# layer - confirm it is still wanted.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
    apt update -y && \
    apt upgrade -y && \
    apt install -y --no-install-recommends \
        git cmake ccache ninja-build \
        # WARNING: Do not use libopenblas-openmp-dev. libopenblas-dev is faster.
        libopenblas-dev libcurl4-openssl-dev && \
    rm -rf /var/lib/apt/lists/*

WORKDIR /app
COPY . .

# /app/build is a cache mount and is not part of the resulting layer, so the
# install into /opt/llama.cpp has to happen inside this same RUN.
RUN --mount=type=cache,target=/root/.ccache \
    --mount=type=cache,target=/app/build \
    cmake -S . -B build -G Ninja \
        -DCMAKE_BUILD_TYPE=Release \
        -DCMAKE_C_COMPILER_LAUNCHER=ccache \
        -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
        -DLLAMA_BUILD_TESTS=OFF \
        -DGGML_BACKEND_DL=OFF \
        -DGGML_NATIVE=OFF \
        -DGGML_BLAS=ON \
        -DGGML_BLAS_VENDOR=OpenBLAS && \
    cmake --build build --config Release -j $(nproc) && \
    cmake --install build --prefix /opt/llama.cpp

# Trailing slashes mark the destinations as directories: required by the
# Dockerfile reference when a wildcard expands to several sources, and it keeps
# the single-file copies from ever being written as a file named `bin`/`gguf-py`.
COPY *.py             /opt/llama.cpp/bin/
COPY .devops/tools.sh /opt/llama.cpp/bin/

COPY gguf-py          /opt/llama.cpp/gguf-py
COPY requirements.txt /opt/llama.cpp/gguf-py/
COPY requirements     /opt/llama.cpp/gguf-py/requirements


### Collector stage
# Empty (scratch) stage that gathers the three install trees produced by the
# `build` stage under /llama.cpp so later stages can copy from a single place.
FROM scratch AS collector

# Executables and helper scripts
COPY --from=build /opt/llama.cpp/bin     /llama.cpp/bin
# Libraries
COPY --from=build /opt/llama.cpp/lib     /llama.cpp/lib
# gguf-py package together with the requirements files copied beside it
COPY --from=build /opt/llama.cpp/gguf-py /llama.cpp/gguf-py


### Base image
# Common runtime layer for the `full`, `light` and `server` stages: Ubuntu plus
# curl, libgomp and OpenBLAS, with the llama.cpp libraries dropped into the
# s390x multiarch library directory.
FROM ubuntu:${UBUNTU_VERSION} AS base

# WARNING: Do not use libopenblas-openmp-dev. libopenblas-dev is faster.
# See: https://github.com/ggml-org/llama.cpp/pull/15915#issuecomment-3317166506
#
# The cleanup at the end removes temp files and cached apt data; `-not -name lock`
# leaves the apt lock files in place.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
    apt update -y && \
    apt install -y --no-install-recommends \
        curl libgomp1 libopenblas-dev && \
    apt autoremove -y && \
    apt clean -y && \
    rm -rf /tmp/* /var/tmp/* && \
    find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete && \
    find /var/cache -type f -delete

# llama.cpp shared libraries from the collector stage
COPY --from=collector /llama.cpp/lib /usr/lib/s390x-linux-gnu


### Full
# Image with every llama.cpp binary plus the Python conversion tooling.
# Entry point is tools.sh, copied into /app together with the binaries.
FROM base AS full

# rustup installs into /root/.cargo/bin; put it on PATH for the later steps.
ENV PATH="/root/.cargo/bin:${PATH}"
WORKDIR /app

RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
    apt update -y && \
    apt install -y \
        git cmake libjpeg-dev \
        python3 python3-pip python3-dev && \
    apt autoremove -y && \
    apt clean -y && \
    rm -rf /tmp/* /var/tmp/* && \
    find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete && \
    find /var/cache -type f -delete

# Install the Rust toolchain via rustup.
# Exec-form bash with `pipefail` so a failed download fails the build instead of
# piping nothing into bash and reporting success; --proto/--tlsv1.2 restrict the
# download to HTTPS over TLS >= 1.2, matching rustup's documented command.
RUN ["/bin/bash", "-c", "set -o pipefail && curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | bash -s -- -y"]

COPY --from=collector /llama.cpp/bin /app
COPY --from=collector /llama.cpp/gguf-py /app/gguf-py

# --break-system-packages: install into the distro-managed system Python.
RUN pip install --no-cache-dir --break-system-packages \
    -r /app/gguf-py/requirements.txt

ENTRYPOINT [ "/app/tools.sh" ]


### CLI Only
# Minimal image: the base runtime plus the llama-cli executable only.
FROM base AS light

# WORKDIR creates /llama.cpp/bin, so the COPY below lands inside that directory.
WORKDIR /llama.cpp/bin

# Only the llama-cli binary is copied; the libraries are already in `base`.
COPY --from=collector /llama.cpp/bin/llama-cli /llama.cpp/bin

ENTRYPOINT ["/llama.cpp/bin/llama-cli"]


### Server
# Minimal image: the base runtime plus the llama-server executable only.
FROM base AS server

# Presumably makes llama-server listen on all interfaces so the port exposed
# below is reachable from outside the container - confirm against server docs.
ENV LLAMA_ARG_HOST=0.0.0.0

# WORKDIR creates /llama.cpp/bin, so the COPY below lands inside that directory.
WORKDIR /llama.cpp/bin

# Only the llama-server binary is copied; the libraries are already in `base`.
COPY --from=collector /llama.cpp/bin/llama-server /llama.cpp/bin

EXPOSE 8080

ENTRYPOINT ["/llama.cpp/bin/llama-server"]
91 changes: 0 additions & 91 deletions .github/workflows/build-linux-cross.yml
Original file line number Diff line number Diff line change
Expand Up @@ -141,97 +141,6 @@ jobs:

# cmake --build build --config Release -j $(nproc)

# Cross-compile job for PowerPC64 little-endian (CPU only) on an ubuntu-24.04 runner.
# Step 1 registers the ppc64el dpkg architecture, adds the ubuntu-ports apt sources
# for it and installs the gcc-14/g++-14 powerpc64le cross toolchain.
# Step 2 configures CMake with those cross compilers (curl, OpenMP and tests off;
# examples and tools on) and builds in Release mode.
# NOTE(review): indentation of this job was lost in this view; the nesting under
# `jobs:` is assumed from the hunk header - verify against the real workflow file.
ubuntu-24-ppc64el-cpu-cross:
runs-on: ubuntu-24.04

steps:
- uses: actions/checkout@v4
- name: Setup PowerPC64le
run: |
sudo dpkg --add-architecture ppc64el

# Add arch-specific repositories for non-amd64 architectures
cat << EOF | sudo tee /etc/apt/sources.list.d/ppc64el-ports.list
deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
EOF

sudo apt-get update || true ;# Prevent failure due to missing URLs.

sudo apt-get install -y --no-install-recommends \
build-essential \
gcc-14-powerpc64le-linux-gnu \
g++-14-powerpc64le-linux-gnu

- name: Build
run: |
cmake -B build -DLLAMA_CURL=OFF \
-DCMAKE_BUILD_TYPE=Release \
-DGGML_OPENMP=OFF \
-DLLAMA_BUILD_EXAMPLES=ON \
-DLLAMA_BUILD_TOOLS=ON \
-DLLAMA_BUILD_TESTS=OFF \
-DCMAKE_SYSTEM_NAME=Linux \
-DCMAKE_SYSTEM_PROCESSOR=ppc64 \
-DCMAKE_C_COMPILER=powerpc64le-linux-gnu-gcc-14 \
-DCMAKE_CXX_COMPILER=powerpc64le-linux-gnu-g++-14 \
-DCMAKE_POSITION_INDEPENDENT_CODE=ON \
-DCMAKE_FIND_ROOT_PATH=/usr/lib/powerpc64le-linux-gnu \
-DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
-DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
-DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH

cmake --build build --config Release -j $(nproc)

# ubuntu-24-ppc64el-vulkan-cross:
# runs-on: ubuntu-24.04

# steps:
# - uses: actions/checkout@v4
# - name: Setup PowerPC64le
# run: |
# sudo dpkg --add-architecture ppc64el

# # Add arch-specific repositories for non-amd64 architectures
# cat << EOF | sudo tee /etc/apt/sources.list.d/ppc64el-ports.list
# deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
# deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
# deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
# deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
# EOF

# sudo apt-get update || true ;# Prevent failure due to missing URLs.

# sudo apt-get install -y --no-install-recommends \
# build-essential \
# glslc \
# gcc-14-powerpc64le-linux-gnu \
# g++-14-powerpc64le-linux-gnu \
# libvulkan-dev:ppc64el

# - name: Build
# run: |
# cmake -B build -DLLAMA_CURL=OFF \
# -DCMAKE_BUILD_TYPE=Release \
# -DGGML_VULKAN=ON \
# -DGGML_OPENMP=OFF \
# -DLLAMA_BUILD_EXAMPLES=ON \
# -DLLAMA_BUILD_TOOLS=ON \
# -DLLAMA_BUILD_TESTS=OFF \
# -DCMAKE_SYSTEM_NAME=Linux \
# -DCMAKE_SYSTEM_PROCESSOR=ppc64 \
# -DCMAKE_C_COMPILER=powerpc64le-linux-gnu-gcc-14 \
# -DCMAKE_CXX_COMPILER=powerpc64le-linux-gnu-g++-14 \
# -DCMAKE_POSITION_INDEPENDENT_CODE=ON \
# -DCMAKE_FIND_ROOT_PATH=/usr/lib/powerpc64le-linux-gnu \
# -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
# -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
# -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH

# cmake --build build --config Release -j $(nproc)

debian-13-loongarch64-cpu-cross:
runs-on: ubuntu-24.04
container: debian@sha256:653dfb9f86c3782e8369d5f7d29bb8faba1f4bff9025db46e807fa4c22903671
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/build-riscv-native.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ on:

jobs:
debian-13-riscv64-native: # Bianbu 2.2
runs-on: self-hosted
runs-on: [self-hosted, RISCV64]

steps:
- name: Install prerequisites
Expand Down
Loading