Skip to content

Commit 64fc676

Browse files
committed
Merge remote-tracking branch 'origin/master' into GraniteDocling
* origin/master: (124 commits) metal : fix loop bound in ggml_mem_ranges (ggml-org#16412) llama : fix shapes for bert/mpt q/k norm (ggml-org#16409) ggml : fix graph reallocation with multiple chunks (ggml-org#16396) Fix missing messages on sibling navigation (ggml-org#16408) vulkan: Replace uses of maxMemoryAllocationSize and VK_WHOLE_SIZE (ggml-org#16354) vulkan: Fix FA coopmat1 invalid array indexing (ggml-org#16365) ci : change macos-13 to macos-15-intel (ggml-org#16401) Capture model name only after first token (streaming) or completed request (ggml-org#16405) vulkan: in flash attention, bounds check against nem1 (don't rely on GGML_KQ_MASK_PAD) (ggml-org#16316) webui : Fix messages payload sent to chat completions (ggml-org#16402) fix: track viewportHeight via window.innerHeight to avoid unwanted scrolling (ggml-org#16356) test-barrier : do not use more threads than physically available (ggml-org#16389) ggml webgpu: add support for soft_max, optimize rms_norm (ggml-org#16357) model : Apertus model implementation (ggml-org#15852) musa: update compile flags (ggml-org#16265) ci : fix ubuntu-latest-cmake-rpc (disable ccache) (ggml-org#16388) ci: update vulkan ci (ggml-org#16294) ci : fix clean-up of old logs (ggml-org#16381) SYCL: Update to oneAPI 2025.2 (ggml-org#16371) HIP: add IMbackK to codeowner (ggml-org#16375) ...
2 parents 08f3055 + 606a73f commit 64fc676

File tree

356 files changed

+22464
-7756
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

356 files changed

+22464
-7756
lines changed

.devops/intel.Dockerfile

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
ARG ONEAPI_VERSION=2025.1.1-0-devel-ubuntu24.04
1+
ARG ONEAPI_VERSION=2025.2.2-0-devel-ubuntu24.04
22

33
## Build Image
44

5-
FROM intel/oneapi-basekit:$ONEAPI_VERSION AS build
5+
FROM intel/deep-learning-essentials:$ONEAPI_VERSION AS build
66

77
ARG GGML_SYCL_F16=OFF
88
RUN apt-get update && \
@@ -31,7 +31,7 @@ RUN mkdir -p /app/full \
3131
&& cp requirements.txt /app/full \
3232
&& cp .devops/tools.sh /app/full/tools.sh
3333

34-
FROM intel/oneapi-basekit:$ONEAPI_VERSION AS base
34+
FROM intel/deep-learning-essentials:$ONEAPI_VERSION AS base
3535

3636
RUN apt-get update \
3737
&& apt-get install -y libgomp1 curl\

.devops/musa.Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
ARG UBUNTU_VERSION=22.04
22
# This needs to generally match the container host's environment.
3-
ARG MUSA_VERSION=rc4.2.0
3+
ARG MUSA_VERSION=rc4.3.0
44
# Target the MUSA build image
55
ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}-amd64
66

.devops/rocm.Dockerfile

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
ARG UBUNTU_VERSION=24.04
22

33
# This needs to generally match the container host's environment.
4-
ARG ROCM_VERSION=6.4
5-
ARG AMDGPU_VERSION=6.4
4+
ARG ROCM_VERSION=7.0
5+
ARG AMDGPU_VERSION=7.0
66

77
# Target the ROCm build image
88
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete
@@ -13,9 +13,8 @@ FROM ${BASE_ROCM_DEV_CONTAINER} AS build
1313
# Unless otherwise specified, we make a fat build.
1414
# List from https://github.com/ggml-org/llama.cpp/pull/1087#issuecomment-1682807878
1515
# This is mostly tied to rocBLAS supported archs.
16-
# gfx803, gfx900, gfx1032, gfx1101, gfx1102,not officialy supported
17-
# gfx906 is deprecated
18-
#check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.4.1/reference/system-requirements.html
16+
# gfx803, gfx900, gfx906, gfx1032, gfx1101, gfx1102 are not officially supported
17+
# check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.4.1/reference/system-requirements.html
1918

2019
ARG ROCM_DOCKER_ARCH='gfx803;gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1010;gfx1030;gfx1032;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201;gfx1151'
2120
#ARG ROCM_DOCKER_ARCH='gfx1151'
@@ -36,13 +35,10 @@ WORKDIR /app
3635

3736
COPY . .
3837

39-
RUN git clone https://github.com/rocm/rocwmma --branch develop --depth 1
40-
4138
RUN HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \
4239
cmake -S . -B build \
4340
-DGGML_HIP=ON \
4441
-DGGML_HIP_ROCWMMA_FATTN=ON \
45-
-DCMAKE_HIP_FLAGS="-I$(pwd)/rocwmma/library/include/" \
4642
-DAMDGPU_TARGETS="$ROCM_DOCKER_ARCH" \
4743
-DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON \
4844
-DCMAKE_BUILD_TYPE=Release -DLLAMA_BUILD_TESTS=OFF \

.devops/s390x.Dockerfile

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@ ARG GCC_VERSION=15.2.0
22
ARG UBUNTU_VERSION=24.04
33

44
### Build Llama.cpp stage
5-
FROM --platform=linux/s390x gcc:${GCC_VERSION} AS build
5+
FROM gcc:${GCC_VERSION} AS build
66

7-
RUN --mount=type=cache,target=/var/cache/apt \
8-
--mount=type=cache,target=/var/lib/apt/lists \
7+
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
8+
--mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
99
apt update -y && \
1010
apt upgrade -y && \
1111
apt install -y --no-install-recommends \
@@ -40,7 +40,7 @@ COPY requirements /opt/llama.cpp/gguf-py/requirements
4040

4141

4242
### Collect all llama.cpp binaries, libraries and distro libraries
43-
FROM --platform=linux/s390x scratch AS collector
43+
FROM scratch AS collector
4444

4545
# Copy llama.cpp binaries and libraries
4646
COPY --from=build /opt/llama.cpp/bin /llama.cpp/bin
@@ -49,13 +49,14 @@ COPY --from=build /opt/llama.cpp/gguf-py /llama.cpp/gguf-py
4949

5050

5151
### Base image
52-
FROM --platform=linux/s390x ubuntu:${UBUNTU_VERSION} AS base
52+
FROM ubuntu:${UBUNTU_VERSION} AS base
5353

54-
RUN --mount=type=cache,target=/var/cache/apt \
55-
--mount=type=cache,target=/var/lib/apt/lists \
54+
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
55+
--mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
5656
apt update -y && \
5757
apt install -y --no-install-recommends \
5858
# WARNING: Do not use libopenblas-openmp-dev. libopenblas-dev is faster.
59+
# See: https://github.com/ggml-org/llama.cpp/pull/15915#issuecomment-3317166506
5960
curl libgomp1 libopenblas-dev && \
6061
apt autoremove -y && \
6162
apt clean -y && \
@@ -68,13 +69,13 @@ COPY --from=collector /llama.cpp/lib /usr/lib/s390x-linux-gnu
6869

6970

7071
### Full
71-
FROM --platform=linux/s390x base AS full
72+
FROM base AS full
7273

7374
ENV PATH="/root/.cargo/bin:${PATH}"
7475
WORKDIR /app
7576

76-
RUN --mount=type=cache,target=/var/cache/apt \
77-
--mount=type=cache,target=/var/lib/apt/lists \
77+
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
78+
--mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
7879
apt update -y && \
7980
apt install -y \
8081
git cmake libjpeg-dev \
@@ -97,7 +98,7 @@ ENTRYPOINT [ "/app/tools.sh" ]
9798

9899

99100
### CLI Only
100-
FROM --platform=linux/s390x base AS light
101+
FROM base AS light
101102

102103
WORKDIR /llama.cpp/bin
103104

@@ -108,7 +109,7 @@ ENTRYPOINT [ "/llama.cpp/bin/llama-cli" ]
108109

109110

110111
### Server
111-
FROM --platform=linux/s390x base AS server
112+
FROM base AS server
112113

113114
ENV LLAMA_ARG_HOST=0.0.0.0
114115

.github/workflows/build-amd.yml

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
name: CI (AMD)
2+
3+
on:
4+
workflow_dispatch: # allows manual triggering
5+
push:
6+
branches:
7+
- master
8+
paths: [
9+
'.github/workflows/build-amd.yml',
10+
'**/CMakeLists.txt',
11+
'**/.cmake',
12+
'**/*.h',
13+
'**/*.hpp',
14+
'**/*.c',
15+
'**/*.cpp',
16+
'**/*.cu',
17+
'**/*.cuh',
18+
'**/*.comp'
19+
]
20+
21+
concurrency:
22+
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
23+
cancel-in-progress: true
24+
25+
jobs:
26+
ggml-ci-x64-amd-vulkan:
27+
runs-on: [self-hosted, Linux, X64, AMD]
28+
29+
steps:
30+
- name: Clone
31+
id: checkout
32+
uses: actions/checkout@v4
33+
34+
- name: Test
35+
id: ggml-ci
36+
run: |
37+
vulkaninfo --summary
38+
GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
39+
40+
ggml-ci-x64-amd-rocm:
41+
runs-on: [self-hosted, Linux, X64, AMD]
42+
43+
steps:
44+
- name: Clone
45+
id: checkout
46+
uses: actions/checkout@v4
47+
48+
- name: Test
49+
id: ggml-ci
50+
run: |
51+
amd-smi static
52+
GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS="gfx1101" bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp

.github/workflows/build-linux-cross.yml

Lines changed: 44 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -141,97 +141,6 @@ jobs:
141141

142142
# cmake --build build --config Release -j $(nproc)
143143

144-
ubuntu-24-ppc64el-cpu-cross:
145-
runs-on: ubuntu-24.04
146-
147-
steps:
148-
- uses: actions/checkout@v4
149-
- name: Setup PowerPC64le
150-
run: |
151-
sudo dpkg --add-architecture ppc64el
152-
153-
# Add arch-specific repositories for non-amd64 architectures
154-
cat << EOF | sudo tee /etc/apt/sources.list.d/ppc64el-ports.list
155-
deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
156-
deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
157-
deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
158-
deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
159-
EOF
160-
161-
sudo apt-get update || true ;# Prevent failure due to missing URLs.
162-
163-
sudo apt-get install -y --no-install-recommends \
164-
build-essential \
165-
gcc-14-powerpc64le-linux-gnu \
166-
g++-14-powerpc64le-linux-gnu
167-
168-
- name: Build
169-
run: |
170-
cmake -B build -DLLAMA_CURL=OFF \
171-
-DCMAKE_BUILD_TYPE=Release \
172-
-DGGML_OPENMP=OFF \
173-
-DLLAMA_BUILD_EXAMPLES=ON \
174-
-DLLAMA_BUILD_TOOLS=ON \
175-
-DLLAMA_BUILD_TESTS=OFF \
176-
-DCMAKE_SYSTEM_NAME=Linux \
177-
-DCMAKE_SYSTEM_PROCESSOR=ppc64 \
178-
-DCMAKE_C_COMPILER=powerpc64le-linux-gnu-gcc-14 \
179-
-DCMAKE_CXX_COMPILER=powerpc64le-linux-gnu-g++-14 \
180-
-DCMAKE_POSITION_INDEPENDENT_CODE=ON \
181-
-DCMAKE_FIND_ROOT_PATH=/usr/lib/powerpc64le-linux-gnu \
182-
-DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
183-
-DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
184-
-DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
185-
186-
cmake --build build --config Release -j $(nproc)
187-
188-
# ubuntu-24-ppc64el-vulkan-cross:
189-
# runs-on: ubuntu-24.04
190-
191-
# steps:
192-
# - uses: actions/checkout@v4
193-
# - name: Setup PowerPC64le
194-
# run: |
195-
# sudo dpkg --add-architecture ppc64el
196-
197-
# # Add arch-specific repositories for non-amd64 architectures
198-
# cat << EOF | sudo tee /etc/apt/sources.list.d/ppc64el-ports.list
199-
# deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
200-
# deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
201-
# deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
202-
# deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
203-
# EOF
204-
205-
# sudo apt-get update || true ;# Prevent failure due to missing URLs.
206-
207-
# sudo apt-get install -y --no-install-recommends \
208-
# build-essential \
209-
# glslc \
210-
# gcc-14-powerpc64le-linux-gnu \
211-
# g++-14-powerpc64le-linux-gnu \
212-
# libvulkan-dev:ppc64el
213-
214-
# - name: Build
215-
# run: |
216-
# cmake -B build -DLLAMA_CURL=OFF \
217-
# -DCMAKE_BUILD_TYPE=Release \
218-
# -DGGML_VULKAN=ON \
219-
# -DGGML_OPENMP=OFF \
220-
# -DLLAMA_BUILD_EXAMPLES=ON \
221-
# -DLLAMA_BUILD_TOOLS=ON \
222-
# -DLLAMA_BUILD_TESTS=OFF \
223-
# -DCMAKE_SYSTEM_NAME=Linux \
224-
# -DCMAKE_SYSTEM_PROCESSOR=ppc64 \
225-
# -DCMAKE_C_COMPILER=powerpc64le-linux-gnu-gcc-14 \
226-
# -DCMAKE_CXX_COMPILER=powerpc64le-linux-gnu-g++-14 \
227-
# -DCMAKE_POSITION_INDEPENDENT_CODE=ON \
228-
# -DCMAKE_FIND_ROOT_PATH=/usr/lib/powerpc64le-linux-gnu \
229-
# -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
230-
# -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
231-
# -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
232-
233-
# cmake --build build --config Release -j $(nproc)
234-
235144
debian-13-loongarch64-cpu-cross:
236145
runs-on: ubuntu-24.04
237146
container: debian@sha256:653dfb9f86c3782e8369d5f7d29bb8faba1f4bff9025db46e807fa4c22903671
@@ -344,3 +253,47 @@ jobs:
344253
-DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
345254
346255
cmake --build build --config Release -j $(nproc)
256+
257+
ubuntu-24-riscv64-cpu-spacemit-ime-cross:
258+
runs-on: ubuntu-24.04
259+
260+
env:
261+
SPACEMIT_IME_TOOLCHAIN_VERSION: "1.1.2"
262+
SPACEMIT_IME_TOOLCHAIN_PATH: "spacemit-toolchain-linux-glibc-x86_64"
263+
264+
steps:
265+
- uses: actions/checkout@v4
266+
267+
- name: Cache Toolchain
268+
uses: actions/cache@v4
269+
id: cache-spacemit-ime-cross-toolchain
270+
with:
271+
path: ./${{ env.SPACEMIT_IME_TOOLCHAIN_PATH }}
272+
key: ${{ runner.os }}-spacemit-ime-toolchain-v${{ env.SPACEMIT_IME_TOOLCHAIN_VERSION }}
273+
274+
- name: Setup Toolchain
275+
if: steps.cache-spacemit-ime-cross-toolchain.outputs.cache-hit != 'true'
276+
run: |
277+
wget --quiet --no-check-certificate https://archive.spacemit.com/toolchain/spacemit-toolchain-linux-glibc-x86_64-v${{ env.SPACEMIT_IME_TOOLCHAIN_VERSION }}.tar.xz -O ${{ env.SPACEMIT_IME_TOOLCHAIN_PATH }}.tar.xz
278+
rm -rf ${{ env.SPACEMIT_IME_TOOLCHAIN_PATH }}
279+
mkdir -p ${{ env.SPACEMIT_IME_TOOLCHAIN_PATH }}
280+
tar xf ${{ env.SPACEMIT_IME_TOOLCHAIN_PATH }}.tar.xz -C ${{ env.SPACEMIT_IME_TOOLCHAIN_PATH }} --strip-components=1
281+
rm -rf ${{ env.SPACEMIT_IME_TOOLCHAIN_PATH }}.tar.xz
282+
283+
- name: Build
284+
run: |
285+
export RISCV_ROOT_PATH=${PWD}/${{ env.SPACEMIT_IME_TOOLCHAIN_PATH }}
286+
cmake -B build -DLLAMA_CURL=OFF \
287+
-DCMAKE_BUILD_TYPE=Release \
288+
-DGGML_OPENMP=OFF \
289+
-DLLAMA_BUILD_EXAMPLES=ON \
290+
-DLLAMA_BUILD_TOOLS=ON \
291+
-DLLAMA_BUILD_TESTS=OFF \
292+
-DGGML_CPU_RISCV64_SPACEMIT=ON \
293+
-DGGML_RVV=ON \
294+
-DGGML_RV_ZFH=ON \
295+
-DGGML_RV_ZICBOP=ON \
296+
-DRISCV64_SPACEMIT_IME_SPEC=RISCV64_SPACEMIT_IME1 \
297+
-DCMAKE_TOOLCHAIN_FILE=${PWD}/cmake/riscv64-spacemit-linux-gnu-gcc.cmake
298+
299+
cmake --build build --config Release -j $(nproc)

0 commit comments

Comments
 (0)