Commit 08f3055

Merge remote-tracking branch 'origin/master' into GraniteDocling

* origin/master: (39 commits)
  ci : disable AMD workflows + update NVIDIA workflows (ggml-org#16200)
  ci : enable Vulkan workflow on Mac (ggml-org#16194)
  ggml-cpu: Respect cpumask settings (ggml-org#16164)
  ggml : fix uninitialized is_on_grid in quantize_row_iq3_xxs_impl (ggml-org#15928)
  zdnn: refactor codebase + add docs (ggml-org#16178)
  codeowners : add @danbev to model-conversion example [no ci] (ggml-org#16190)
  devops: add s390x containers (ggml-org#15915)
  ggml-cpu : fix typo in gemm comments [no ci] (ggml-org#16189)
  feat: Add conversion support in GraniteHybrid for non-hybrid (all attn) (ggml-org#16177)
  clang-tidy : disable warning about performance enum size (ggml-org#16127)
  ggml : implement set_rows with i32 index (ggml-org#16159)
  codeowners : update + cleanup (ggml-org#16174)
  common : enable `--offline` mode without curl support (ggml-org#16137)
  webui : fix handling incomplete chunks (ggml-org#16107)
  embedding : fix typos in README (ggml-org#16171)
  common : remove unused local variables (ggml-org#16140)
  ggml : extend ggml_can_fuse to work with non-sequential nodes (ggml-org#16123)
  ggml : add ggml_op_is_empty (ggml-org#16122)
  codeowners : update ownership for @ngxson and @allozuar (ggml-org#16128)
  Vulkan: add conv_transpose_2d operation (ggml-org#16022)
  ...
2 parents cb51d4e + f505bd8

File tree

102 files changed: +3254 additions, −2804 deletions


.clang-tidy

Lines changed: 1 addition & 0 deletions
@@ -17,6 +17,7 @@ Checks: >
     clang-analyzer-*,
     -clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling,
     performance-*,
+    -performance-enum-size,
     portability-*,
     -portability-simd-intrinsics,
     misc-*,
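In a clang-tidy `Checks` list, a glob such as `performance-*` enables a whole check family and a leading `-` then subtracts individual checks, so this hunk keeps all performance checks enabled except `performance-enum-size`. A quick local sanity check, as a sketch: it assumes clang-tidy is installed and is run from the repository root, and the source file and flags are illustrative:

    # List the checks resolved from the repository's .clang-tidy for one
    # translation unit; performance-enum-size should not appear in the output.
    clang-tidy --list-checks src/llama.cpp -- -std=c++17 | grep performance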

.devops/s390x.Dockerfile

Lines changed: 122 additions & 0 deletions
@@ -0,0 +1,122 @@
ARG GCC_VERSION=15.2.0
ARG UBUNTU_VERSION=24.04

### Build Llama.cpp stage
FROM --platform=linux/s390x gcc:${GCC_VERSION} AS build

RUN --mount=type=cache,target=/var/cache/apt \
    --mount=type=cache,target=/var/lib/apt/lists \
    apt update -y && \
    apt upgrade -y && \
    apt install -y --no-install-recommends \
        git cmake ccache ninja-build \
        # WARNING: Do not use libopenblas-openmp-dev. libopenblas-dev is faster.
        libopenblas-dev libcurl4-openssl-dev && \
    rm -rf /var/lib/apt/lists/*

WORKDIR /app
COPY . .

RUN --mount=type=cache,target=/root/.ccache \
    --mount=type=cache,target=/app/build \
    cmake -S . -B build -G Ninja \
        -DCMAKE_BUILD_TYPE=Release \
        -DCMAKE_C_COMPILER_LAUNCHER=ccache \
        -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
        -DLLAMA_BUILD_TESTS=OFF \
        -DGGML_BACKEND_DL=OFF \
        -DGGML_NATIVE=OFF \
        -DGGML_BLAS=ON \
        -DGGML_BLAS_VENDOR=OpenBLAS && \
    cmake --build build --config Release -j $(nproc) && \
    cmake --install build --prefix /opt/llama.cpp

COPY *.py /opt/llama.cpp/bin
COPY .devops/tools.sh /opt/llama.cpp/bin

COPY gguf-py /opt/llama.cpp/gguf-py
COPY requirements.txt /opt/llama.cpp/gguf-py
COPY requirements /opt/llama.cpp/gguf-py/requirements


### Collect all llama.cpp binaries, libraries and distro libraries
FROM --platform=linux/s390x scratch AS collector

# Copy llama.cpp binaries and libraries
COPY --from=build /opt/llama.cpp/bin /llama.cpp/bin
COPY --from=build /opt/llama.cpp/lib /llama.cpp/lib
COPY --from=build /opt/llama.cpp/gguf-py /llama.cpp/gguf-py


### Base image
FROM --platform=linux/s390x ubuntu:${UBUNTU_VERSION} AS base

RUN --mount=type=cache,target=/var/cache/apt \
    --mount=type=cache,target=/var/lib/apt/lists \
    apt update -y && \
    apt install -y --no-install-recommends \
        # WARNING: Do not use libopenblas-openmp-dev. libopenblas-dev is faster.
        curl libgomp1 libopenblas-dev && \
    apt autoremove -y && \
    apt clean -y && \
    rm -rf /tmp/* /var/tmp/* && \
    find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete && \
    find /var/cache -type f -delete

# Copy llama.cpp libraries
COPY --from=collector /llama.cpp/lib /usr/lib/s390x-linux-gnu


### Full
FROM --platform=linux/s390x base AS full

ENV PATH="/root/.cargo/bin:${PATH}"
WORKDIR /app

RUN --mount=type=cache,target=/var/cache/apt \
    --mount=type=cache,target=/var/lib/apt/lists \
    apt update -y && \
    apt install -y \
        git cmake libjpeg-dev \
        python3 python3-pip python3-dev && \
    apt autoremove -y && \
    apt clean -y && \
    rm -rf /tmp/* /var/tmp/* && \
    find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete && \
    find /var/cache -type f -delete

RUN curl https://sh.rustup.rs -sSf | bash -s -- -y

COPY --from=collector /llama.cpp/bin /app
COPY --from=collector /llama.cpp/gguf-py /app/gguf-py

RUN pip install --no-cache-dir --break-system-packages \
    -r /app/gguf-py/requirements.txt

ENTRYPOINT [ "/app/tools.sh" ]


### CLI Only
FROM --platform=linux/s390x base AS light

WORKDIR /llama.cpp/bin

# Copy llama.cpp binaries and libraries
COPY --from=collector /llama.cpp/bin/llama-cli /llama.cpp/bin

ENTRYPOINT [ "/llama.cpp/bin/llama-cli" ]


### Server
FROM --platform=linux/s390x base AS server

ENV LLAMA_ARG_HOST=0.0.0.0

WORKDIR /llama.cpp/bin

# Copy llama.cpp binaries and libraries
COPY --from=collector /llama.cpp/bin/llama-server /llama.cpp/bin

EXPOSE 8080

ENTRYPOINT [ "/llama.cpp/bin/llama-server" ]
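Each `FROM ... AS <name>` stage above can be built on its own with `--target`. A minimal sketch of building and running the server stage, assuming Docker with buildx and, on non-s390x hosts, QEMU binfmt emulation; the image tag and model path are illustrative:

    # Build only the server stage of the s390x image.
    docker buildx build --platform linux/s390x \
        -f .devops/s390x.Dockerfile --target server \
        -t llama.cpp:server-s390x .

    # Run it with a local GGUF model; port 8080 matches the EXPOSE above.
    docker run --rm -p 8080:8080 -v "$PWD/models:/models" \
        llama.cpp:server-s390x -m /models/model.gguf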

.github/workflows/build-riscv-native.yml

Lines changed: 1 addition & 1 deletion
@@ -6,7 +6,7 @@ on:

 jobs:
   debian-13-riscv64-native: # Bianbu 2.2
-    runs-on: self-hosted
+    runs-on: [self-hosted, RISCV64]

     steps:
       - name: Install prerequisites
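With a label array, GitHub dispatches the job only to self-hosted runners that carry every listed label, so the RISC-V machine must be registered with an explicit RISCV64 label rather than matching on `self-hosted` alone. A sketch of attaching that label when configuring a runner (URL and token are placeholders):

    # Register a self-hosted runner with the extra RISCV64 label so that
    # `runs-on: [self-hosted, RISCV64]` can match it.
    ./config.sh --url https://github.com/OWNER/REPO \
        --token <REGISTRATION_TOKEN> \
        --labels RISCV64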

.github/workflows/build.yml

Lines changed: 164 additions & 0 deletions
@@ -1247,3 +1247,167 @@ jobs:
           -DGGML_CANN=on \
           -DSOC_TYPE=${{ matrix.device }}
         cmake --build build -j $(nproc)

  # TODO: simplify the following workflows using a matrix
  # TODO: run lighter CI on PRs and the full CI only on master (if needed)
  ggml-ci-x64-cpu-low-perf:
    runs-on: [self-hosted, Linux, X64, CPU, low-perf]

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4

      - name: Test
        id: ggml-ci
        run: |
          bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp

  ggml-ci-arm64-cpu-low-perf:
    runs-on: [self-hosted, Linux, ARM64, CPU, low-perf]

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4

      - name: Test
        id: ggml-ci
        run: |
          bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp

  ggml-ci-x64-cpu-high-perf:
    runs-on: [self-hosted, Linux, X64, CPU, high-perf]

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4

      - name: Test
        id: ggml-ci
        run: |
          bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp

  ggml-ci-arm64-cpu-high-perf:
    runs-on: [self-hosted, Linux, ARM64, CPU, high-perf]

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4

      - name: Test
        id: ggml-ci
        run: |
          GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp

  ggml-ci-x64-nvidia-cuda:
    runs-on: [self-hosted, Linux, X64, NVIDIA]

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4

      - name: Test
        id: ggml-ci
        run: |
          nvidia-smi
          GG_BUILD_CUDA=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp

  ggml-ci-x64-nvidia-vulkan-cm:
    runs-on: [self-hosted, Linux, X64, NVIDIA]

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4

      - name: Test
        id: ggml-ci
        run: |
          vulkaninfo --summary
          GG_BUILD_VULKAN=1 GGML_VK_DISABLE_COOPMAT2=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp

  ggml-ci-x64-nvidia-vulkan-cm2:
    runs-on: [self-hosted, Linux, X64, NVIDIA, COOPMAT2]

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4

      - name: Test
        id: ggml-ci
        run: |
          vulkaninfo --summary
          GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp

  ggml-ci-x64-cpu-amx:
    runs-on: [self-hosted, Linux, X64, CPU, AMX]

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4

      - name: Test
        id: ggml-ci
        run: |
          bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp

  # ggml-ci-x64-amd-vulkan:
  #   runs-on: [self-hosted, Linux, X64, AMD]
  #
  #   steps:
  #     - name: Clone
  #       id: checkout
  #       uses: actions/checkout@v4
  #
  #     - name: Test
  #       id: ggml-ci
  #       run: |
  #         vulkaninfo --summary
  #         GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
  #
  # ggml-ci-x64-amd-rocm:
  #   runs-on: [self-hosted, Linux, X64, AMD]
  #
  #   steps:
  #     - name: Clone
  #       id: checkout
  #       uses: actions/checkout@v4
  #
  #     - name: Test
  #       id: ggml-ci
  #       run: |
  #         amd-smi static
  #         GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS="gfx1101" bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp

  ggml-ci-mac-metal:
    runs-on: [self-hosted, macOS, ARM64]

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4

      - name: Test
        id: ggml-ci
        run: |
          GG_BUILD_METAL=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

  ggml-ci-mac-vulkan:
    runs-on: [self-hosted, macOS, ARM64]

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4

      - name: Test
        id: ggml-ci
        run: |
          vulkaninfo --summary
          GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
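All of these jobs drive the same ci/run.sh script and select backends through GG_BUILD_* environment variables, so a runner's work can be reproduced outside Actions. A sketch for the CUDA job, run from the repository root; the two arguments are the results and mount directories used on the runners, and any writable local paths should work:

    # Reproduce the ggml-ci-x64-nvidia-cuda job locally (CUDA toolkit required).
    mkdir -p /tmp/llama.cpp-results /tmp/llama.cpp-mnt
    GG_BUILD_CUDA=1 bash ./ci/run.sh /tmp/llama.cpp-results /tmp/llama.cpp-mnt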

.github/workflows/docker.yml

Lines changed: 1 addition & 0 deletions
@@ -44,6 +44,7 @@ jobs:
           - { tag: "musa", dockerfile: ".devops/musa.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true }
           - { tag: "intel", dockerfile: ".devops/intel.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true }
           - { tag: "vulkan", dockerfile: ".devops/vulkan.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false }
+          - { tag: "s390x", dockerfile: ".devops/s390x.Dockerfile", platforms: "linux/s390x", full: true, light: true, server: true, free_disk_space: false }
           # Note: the rocm images are failing due to a compiler error and are disabled until this is fixed to allow the workflow to complete
           #- {tag: "rocm", dockerfile: ".devops/rocm.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, free_disk_space: true }
     steps:
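Once this matrix entry runs, s390x images should be published alongside the existing tags. Assuming the repository's established <target>-<tag> naming scheme carries over to the new entry (an assumption, not confirmed by this diff):

    # Hypothetical tag, assuming s390x images follow the existing naming scheme.
    docker pull ghcr.io/ggml-org/llama.cpp:server-s390x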
