
Commit 230b258

Merge commit '264f1b51872c125e23fa0ac1da5e2a1170de9a08' into add-spacemit-backend
Change-Id: I4c52314a0836a59c85fb5c15afd58110f6dfe2d9
2 parents 986d8bd + 264f1b5 · commit 230b258

111 files changed (+3792 additions, -3855 deletions)


.clang-tidy

Lines changed: 1 addition & 0 deletions
@@ -17,6 +17,7 @@ Checks: >
     clang-analyzer-*,
     -clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling,
     performance-*,
+    -performance-enum-size,
     portability-*,
     -portability-simd-intrinsics,
     misc-*,
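
The addition opts the performance-enum-size check out alongside the existing exclusions. A minimal way to sanity-check the effective check list locally, assuming a CMake-generated compile_commands.json and using src/llama.cpp purely as an illustrative input file (not part of this commit):

# sketch only: paths are illustrative
cmake -S . -B build -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
clang-tidy -p build --list-checks src/llama.cpp | grep performance-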

.devops/s390x.Dockerfile

Lines changed: 122 additions & 0 deletions
@@ -0,0 +1,122 @@
ARG GCC_VERSION=15.2.0
ARG UBUNTU_VERSION=24.04

### Build Llama.cpp stage
FROM --platform=linux/s390x gcc:${GCC_VERSION} AS build

RUN --mount=type=cache,target=/var/cache/apt \
    --mount=type=cache,target=/var/lib/apt/lists \
    apt update -y && \
    apt upgrade -y && \
    apt install -y --no-install-recommends \
        git cmake ccache ninja-build \
        # WARNING: Do not use libopenblas-openmp-dev. libopenblas-dev is faster.
        libopenblas-dev libcurl4-openssl-dev && \
    rm -rf /var/lib/apt/lists/*

WORKDIR /app
COPY . .

RUN --mount=type=cache,target=/root/.ccache \
    --mount=type=cache,target=/app/build \
    cmake -S . -B build -G Ninja \
        -DCMAKE_BUILD_TYPE=Release \
        -DCMAKE_C_COMPILER_LAUNCHER=ccache \
        -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
        -DLLAMA_BUILD_TESTS=OFF \
        -DGGML_BACKEND_DL=OFF \
        -DGGML_NATIVE=OFF \
        -DGGML_BLAS=ON \
        -DGGML_BLAS_VENDOR=OpenBLAS && \
    cmake --build build --config Release -j $(nproc) && \
    cmake --install build --prefix /opt/llama.cpp

COPY *.py /opt/llama.cpp/bin
COPY .devops/tools.sh /opt/llama.cpp/bin

COPY gguf-py /opt/llama.cpp/gguf-py
COPY requirements.txt /opt/llama.cpp/gguf-py
COPY requirements /opt/llama.cpp/gguf-py/requirements


### Collect all llama.cpp binaries, libraries and distro libraries
FROM --platform=linux/s390x scratch AS collector

# Copy llama.cpp binaries and libraries
COPY --from=build /opt/llama.cpp/bin /llama.cpp/bin
COPY --from=build /opt/llama.cpp/lib /llama.cpp/lib
COPY --from=build /opt/llama.cpp/gguf-py /llama.cpp/gguf-py


### Base image
FROM --platform=linux/s390x ubuntu:${UBUNTU_VERSION} AS base

RUN --mount=type=cache,target=/var/cache/apt \
    --mount=type=cache,target=/var/lib/apt/lists \
    apt update -y && \
    apt install -y --no-install-recommends \
        # WARNING: Do not use libopenblas-openmp-dev. libopenblas-dev is faster.
        curl libgomp1 libopenblas-dev && \
    apt autoremove -y && \
    apt clean -y && \
    rm -rf /tmp/* /var/tmp/* && \
    find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete && \
    find /var/cache -type f -delete

# Copy llama.cpp libraries
COPY --from=collector /llama.cpp/lib /usr/lib/s390x-linux-gnu


### Full
FROM --platform=linux/s390x base AS full

ENV PATH="/root/.cargo/bin:${PATH}"
WORKDIR /app

RUN --mount=type=cache,target=/var/cache/apt \
    --mount=type=cache,target=/var/lib/apt/lists \
    apt update -y && \
    apt install -y \
        git cmake libjpeg-dev \
        python3 python3-pip python3-dev && \
    apt autoremove -y && \
    apt clean -y && \
    rm -rf /tmp/* /var/tmp/* && \
    find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete && \
    find /var/cache -type f -delete

RUN curl https://sh.rustup.rs -sSf | bash -s -- -y

COPY --from=collector /llama.cpp/bin /app
COPY --from=collector /llama.cpp/gguf-py /app/gguf-py

RUN pip install --no-cache-dir --break-system-packages \
    -r /app/gguf-py/requirements.txt

ENTRYPOINT [ "/app/tools.sh" ]


### CLI Only
FROM --platform=linux/s390x base AS light

WORKDIR /llama.cpp/bin

# Copy llama.cpp binaries and libraries
COPY --from=collector /llama.cpp/bin/llama-cli /llama.cpp/bin

ENTRYPOINT [ "/llama.cpp/bin/llama-cli" ]


### Server
FROM --platform=linux/s390x base AS server

ENV LLAMA_ARG_HOST=0.0.0.0

WORKDIR /llama.cpp/bin

# Copy llama.cpp binaries and libraries
COPY --from=collector /llama.cpp/bin/llama-server /llama.cpp/bin

EXPOSE 8080

ENTRYPOINT [ "/llama.cpp/bin/llama-server" ]
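
The Dockerfile compiles llama.cpp once with OpenBLAS in the build stage, gathers the installed binaries and libraries in a scratch collector stage, and then derives three publishable images (full, light, server) from a shared Ubuntu base. A minimal sketch of building the three targets locally, assuming Docker Buildx and either a native s390x host or QEMU emulation; the image tags are illustrative, not defined by this commit:

docker buildx build --platform linux/s390x -f .devops/s390x.Dockerfile --target full   -t llama.cpp:full-s390x .
docker buildx build --platform linux/s390x -f .devops/s390x.Dockerfile --target light  -t llama.cpp:light-s390x .
docker buildx build --platform linux/s390x -f .devops/s390x.Dockerfile --target server -t llama.cpp:server-s390x .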

.github/workflows/build-riscv-native.yml

Lines changed: 1 addition & 1 deletion
@@ -6,7 +6,7 @@ on:

 jobs:
   debian-13-riscv64-native: # Bianbu 2.2
-    runs-on: self-hosted
+    runs-on: [self-hosted, RISCV64]

     steps:
       - name: Install prerequisites

.github/workflows/build.yml

Lines changed: 190 additions & 0 deletions
@@ -1247,3 +1247,193 @@ jobs:
             -DGGML_CANN=on \
             -DSOC_TYPE=${{ matrix.device }}
           cmake --build build -j $(nproc)
+
+  # TODO: simplify the following workflows using a matrix
+  # TODO: run lighter CI on PRs and the full CI only on master (if needed)
+  ggml-ci-x64-cpu-low-perf:
+    runs-on: [self-hosted, Linux, X64, CPU, low-perf]
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: Test
+        id: ggml-ci
+        run: |
+          bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+
+  ggml-ci-arm64-cpu-low-perf:
+    runs-on: [self-hosted, Linux, ARM64, CPU, low-perf]
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: Test
+        id: ggml-ci
+        run: |
+          bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+
+  ggml-ci-x64-cpu-high-perf:
+    runs-on: [self-hosted, Linux, X64, CPU, high-perf]
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: Test
+        id: ggml-ci
+        run: |
+          bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+
+  ggml-ci-arm64-cpu-high-perf:
+    runs-on: [self-hosted, Linux, ARM64, CPU, high-perf]
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: Test
+        id: ggml-ci
+        run: |
+          GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+
+  ggml-ci-x64-nvidia-v100-cuda:
+    runs-on: [self-hosted, Linux, X64, NVIDIA, V100]
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: Test
+        id: ggml-ci
+        run: |
+          nvidia-smi
+          GG_BUILD_CUDA=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+
+  ggml-ci-x64-nvidia-v100-vulkan:
+    runs-on: [self-hosted, Linux, X64, NVIDIA, V100]
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: Test
+        id: ggml-ci
+        run: |
+          vulkaninfo
+          GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+
+  ggml-ci-x64-nvidia-t4-cuda:
+    runs-on: [self-hosted, Linux, X64, NVIDIA, T4]
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: Test
+        id: ggml-ci
+        run: |
+          nvidia-smi
+          GG_BUILD_CUDA=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+
+  ggml-ci-x64-nvidia-t4-vulkan:
+    runs-on: [self-hosted, Linux, X64, NVIDIA, T4]
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: Test
+        id: ggml-ci
+        run: |
+          vulkaninfo
+          GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+
+  ggml-ci-x64-nvidia-t4-vulkan-coopmat1:
+    runs-on: [self-hosted, Linux, X64, NVIDIA, T4]
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: Test
+        id: ggml-ci
+        run: |
+          vulkaninfo
+          GG_BUILD_VULKAN=1 GGML_VK_DISABLE_COOPMAT2=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+
+  ggml-ci-x64-cpu-amx:
+    runs-on: [self-hosted, Linux, X64, CPU, AMX]
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: Test
+        id: ggml-ci
+        run: |
+          bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+
+  ggml-ci-x64-amd-v710-vulkan:
+    runs-on: [self-hosted, Linux, X64, AMD, V710]
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: Test
+        id: ggml-ci
+        run: |
+          GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+
+  ggml-ci-x64-amd-v710-rocm:
+    runs-on: [self-hosted, Linux, X64, AMD, V710]
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: Test
+        id: ggml-ci
+        run: |
+          GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS="gfx1101" bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+
+  ggml-ci-mac-metal:
+    runs-on: [self-hosted, macOS, ARM64]
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: Test
+        id: ggml-ci
+        run: |
+          GG_BUILD_METAL=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
+
+  # TODO: install vulkan drivers
+  # ggml-ci-mac-vulkan:
+  #   runs-on: [self-hosted, macOS, ARM64]
+  #
+  #   steps:
+  #     - name: Clone
+  #       id: checkout
+  #       uses: actions/checkout@v4
+  #
+  #     - name: Test
+  #       id: ggml-ci
+  #       run: |
+  #         GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
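
Every added job drives the same ci/run.sh entry point; the backend is selected through GG_BUILD_* environment variables, and the two positional arguments appear to be an output/results directory and a mount directory for cached data. A hedged sketch of reproducing the V100 CUDA job outside of CI, with illustrative local paths:

mkdir -p ~/results/llama.cpp /mnt/llama.cpp   # illustrative paths
GG_BUILD_CUDA=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp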

.github/workflows/docker.yml

Lines changed: 1 addition & 0 deletions
@@ -44,6 +44,7 @@ jobs:
           - { tag: "musa", dockerfile: ".devops/musa.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true }
           - { tag: "intel", dockerfile: ".devops/intel.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true }
           - { tag: "vulkan", dockerfile: ".devops/vulkan.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false }
+          - { tag: "s390x", dockerfile: ".devops/s390x.Dockerfile", platforms: "linux/s390x", full: true, light: true, server: true, free_disk_space: false }
           # Note: the rocm images are failing due to a compiler error and are disabled until this is fixed to allow the workflow to complete
           #- {tag: "rocm", dockerfile: ".devops/rocm.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, free_disk_space: true }
     steps:
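
Because the new matrix entry targets linux/s390x while the hosted builders are x86_64, the image is cross-built, relying on QEMU emulation for non-native platforms. For a comparable local cross-build, the one-time binfmt registration below is a common approach (this command is not part of this commit):

docker run --privileged --rm tonistiigi/binfmt --install s390x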
