Skip to content

Commit 614fd07

Browse files
author
Olivier Chafik
committed
Merge remote-tracking branch 'origin/master' into cuda-releases
2 parents 3d63db2 + 3d804de commit 614fd07

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

82 files changed

+3772
-1068
lines changed

.devops/cpu.Dockerfile

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,25 @@ ARG UBUNTU_VERSION=22.04
22

33
FROM ubuntu:$UBUNTU_VERSION AS build
44

5+
ARG TARGETARCH
6+
7+
ARG GGML_CPU_ARM_ARCH=armv8-a
8+
59
RUN apt-get update && \
610
apt-get install -y build-essential git cmake libcurl4-openssl-dev
711

812
WORKDIR /app
913

1014
COPY . .
1115

12-
RUN cmake -S . -B build -DGGML_BACKEND_DL=ON -DGGML_NATIVE=OFF -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_CURL=ON -DCMAKE_BUILD_TYPE=Release && \
16+
RUN if [ "$TARGETARCH" = "amd64" ]; then \
17+
cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=ON -DGGML_NATIVE=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON; \
18+
elif [ "$TARGETARCH" = "arm64" ]; then \
19+
cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=ON -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=${GGML_CPU_ARM_ARCH}; \
20+
else \
21+
echo "Unsupported architecture"; \
22+
exit 1; \
23+
fi && \
1324
cmake --build build -j $(nproc)
1425

1526
RUN mkdir -p /app/lib && \

.devops/tools.sh

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,13 @@ elif [[ "$arg1" == '--quantize' || "$arg1" == '-q' ]]; then
1313
exec ./llama-quantize "$@"
1414
elif [[ "$arg1" == '--run' || "$arg1" == '-r' ]]; then
1515
exec ./llama-cli "$@"
16+
elif [[ "$arg1" == '--bench' || "$arg1" == '-b' ]]; then
17+
exec ./llama-bench "$@"
18+
elif [[ "$arg1" == '--perplexity' || "$arg1" == '-p' ]]; then
19+
exec ./llama-perplexity "$@"
1620
elif [[ "$arg1" == '--all-in-one' || "$arg1" == '-a' ]]; then
1721
echo "Converting PTH to GGML..."
18-
for i in `ls $1/$2/ggml-model-f16.bin*`; do
22+
for i in $(ls $1/$2/ggml-model-f16.bin*); do
1923
if [ -f "${i/f16/q4_0}" ]; then
2024
echo "Skip model quantization, it already exists: ${i/f16/q4_0}"
2125
else
@@ -30,6 +34,10 @@ else
3034
echo "Available commands: "
3135
echo " --run (-r): Run a model previously converted into ggml"
3236
echo " ex: -m /models/7B/ggml-model-q4_0.bin -p \"Building a website can be done in 10 simple steps:\" -n 512"
37+
echo " --bench (-b): Benchmark the performance of the inference for various parameters."
38+
echo " ex: -m model.gguf"
39+
echo " --perplexity (-p): Measure the perplexity of a model over a given text."
40+
echo " ex: -m model.gguf -f file.txt"
3341
echo " --convert (-c): Convert a llama model into ggml"
3442
echo " ex: --outtype f16 \"/models/7B/\" "
3543
echo " --quantize (-q): Optimize with quantization process ggml"

.devops/vulkan.Dockerfile

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
ARG UBUNTU_VERSION=jammy
1+
ARG UBUNTU_VERSION=24.04
22

33
FROM ubuntu:$UBUNTU_VERSION AS build
44

@@ -7,7 +7,7 @@ RUN apt update && apt install -y git build-essential cmake wget
77

88
# Install Vulkan SDK and cURL
99
RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
10-
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
10+
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-noble.list https://packages.lunarg.com/vulkan/lunarg-vulkan-noble.list && \
1111
apt update -y && \
1212
apt-get install -y vulkan-sdk libcurl4-openssl-dev curl
1313

@@ -34,7 +34,7 @@ RUN mkdir -p /app/full \
3434
FROM ubuntu:$UBUNTU_VERSION AS base
3535

3636
RUN apt-get update \
37-
&& apt-get install -y libgomp1 curl\
37+
&& apt-get install -y libgomp1 curl libvulkan-dev \
3838
&& apt autoremove -y \
3939
&& apt clean -y \
4040
&& rm -rf /tmp/* /var/tmp/* \
@@ -55,8 +55,9 @@ RUN apt-get update \
5555
git \
5656
python3 \
5757
python3-pip \
58-
&& pip install --upgrade pip setuptools wheel \
59-
&& pip install -r requirements.txt \
58+
python3-wheel \
59+
&& pip install --break-system-packages --upgrade setuptools \
60+
&& pip install --break-system-packages -r requirements.txt \
6061
&& apt autoremove -y \
6162
&& apt clean -y \
6263
&& rm -rf /tmp/* /var/tmp/* \

.github/workflows/build.yml

Lines changed: 100 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ jobs:
5656
mkdir build
5757
cd build
5858
cmake .. \
59+
-DCMAKE_BUILD_RPATH="@loader_path" \
5960
-DLLAMA_FATAL_WARNINGS=ON \
6061
-DLLAMA_CURL=ON \
6162
-DGGML_METAL_USE_BF16=ON \
@@ -120,6 +121,7 @@ jobs:
120121
# Metal is disabled due to intermittent failures with Github runners not having a GPU:
121122
# https://github.com/ggerganov/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313
122123
cmake -B build \
124+
-DCMAKE_BUILD_RPATH="@loader_path" \
123125
-DLLAMA_FATAL_WARNINGS=ON \
124126
-DLLAMA_CURL=ON \
125127
-DGGML_METAL=OFF \
@@ -160,8 +162,8 @@ jobs:
160162
path: llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip
161163
name: llama-bin-macos-x64.zip
162164

163-
ubuntu-latest-cmake:
164-
runs-on: ubuntu-latest
165+
ubuntu-cpu-cmake:
166+
runs-on: ubuntu-22.04
165167

166168
steps:
167169
- name: Clone
@@ -181,7 +183,10 @@ jobs:
181183
run: |
182184
mkdir build
183185
cd build
184-
cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_CURL=ON -DGGML_RPC=ON
186+
cmake .. \
187+
-DLLAMA_FATAL_WARNINGS=ON \
188+
-DLLAMA_CURL=ON \
189+
-DGGML_RPC=ON
185190
cmake --build . --config Release -j $(nproc)
186191
187192
- name: Test
@@ -256,7 +261,10 @@ jobs:
256261
run: |
257262
mkdir build
258263
cd build
259-
cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
264+
cmake .. \
265+
-DLLAMA_FATAL_WARNINGS=ON \
266+
-DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
267+
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
260268
cmake --build . --config ${{ matrix.build_type }} -j $(nproc)
261269
262270
- name: Build (no OpenMP)
@@ -265,7 +273,11 @@ jobs:
265273
run: |
266274
mkdir build
267275
cd build
268-
cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DGGML_OPENMP=OFF
276+
cmake .. \
277+
-DLLAMA_FATAL_WARNINGS=ON \
278+
-DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
279+
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
280+
-DGGML_OPENMP=OFF
269281
cmake --build . --config ${{ matrix.build_type }} -j $(nproc)
270282
271283
- name: Test
@@ -295,7 +307,8 @@ jobs:
295307
run: |
296308
mkdir build
297309
cd build
298-
cmake -DGGML_RPC=ON ..
310+
cmake .. \
311+
-DGGML_RPC=ON
299312
cmake --build . --config Release -j $(nproc)
300313
301314
- name: Test
@@ -325,14 +338,16 @@ jobs:
325338
run: |
326339
mkdir build
327340
cd build
328-
cmake -DGGML_VULKAN=ON ..
341+
cmake .. \
342+
-DGGML_VULKAN=ON
329343
cmake --build . --config Release -j $(nproc)
330344
331345
- name: Test
332346
id: cmake_test
333347
run: |
334348
cd build
335-
ctest -L main --verbose --timeout 900
349+
# This is using llvmpipe and runs slower than other backends
350+
ctest -L main --verbose --timeout 1800
336351
337352
ubuntu-22-cmake-hip:
338353
runs-on: ubuntu-22.04
@@ -352,13 +367,18 @@ jobs:
352367
- name: Build with native CMake HIP support
353368
id: cmake_build
354369
run: |
355-
cmake -B build -S . -DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" -DGGML_HIP=ON
370+
cmake -B build -S . \
371+
-DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" \
372+
-DGGML_HIP=ON
356373
cmake --build build --config Release -j $(nproc)
357374
358375
- name: Build with legacy HIP support
359376
id: cmake_build_legacy_hip
360377
run: |
361-
cmake -B build2 -S . -DCMAKE_C_COMPILER=hipcc -DCMAKE_CXX_COMPILER=hipcc -DGGML_HIP=ON
378+
cmake -B build2 -S . \
379+
-DCMAKE_C_COMPILER=hipcc \
380+
-DCMAKE_CXX_COMPILER=hipcc \
381+
-DGGML_HIP=ON
362382
cmake --build build2 --config Release -j $(nproc)
363383
364384
ubuntu-22-cmake-musa:
@@ -379,7 +399,8 @@ jobs:
379399
- name: Build with native CMake MUSA support
380400
id: cmake_build
381401
run: |
382-
cmake -B build -S . -DGGML_MUSA=ON
402+
cmake -B build -S . \
403+
-DGGML_MUSA=ON
383404
cmake --build build --config Release -j $(nproc)
384405
385406
ubuntu-22-cmake-sycl:
@@ -420,7 +441,10 @@ jobs:
420441
source /opt/intel/oneapi/setvars.sh
421442
mkdir build
422443
cd build
423-
cmake -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ..
444+
cmake .. \
445+
-DGGML_SYCL=ON \
446+
-DCMAKE_C_COMPILER=icx \
447+
-DCMAKE_CXX_COMPILER=icpx
424448
cmake --build . --config Release -j $(nproc)
425449
426450
ubuntu-22-cmake-sycl-fp16:
@@ -461,42 +485,13 @@ jobs:
461485
source /opt/intel/oneapi/setvars.sh
462486
mkdir build
463487
cd build
464-
cmake -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON ..
488+
cmake .. \
489+
-DGGML_SYCL=ON \
490+
-DCMAKE_C_COMPILER=icx \
491+
-DCMAKE_CXX_COMPILER=icpx \
492+
-DGGML_SYCL_F16=ON
465493
cmake --build . --config Release -j $(nproc)
466494
467-
# TODO: build with GGML_METAL=OFF because test-backend-ops fail on "Apple Paravirtual device" and I don't know
468-
# how to debug it.
469-
# ref: https://github.com/ggerganov/llama.cpp/actions/runs/7132125951/job/19422043567?pr=4359#step:5:6584
470-
# would be great if we fix these
471-
macOS-latest-cmake:
472-
runs-on: macos-latest
473-
474-
steps:
475-
- name: Clone
476-
id: checkout
477-
uses: actions/checkout@v4
478-
479-
- name: Dependencies
480-
id: depends
481-
continue-on-error: true
482-
run: |
483-
brew update
484-
485-
- name: Build
486-
id: cmake_build
487-
run: |
488-
sysctl -a
489-
mkdir build
490-
cd build
491-
cmake -DLLAMA_FATAL_WARNINGS=ON -DGGML_METAL=OFF ..
492-
cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)
493-
494-
- name: Test
495-
id: cmake_test
496-
run: |
497-
cd build
498-
ctest -L main --verbose --timeout 900
499-
500495
macOS-latest-cmake-ios:
501496
runs-on: macos-latest
502497

@@ -619,6 +614,7 @@ jobs:
619614
msystem: ${{matrix.sys}}
620615
install: >-
621616
base-devel
617+
git
622618
mingw-w64-${{matrix.env}}-toolchain
623619
mingw-w64-${{matrix.env}}-cmake
624620
mingw-w64-${{matrix.env}}-openblas
@@ -809,7 +805,7 @@ jobs:
809805
path: llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}.zip
810806
name: llama-bin-win-${{ matrix.build }}.zip
811807

812-
ubuntu-latest-cmake-cuda:
808+
ubuntu-cpu-cmake-cuda:
813809
runs-on: ubuntu-latest
814810

815811
strategy:
@@ -877,12 +873,11 @@ jobs:
877873
run: |
878874
cmake -S . -B build -G Ninja \
879875
-DCMAKE_BUILD_TYPE=Release \
880-
-DGGML_NATIVE=OFF \
881-
-DGGML_CUDA=ON \
882876
-DCMAKE_CUDA_ARCHITECTURES=${{ matrix.cuda.arch }} \
883877
-DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined \
884-
-DLLAMA_CURL=ON \
885-
-DLLAMA_FATAL_WARNINGS=ON
878+
-DLLAMA_FATAL_WARNINGS=ON \
879+
-DGGML_NATIVE=OFF \
880+
-DGGML_CUDA=ON
886881
cmake --build build
887882
888883
- name: Determine tag name
@@ -1000,7 +995,11 @@ jobs:
1000995
shell: cmd
1001996
run: |
1002997
call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
1003-
cmake -S . -B build -G "Ninja Multi-Config" -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DGGML_RPC=ON
998+
cmake -S . -B build -G "Ninja Multi-Config" ^
999+
-DLLAMA_BUILD_SERVER=ON ^
1000+
-DGGML_NATIVE=OFF ^
1001+
-DGGML_CUDA=ON ^
1002+
-DGGML_RPC=ON
10041003
set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1
10051004
cmake --build build --config Release -j %NINJA_JOBS% -t ggml
10061005
cmake --build build --config Release
@@ -1153,7 +1152,12 @@ jobs:
11531152
run: |
11541153
$env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
11551154
$env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
1156-
cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DGGML_HIP=ON -DCMAKE_BUILD_TYPE=Release -DGGML_RPC=ON
1155+
cmake -G "Unix Makefiles" -B build -S . `
1156+
-DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" `
1157+
-DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" `
1158+
-DCMAKE_BUILD_TYPE=Release `
1159+
-DGGML_HIP=ON `
1160+
-DGGML_RPC=ON
11571161
cmake --build build -j ${env:NUMBER_OF_PROCESSORS}
11581162
11591163
windows-latest-cmake-hip-release:
@@ -1191,7 +1195,13 @@ jobs:
11911195
run: |
11921196
$env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
11931197
$env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
1194-
cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DGGML_HIP=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS=${{ matrix.gpu_target }} -DGGML_RPC=ON
1198+
cmake -G "Unix Makefiles" -B build -S . `
1199+
-DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" `
1200+
-DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" `
1201+
-DCMAKE_BUILD_TYPE=Release `
1202+
-DAMDGPU_TARGETS=${{ matrix.gpu_target }} `
1203+
-DGGML_HIP=ON `
1204+
-DGGML_RPC=ON
11951205
cmake --build build -j ${env:NUMBER_OF_PROCESSORS}
11961206
md "build\bin\rocblas\library\"
11971207
cp "${env:HIP_PATH}\bin\hipblas.dll" "build\bin\"
@@ -1285,9 +1295,8 @@ jobs:
12851295
runs-on: ubuntu-latest
12861296

12871297
needs:
1288-
- ubuntu-latest-cmake
1289-
- ubuntu-latest-cmake-cuda
1290-
- macOS-latest-cmake
1298+
- ubuntu-cpu-cmake
1299+
- ubuntu-cpu-cmake-cuda
12911300
- windows-latest-cmake
12921301
- windows-2019-cmake-cuda
12931302
- windows-latest-cmake-hip-release
@@ -1546,3 +1555,37 @@ jobs:
15461555
# popd
15471556
# emcmake cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }}
15481557
# make
1558+
1559+
openEuler-latest-cmake-cann:
1560+
if: ${{ github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'Ascend NPU') }}
1561+
defaults:
1562+
run:
1563+
shell: bash -el {0}
1564+
runs-on: ubuntu-24.04-arm
1565+
strategy:
1566+
matrix:
1567+
cann:
1568+
- '8.0.rc3.beta1-910b-openeuler22.03-py3.10'
1569+
device:
1570+
- 'ascend910b3'
1571+
build:
1572+
- 'Release'
1573+
container: ascendai/cann:${{ matrix.cann }}
1574+
steps:
1575+
- name: Checkout
1576+
uses: actions/checkout@v4
1577+
1578+
- name: Dependencies
1579+
run: |
1580+
yum update -y
1581+
yum install -y git gcc gcc-c++ make cmake
1582+
1583+
- name: Build
1584+
run: |
1585+
export LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/$(uname -m)-linux/devlib/:${LD_LIBRARY_PATH}
1586+
1587+
cmake -S . -B build \
1588+
-DCMAKE_BUILD_TYPE=${{ matrix.build }} \
1589+
-DGGML_CANN=on \
1590+
-DSOC_TYPE=${{ matrix.device }}
1591+
cmake --build build -j $(nproc)

0 commit comments

Comments
 (0)