Skip to content

Commit f7ac792

Browse files
authored
Merge branch 'ggerganov:master' into master
2 parents 161c25f + 4a75d19 commit f7ac792

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

72 files changed

+5062
-799
lines changed

.devops/cpu.Dockerfile

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,25 @@ ARG UBUNTU_VERSION=22.04
22

33
FROM ubuntu:$UBUNTU_VERSION AS build
44

5+
ARG TARGETARCH
6+
7+
ARG GGML_CPU_ARM_ARCH=armv8-a
8+
59
RUN apt-get update && \
610
apt-get install -y build-essential git cmake libcurl4-openssl-dev
711

812
WORKDIR /app
913

1014
COPY . .
1115

12-
RUN cmake -S . -B build -DGGML_BACKEND_DL=ON -DGGML_NATIVE=OFF -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_CURL=ON -DCMAKE_BUILD_TYPE=Release && \
16+
RUN if [ "$TARGETARCH" = "amd64" ]; then \
17+
cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=ON -DGGML_NATIVE=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON; \
18+
elif [ "$TARGETARCH" = "arm64" ]; then \
19+
cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=ON -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=${GGML_CPU_ARM_ARCH}; \
20+
else \
21+
echo "Unsupported architecture"; \
22+
exit 1; \
23+
fi && \
1324
cmake --build build -j $(nproc)
1425

1526
RUN mkdir -p /app/lib && \

.github/workflows/build.yml

Lines changed: 99 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ jobs:
5656
mkdir build
5757
cd build
5858
cmake .. \
59+
-DCMAKE_BUILD_RPATH="@loader_path" \
5960
-DLLAMA_FATAL_WARNINGS=ON \
6061
-DLLAMA_CURL=ON \
6162
-DGGML_METAL_USE_BF16=ON \
@@ -120,6 +121,7 @@ jobs:
120121
# Metal is disabled due to intermittent failures with Github runners not having a GPU:
121122
# https://github.com/ggerganov/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313
122123
cmake -B build \
124+
-DCMAKE_BUILD_RPATH="@loader_path" \
123125
-DLLAMA_FATAL_WARNINGS=ON \
124126
-DLLAMA_CURL=ON \
125127
-DGGML_METAL=OFF \
@@ -160,8 +162,8 @@ jobs:
160162
path: llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip
161163
name: llama-bin-macos-x64.zip
162164

163-
ubuntu-latest-cmake:
164-
runs-on: ubuntu-latest
165+
ubuntu-cpu-cmake:
166+
runs-on: ubuntu-22.04
165167

166168
steps:
167169
- name: Clone
@@ -181,7 +183,10 @@ jobs:
181183
run: |
182184
mkdir build
183185
cd build
184-
cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_CURL=ON -DGGML_RPC=ON
186+
cmake .. \
187+
-DLLAMA_FATAL_WARNINGS=ON \
188+
-DLLAMA_CURL=ON \
189+
-DGGML_RPC=ON
185190
cmake --build . --config Release -j $(nproc)
186191
187192
- name: Test
@@ -256,7 +261,10 @@ jobs:
256261
run: |
257262
mkdir build
258263
cd build
259-
cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
264+
cmake .. \
265+
-DLLAMA_FATAL_WARNINGS=ON \
266+
-DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
267+
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
260268
cmake --build . --config ${{ matrix.build_type }} -j $(nproc)
261269
262270
- name: Build (no OpenMP)
@@ -265,7 +273,11 @@ jobs:
265273
run: |
266274
mkdir build
267275
cd build
268-
cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DGGML_OPENMP=OFF
276+
cmake .. \
277+
-DLLAMA_FATAL_WARNINGS=ON \
278+
-DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
279+
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
280+
-DGGML_OPENMP=OFF
269281
cmake --build . --config ${{ matrix.build_type }} -j $(nproc)
270282
271283
- name: Test
@@ -295,7 +307,8 @@ jobs:
295307
run: |
296308
mkdir build
297309
cd build
298-
cmake -DGGML_RPC=ON ..
310+
cmake .. \
311+
-DGGML_RPC=ON
299312
cmake --build . --config Release -j $(nproc)
300313
301314
- name: Test
@@ -325,7 +338,8 @@ jobs:
325338
run: |
326339
mkdir build
327340
cd build
328-
cmake -DGGML_VULKAN=ON ..
341+
cmake .. \
342+
-DGGML_VULKAN=ON
329343
cmake --build . --config Release -j $(nproc)
330344
331345
- name: Test
@@ -352,13 +366,18 @@ jobs:
352366
- name: Build with native CMake HIP support
353367
id: cmake_build
354368
run: |
355-
cmake -B build -S . -DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" -DGGML_HIP=ON
369+
cmake -B build -S . \
370+
-DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" \
371+
-DGGML_HIP=ON
356372
cmake --build build --config Release -j $(nproc)
357373
358374
- name: Build with legacy HIP support
359375
id: cmake_build_legacy_hip
360376
run: |
361-
cmake -B build2 -S . -DCMAKE_C_COMPILER=hipcc -DCMAKE_CXX_COMPILER=hipcc -DGGML_HIP=ON
377+
cmake -B build2 -S . \
378+
-DCMAKE_C_COMPILER=hipcc \
379+
-DCMAKE_CXX_COMPILER=hipcc \
380+
-DGGML_HIP=ON
362381
cmake --build build2 --config Release -j $(nproc)
363382
364383
ubuntu-22-cmake-musa:
@@ -379,7 +398,8 @@ jobs:
379398
- name: Build with native CMake MUSA support
380399
id: cmake_build
381400
run: |
382-
cmake -B build -S . -DGGML_MUSA=ON
401+
cmake -B build -S . \
402+
-DGGML_MUSA=ON
383403
cmake --build build --config Release -j $(nproc)
384404
385405
ubuntu-22-cmake-sycl:
@@ -420,7 +440,10 @@ jobs:
420440
source /opt/intel/oneapi/setvars.sh
421441
mkdir build
422442
cd build
423-
cmake -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ..
443+
cmake .. \
444+
-DGGML_SYCL=ON \
445+
-DCMAKE_C_COMPILER=icx \
446+
-DCMAKE_CXX_COMPILER=icpx
424447
cmake --build . --config Release -j $(nproc)
425448
426449
ubuntu-22-cmake-sycl-fp16:
@@ -461,42 +484,13 @@ jobs:
461484
source /opt/intel/oneapi/setvars.sh
462485
mkdir build
463486
cd build
464-
cmake -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON ..
487+
cmake .. \
488+
-DGGML_SYCL=ON \
489+
-DCMAKE_C_COMPILER=icx \
490+
-DCMAKE_CXX_COMPILER=icpx \
491+
-DGGML_SYCL_F16=ON
465492
cmake --build . --config Release -j $(nproc)
466493
467-
# TODO: build with GGML_METAL=OFF because test-backend-ops fail on "Apple Paravirtual device" and I don't know
468-
# how to debug it.
469-
# ref: https://github.com/ggerganov/llama.cpp/actions/runs/7132125951/job/19422043567?pr=4359#step:5:6584
470-
# would be great if we fix these
471-
macOS-latest-cmake:
472-
runs-on: macos-latest
473-
474-
steps:
475-
- name: Clone
476-
id: checkout
477-
uses: actions/checkout@v4
478-
479-
- name: Dependencies
480-
id: depends
481-
continue-on-error: true
482-
run: |
483-
brew update
484-
485-
- name: Build
486-
id: cmake_build
487-
run: |
488-
sysctl -a
489-
mkdir build
490-
cd build
491-
cmake -DLLAMA_FATAL_WARNINGS=ON -DGGML_METAL=OFF ..
492-
cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)
493-
494-
- name: Test
495-
id: cmake_test
496-
run: |
497-
cd build
498-
ctest -L main --verbose --timeout 900
499-
500494
macOS-latest-cmake-ios:
501495
runs-on: macos-latest
502496

@@ -827,7 +821,13 @@ jobs:
827821
828822
- name: Build with CMake
829823
run: |
830-
cmake -S . -B build -G Ninja -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=89-real -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined -DLLAMA_FATAL_WARNINGS=ON
824+
cmake -S . -B build -G Ninja \
825+
-DCMAKE_BUILD_TYPE=Release \
826+
-DCMAKE_CUDA_ARCHITECTURES=89-real \
827+
-DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined \
828+
-DLLAMA_FATAL_WARNINGS=ON \
829+
-DGGML_NATIVE=OFF \
830+
-DGGML_CUDA=ON
831831
cmake --build build
832832
833833
windows-2019-cmake-cuda:
@@ -916,7 +916,11 @@ jobs:
916916
shell: cmd
917917
run: |
918918
call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
919-
cmake -S . -B build -G "Ninja Multi-Config" -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DGGML_RPC=ON
919+
cmake -S . -B build -G "Ninja Multi-Config" ^
920+
-DLLAMA_BUILD_SERVER=ON ^
921+
-DGGML_NATIVE=OFF ^
922+
-DGGML_CUDA=ON ^
923+
-DGGML_RPC=ON
920924
set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1
921925
cmake --build build --config Release -j %NINJA_JOBS% -t ggml
922926
cmake --build build --config Release
@@ -1069,7 +1073,12 @@ jobs:
10691073
run: |
10701074
$env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
10711075
$env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
1072-
cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DGGML_HIP=ON -DCMAKE_BUILD_TYPE=Release -DGGML_RPC=ON
1076+
cmake -G "Unix Makefiles" -B build -S . `
1077+
-DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" `
1078+
-DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" `
1079+
-DCMAKE_BUILD_TYPE=Release `
1080+
-DGGML_HIP=ON `
1081+
-DGGML_RPC=ON
10731082
cmake --build build -j ${env:NUMBER_OF_PROCESSORS}
10741083
10751084
windows-latest-cmake-hip-release:
@@ -1107,7 +1116,13 @@ jobs:
11071116
run: |
11081117
$env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
11091118
$env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
1110-
cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DGGML_HIP=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS=${{ matrix.gpu_target }} -DGGML_RPC=ON
1119+
cmake -G "Unix Makefiles" -B build -S . `
1120+
-DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" `
1121+
-DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" `
1122+
-DCMAKE_BUILD_TYPE=Release `
1123+
-DAMDGPU_TARGETS=${{ matrix.gpu_target }} `
1124+
-DGGML_HIP=ON `
1125+
-DGGML_RPC=ON
11111126
cmake --build build -j ${env:NUMBER_OF_PROCESSORS}
11121127
md "build\bin\rocblas\library\"
11131128
cp "${env:HIP_PATH}\bin\hipblas.dll" "build\bin\"
@@ -1201,8 +1216,7 @@ jobs:
12011216
runs-on: ubuntu-latest
12021217

12031218
needs:
1204-
- ubuntu-latest-cmake
1205-
- macOS-latest-cmake
1219+
- ubuntu-cpu-cmake
12061220
- windows-latest-cmake
12071221
- windows-2019-cmake-cuda
12081222
- windows-latest-cmake-hip-release
@@ -1461,3 +1475,37 @@ jobs:
14611475
# popd
14621476
# emcmake cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }}
14631477
# make
1478+
1479+
openEuler-latest-cmake-cann:
1480+
if: ${{ github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'Ascend NPU') }}
1481+
defaults:
1482+
run:
1483+
shell: bash -el {0}
1484+
runs-on: ubuntu-24.04-arm
1485+
strategy:
1486+
matrix:
1487+
cann:
1488+
- '8.0.rc3.beta1-910b-openeuler22.03-py3.10'
1489+
device:
1490+
- 'ascend910b3'
1491+
build:
1492+
- 'Release'
1493+
container: ascendai/cann:${{ matrix.cann }}
1494+
steps:
1495+
- name: Checkout
1496+
uses: actions/checkout@v4
1497+
1498+
- name: Dependencies
1499+
run: |
1500+
yum update -y
1501+
yum install -y git gcc gcc-c++ make cmake
1502+
1503+
- name: Build
1504+
run: |
1505+
export LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/$(uname -m)-linux/devlib/:${LD_LIBRARY_PATH}
1506+
1507+
cmake -S . -B build \
1508+
-DCMAKE_BUILD_TYPE=${{ matrix.build }} \
1509+
-DGGML_CANN=on \
1510+
-DSOC_TYPE=${{ matrix.device }}
1511+
cmake --build build -j $(nproc)

CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ endif()
1616
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/")
1717

1818
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
19+
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
1920

2021
if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
2122
set(LLAMA_STANDALONE ON)
@@ -49,6 +50,7 @@ endif()
4950
if (MSVC)
5051
add_compile_options("$<$<COMPILE_LANGUAGE:C>:/utf-8>")
5152
add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:/utf-8>")
53+
add_compile_options(/bigobj)
5254
endif()
5355

5456
#

Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1361,7 +1361,9 @@ llama-server: \
13611361
examples/server/httplib.h \
13621362
examples/server/index.html.hpp \
13631363
examples/server/loading.html.hpp \
1364+
common/chat-template.hpp \
13641365
common/json.hpp \
1366+
common/minja.hpp \
13651367
$(OBJ_ALL)
13661368
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
13671369
$(CXX) $(CXXFLAGS) $(filter-out %.h %.hpp $<,$^) -Iexamples/server $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) $(LWINSOCK2)

README.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,9 @@ Inference of Meta's [LLaMA](https://arxiv.org/abs/2302.13971) model (and others)
1616

1717
## Hot topics
1818

19-
- **Introducing GGUF-my-LoRA** https://github.com/ggerganov/llama.cpp/discussions/10123
19+
- **VS Code extension for FIM completions:** https://github.com/ggml-org/llama.vscode
20+
- Vim/Neovim plugin for FIM completions: https://github.com/ggml-org/llama.vim
21+
- Introducing GGUF-my-LoRA https://github.com/ggerganov/llama.cpp/discussions/10123
2022
- Hugging Face Inference Endpoints now support GGUF out of the box! https://github.com/ggerganov/llama.cpp/discussions/9669
2123
- Hugging Face GGUF editor: [discussion](https://github.com/ggerganov/llama.cpp/discussions/9268) | [tool](https://huggingface.co/spaces/CISCai/gguf-editor)
2224

cmake/build-info.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ if(MSVC)
4444
set(BUILD_TARGET ${CMAKE_VS_PLATFORM_NAME})
4545
else()
4646
execute_process(
47-
COMMAND sh -c "$@ --version | head -1" _ ${CMAKE_C_COMPILER}
47+
COMMAND sh -c "\"$@\" --version | head -1" _ ${CMAKE_C_COMPILER}
4848
OUTPUT_VARIABLE OUT
4949
OUTPUT_STRIP_TRAILING_WHITESPACE
5050
)

common/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ add_library(${TARGET} STATIC
5656
arg.cpp
5757
arg.h
5858
base64.hpp
59+
chat-template.hpp
5960
common.cpp
6061
common.h
6162
console.cpp
@@ -64,6 +65,7 @@ add_library(${TARGET} STATIC
6465
json.hpp
6566
log.cpp
6667
log.h
68+
minja.hpp
6769
ngram-cache.cpp
6870
ngram-cache.h
6971
sampling.cpp

0 commit comments

Comments
 (0)