Skip to content

Commit d875c8e

Browse files
authored
Merge branch 'ggerganov:master' into master
2 parents f7ac792 + 325afb3 commit d875c8e

File tree

29 files changed

+886
-481
lines changed

29 files changed

+886
-481
lines changed

.devops/tools.sh

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,13 @@ elif [[ "$arg1" == '--quantize' || "$arg1" == '-q' ]]; then
1313
exec ./llama-quantize "$@"
1414
elif [[ "$arg1" == '--run' || "$arg1" == '-r' ]]; then
1515
exec ./llama-cli "$@"
16+
elif [[ "$arg1" == '--bench' || "$arg1" == '-b' ]]; then
17+
exec ./llama-bench "$@"
18+
elif [[ "$arg1" == '--perplexity' || "$arg1" == '-p' ]]; then
19+
exec ./llama-perplexity "$@"
1620
elif [[ "$arg1" == '--all-in-one' || "$arg1" == '-a' ]]; then
1721
echo "Converting PTH to GGML..."
18-
for i in `ls $1/$2/ggml-model-f16.bin*`; do
22+
for i in $(ls $1/$2/ggml-model-f16.bin*); do
1923
if [ -f "${i/f16/q4_0}" ]; then
2024
echo "Skip model quantization, it already exists: ${i/f16/q4_0}"
2125
else
@@ -30,6 +34,10 @@ else
3034
echo "Available commands: "
3135
echo " --run (-r): Run a model previously converted into ggml"
3236
echo " ex: -m /models/7B/ggml-model-q4_0.bin -p \"Building a website can be done in 10 simple steps:\" -n 512"
37+
echo " --bench (-b): Benchmark the performance of the inference for various parameters."
38+
echo " ex: -m model.gguf"
39+
echo " --perplexity (-p): Measure the perplexity of a model over a given text."
40+
echo " ex: -m model.gguf -f file.txt"
3341
echo " --convert (-c): Convert a llama model into ggml"
3442
echo " ex: --outtype f16 \"/models/7B/\" "
3543
echo " --quantize (-q): Optimize with quantization process ggml"

.devops/vulkan.Dockerfile

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
ARG UBUNTU_VERSION=jammy
1+
ARG UBUNTU_VERSION=24.04
22

33
FROM ubuntu:$UBUNTU_VERSION AS build
44

@@ -7,7 +7,7 @@ RUN apt update && apt install -y git build-essential cmake wget
77

88
# Install Vulkan SDK and cURL
99
RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
10-
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
10+
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-noble.list https://packages.lunarg.com/vulkan/lunarg-vulkan-noble.list && \
1111
apt update -y && \
1212
apt-get install -y vulkan-sdk libcurl4-openssl-dev curl
1313

@@ -34,7 +34,7 @@ RUN mkdir -p /app/full \
3434
FROM ubuntu:$UBUNTU_VERSION AS base
3535

3636
RUN apt-get update \
37-
&& apt-get install -y libgomp1 curl\
37+
&& apt-get install -y libgomp1 curl libvulkan-dev \
3838
&& apt autoremove -y \
3939
&& apt clean -y \
4040
&& rm -rf /tmp/* /var/tmp/* \
@@ -55,8 +55,9 @@ RUN apt-get update \
5555
git \
5656
python3 \
5757
python3-pip \
58-
&& pip install --upgrade pip setuptools wheel \
59-
&& pip install -r requirements.txt \
58+
python3-wheel \
59+
&& pip install --break-system-packages --upgrade setuptools \
60+
&& pip install --break-system-packages -r requirements.txt \
6061
&& apt autoremove -y \
6162
&& apt clean -y \
6263
&& rm -rf /tmp/* /var/tmp/* \

.github/workflows/build.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -613,6 +613,7 @@ jobs:
613613
msystem: ${{matrix.sys}}
614614
install: >-
615615
base-devel
616+
git
616617
mingw-w64-${{matrix.env}}-toolchain
617618
mingw-w64-${{matrix.env}}-cmake
618619
mingw-w64-${{matrix.env}}-openblas

.github/workflows/docker.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,11 @@ jobs:
2828
push_to_registry:
2929
name: Push Docker image to Docker Hub
3030

31-
runs-on: ubuntu-latest
31+
runs-on: ubuntu-22.04
3232
env:
3333
COMMIT_SHA: ${{ github.sha }}
3434
strategy:
35+
fail-fast: false
3536
matrix:
3637
config:
3738
# Multi-stage build

CMakeLists.txt

Lines changed: 7 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,8 @@ endif()
5050
if (MSVC)
5151
add_compile_options("$<$<COMPILE_LANGUAGE:C>:/utf-8>")
5252
add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:/utf-8>")
53-
add_compile_options(/bigobj)
53+
add_compile_options("$<$<COMPILE_LANGUAGE:C>:/bigobj>")
54+
add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:/bigobj>")
5455
endif()
5556

5657
#
@@ -187,27 +188,14 @@ set(LLAMA_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location o
187188
set(LLAMA_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} CACHE PATH "Location of library files")
188189
set(LLAMA_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location of binary files")
189190

190-
# At the moment some compile definitions are placed within the ggml/src
191-
# directory but not exported on the `ggml` target. This could be improved by
192-
# determining _precisely_ which defines are necessary for the llama-config
193-
# package.
194-
#
195-
set(GGML_TRANSIENT_DEFINES)
196-
get_target_property(GGML_DIRECTORY ggml SOURCE_DIR)
197-
get_directory_property(GGML_DIR_DEFINES DIRECTORY ${GGML_DIRECTORY} COMPILE_DEFINITIONS)
198-
if (GGML_DIR_DEFINES)
199-
list(APPEND GGML_TRANSIENT_DEFINES ${GGML_DIR_DEFINES})
200-
endif()
201-
get_target_property(GGML_TARGET_DEFINES ggml COMPILE_DEFINITIONS)
202-
if (GGML_TARGET_DEFINES)
203-
list(APPEND GGML_TRANSIENT_DEFINES ${GGML_TARGET_DEFINES})
204-
endif()
205-
get_target_property(GGML_LINK_LIBRARIES ggml LINK_LIBRARIES)
206-
# all public headers
207191
set(LLAMA_PUBLIC_HEADERS
208192
${CMAKE_CURRENT_SOURCE_DIR}/include/llama.h
209193
${CMAKE_CURRENT_SOURCE_DIR}/include/llama-cpp.h)
210-
set_target_properties(llama PROPERTIES PUBLIC_HEADER "${LLAMA_PUBLIC_HEADERS}")
194+
195+
set_target_properties(llama
196+
PROPERTIES
197+
PUBLIC_HEADER "${LLAMA_PUBLIC_HEADERS}")
198+
211199
install(TARGETS llama LIBRARY PUBLIC_HEADER)
212200

213201
configure_package_config_file(

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ Inference of Meta's [LLaMA](https://arxiv.org/abs/2302.13971) model (and others)
1616

1717
## Hot topics
1818

19+
- **How to use [MTLResidencySet](https://developer.apple.com/documentation/metal/mtlresidencyset?language=objc) to keep the GPU memory active?** https://github.com/ggerganov/llama.cpp/pull/11427
1920
- **VS Code extension for FIM completions:** https://github.com/ggml-org/llama.vscode
2021
- Vim/Neovim plugin for FIM completions: https://github.com/ggml-org/llama.vim
2122
- Introducing GGUF-my-LoRA https://github.com/ggerganov/llama.cpp/discussions/10123

cmake/llama-config.cmake.in

Lines changed: 4 additions & 152 deletions
Original file line numberDiff line numberDiff line change
@@ -3,159 +3,13 @@ set(LLAMA_BUILD_COMMIT @LLAMA_BUILD_COMMIT@)
33
set(LLAMA_BUILD_NUMBER @LLAMA_BUILD_NUMBER@)
44
set(LLAMA_SHARED_LIB @BUILD_SHARED_LIBS@)
55

6-
set(GGML_STATIC @GGML_STATIC@)
7-
set(GGML_NATIVE @GGML_NATIVE@)
8-
set(GGML_LTO @GGML_LTO@)
9-
set(GGML_CCACHE @GGML_CCACHE@)
10-
set(GGML_AVX @GGML_AVX@)
11-
set(GGML_AVX2 @GGML_AVX2@)
12-
set(GGML_AVX512 @GGML_AVX512@)
13-
set(GGML_AVX512_VBMI @GGML_AVX512_VBMI@)
14-
set(GGML_AVX512_VNNI @GGML_AVX512_VNNI@)
15-
set(GGML_AVX512_BF16 @GGML_AVX512_BF16@)
16-
set(GGML_AMX_TILE @GGML_AMX_TILE@)
17-
set(GGML_AMX_INT8 @GGML_AMX_INT8@)
18-
set(GGML_AMX_BF16 @GGML_AMX_BF16@)
19-
set(GGML_FMA @GGML_FMA@)
20-
set(GGML_LASX @GGML_LASX@)
21-
set(GGML_LSX @GGML_LSX@)
22-
set(GGML_RVV @GGML_RVV@)
23-
set(GGML_SVE @GGML_SVE@)
24-
25-
set(GGML_ACCELERATE @GGML_ACCELERATE@)
26-
set(GGML_OPENMP @GGML_OPENMP@)
27-
set(GGML_CPU_HBM @GGML_CPU_HBM@)
28-
set(GGML_BLAS_VENDOR @GGML_BLAS_VENDOR@)
29-
30-
set(GGML_CUDA_FORCE_MMQ @GGML_CUDA_FORCE_MMQ@)
31-
set(GGML_CUDA_FORCE_CUBLAS @GGML_CUDA_FORCE_CUBLAS@)
32-
set(GGML_CUDA_F16 @GGML_CUDA_F16@)
33-
set(GGML_CUDA_PEER_MAX_BATCH_SIZE @GGML_CUDA_PEER_MAX_BATCH_SIZE@)
34-
set(GGML_CUDA_NO_PEER_COPY @GGML_CUDA_NO_PEER_COPY@)
35-
set(GGML_CUDA_NO_VMM @GGML_CUDA_NO_VMM@)
36-
set(GGML_CUDA_FA_ALL_QUANTS @GGML_CUDA_FA_ALL_QUANTS@)
37-
set(GGML_CUDA_GRAPHS @GGML_CUDA_GRAPHS@)
38-
39-
set(GGML_HIP_UMA @GGML_HIP_UMA@)
40-
41-
set(GGML_VULKAN_CHECK_RESULTS @GGML_VULKAN_CHECK_RESULTS@)
42-
set(GGML_VULKAN_DEBUG @GGML_VULKAN_DEBUG@)
43-
set(GGML_VULKAN_MEMORY_DEBUG @GGML_VULKAN_MEMORY_DEBUG@)
44-
set(GGML_VULKAN_SHADER_DEBUG_INFO @GGML_VULKAN_SHADER_DEBUG_INFO@)
45-
set(GGML_VULKAN_PERF @GGML_VULKAN_PERF@)
46-
set(GGML_VULKAN_VALIDATE @GGML_VULKAN_VALIDATE@)
47-
set(GGML_VULKAN_RUN_TESTS @GGML_VULKAN_RUN_TESTS@)
48-
49-
set(GGML_METAL_USE_BF16 @GGML_METAL_USE_BF16@)
50-
set(GGML_METAL_NDEBUG @GGML_METAL_NDEBUG@)
51-
set(GGML_METAL_SHADER_DEBUG @GGML_METAL_SHADER_DEBUG@)
52-
set(GGML_METAL_EMBED_LIBRARY @GGML_METAL_EMBED_LIBRARY@)
53-
set(GGML_METAL_MACOSX_VERSION_MIN @GGML_METAL_MACOSX_VERSION_MIN@)
54-
set(GGML_METAL_STD @GGML_METAL_STD@)
55-
56-
set(GGML_SYCL_F16 @GGML_SYCL_F16@)
57-
set(GGML_SYCL_TARGET @GGML_SYCL_TARGET@)
58-
set(GGML_SYCL_DEVICE_ARCH @GGML_SYCL_DEVICE_ARCH@)
59-
60-
616
@PACKAGE_INIT@
627

638
set_and_check(LLAMA_INCLUDE_DIR "@PACKAGE_LLAMA_INCLUDE_INSTALL_DIR@")
649
set_and_check(LLAMA_LIB_DIR "@PACKAGE_LLAMA_LIB_INSTALL_DIR@")
6510
set_and_check(LLAMA_BIN_DIR "@PACKAGE_LLAMA_BIN_INSTALL_DIR@")
6611

67-
find_package(Threads REQUIRED)
68-
69-
set(_llama_transient_defines "@GGML_TRANSIENT_DEFINES@")
70-
set(_llama_link_deps "")
71-
set(_llama_link_opts "")
72-
foreach(_ggml_lib ggml ggml-base)
73-
string(REPLACE "-" "_" _ggml_lib_var "${_ggml_lib}_LIBRARY")
74-
find_library(${_ggml_lib_var} ${_ggml_lib}
75-
REQUIRED
76-
HINTS ${LLAMA_LIB_DIR}
77-
NO_CMAKE_FIND_ROOT_PATH
78-
)
79-
list(APPEND _llama_link_deps "${${_ggml_lib_var}}")
80-
message(STATUS "Found ${${_ggml_lib_var}}")
81-
endforeach()
82-
83-
foreach(backend amx blas cann cpu cuda hip kompute metal musa rpc sycl vulkan)
84-
string(TOUPPER "GGML_${backend}" backend_id)
85-
set(_ggml_lib "ggml-${backend}")
86-
string(REPLACE "-" "_" _ggml_lib_var "${_ggml_lib}_LIBRARY")
87-
88-
find_library(${_ggml_lib_var} ${_ggml_lib}
89-
HINTS ${LLAMA_LIB_DIR}
90-
NO_CMAKE_FIND_ROOT_PATH
91-
)
92-
if(${_ggml_lib_var})
93-
list(APPEND _llama_link_deps "${${_ggml_lib_var}}")
94-
set(${backend_id} ON)
95-
message(STATUS "Found backend ${${_ggml_lib_var}}")
96-
else()
97-
set(${backend_id} OFF)
98-
endif()
99-
endforeach()
100-
101-
if (NOT LLAMA_SHARED_LIB)
102-
if (APPLE AND GGML_ACCELERATE)
103-
find_library(ACCELERATE_FRAMEWORK Accelerate REQUIRED)
104-
list(APPEND _llama_link_deps ${ACCELERATE_FRAMEWORK})
105-
endif()
106-
107-
if (GGML_OPENMP)
108-
find_package(OpenMP REQUIRED)
109-
list(APPEND _llama_link_deps OpenMP::OpenMP_C OpenMP::OpenMP_CXX)
110-
endif()
111-
112-
if (GGML_CPU_HBM)
113-
find_library(memkind memkind REQUIRED)
114-
list(APPEND _llama_link_deps memkind)
115-
endif()
116-
117-
if (GGML_BLAS)
118-
find_package(BLAS REQUIRED)
119-
list(APPEND _llama_link_deps ${BLAS_LIBRARIES})
120-
list(APPEND _llama_link_opts ${BLAS_LINKER_FLAGS})
121-
endif()
122-
123-
if (GGML_CUDA)
124-
find_package(CUDAToolkit REQUIRED)
125-
endif()
126-
127-
if (GGML_METAL)
128-
find_library(FOUNDATION_LIBRARY Foundation REQUIRED)
129-
find_library(METAL_FRAMEWORK Metal REQUIRED)
130-
find_library(METALKIT_FRAMEWORK MetalKit REQUIRED)
131-
list(APPEND _llama_link_deps ${FOUNDATION_LIBRARY}
132-
${METAL_FRAMEWORK} ${METALKIT_FRAMEWORK})
133-
endif()
134-
135-
if (GGML_VULKAN)
136-
find_package(Vulkan REQUIRED)
137-
list(APPEND _llama_link_deps Vulkan::Vulkan)
138-
endif()
139-
140-
if (GGML_HIP)
141-
find_package(hip REQUIRED)
142-
find_package(hipblas REQUIRED)
143-
find_package(rocblas REQUIRED)
144-
list(APPEND _llama_link_deps hip::host roc::rocblas roc::hipblas)
145-
endif()
146-
147-
if (GGML_SYCL)
148-
find_package(DNNL)
149-
if (${DNNL_FOUND} AND GGML_SYCL_TARGET STREQUAL "INTEL")
150-
list(APPEND _llama_link_deps DNNL::dnnl)
151-
endif()
152-
if (WIN32)
153-
find_package(IntelSYCL REQUIRED)
154-
find_package(MKL REQUIRED)
155-
list(APPEND _llama_link_deps IntelSYCL::SYCL_CXX MKL::MKL MKL::MKL_SYCL)
156-
endif()
157-
endif()
158-
endif()
12+
find_package(ggml REQUIRED HINTS ${LLAMA_LIB_DIR}/cmake)
15913

16014
find_library(llama_LIBRARY llama
16115
REQUIRED
@@ -167,12 +21,10 @@ add_library(llama UNKNOWN IMPORTED)
16721
set_target_properties(llama
16822
PROPERTIES
16923
INTERFACE_INCLUDE_DIRECTORIES "${LLAMA_INCLUDE_DIR}"
170-
INTERFACE_LINK_LIBRARIES "${_llama_link_deps}"
171-
INTERFACE_LINK_OPTIONS "${_llama_link_opts}"
172-
INTERFACE_COMPILE_DEFINITIONS "${_llama_transient_defines}"
24+
INTERFACE_LINK_LIBRARIES "ggml::ggml;ggml::ggml-base;"
17325
IMPORTED_LINK_INTERFACE_LANGUAGES "CXX"
17426
IMPORTED_LOCATION "${llama_LIBRARY}"
175-
INTERFACE_COMPILE_FEATURES cxx_std_11
176-
POSITION_INDEPENDENT_CODE ON )
27+
INTERFACE_COMPILE_FEATURES c_std_90
28+
POSITION_INDEPENDENT_CODE ON)
17729

17830
check_required_components(Llama)

examples/main-cmake-pkg/CMakeLists.txt

Lines changed: 0 additions & 32 deletions
This file was deleted.

examples/main-cmake-pkg/README.md

Lines changed: 0 additions & 31 deletions
This file was deleted.

0 commit comments

Comments
 (0)