
Commit fad8f0d

Merge branch 'abetlen:main' into main
2 parents 1996d8f + f7f4fa8 commit fad8f0d


50 files changed: +2433 −1416 lines

.github/workflows/build-and-release.yaml

Lines changed: 2 additions & 2 deletions
@@ -29,7 +29,7 @@ jobs:
         python -m pip install -e .[all]
 
       - name: Build wheels
-        uses: pypa/cibuildwheel@v2.19.1
+        uses: pypa/cibuildwheel@v2.19.2
         env:
           # disable repair
           CIBW_REPAIR_WHEEL_COMMAND: ""
@@ -56,7 +56,7 @@ jobs:
           platforms: linux/arm64
 
       - name: Build wheels
-        uses: pypa/cibuildwheel@v2.19.1
+        uses: pypa/cibuildwheel@v2.19.2
         env:
           CIBW_SKIP: "*musllinux* pp*"
           CIBW_REPAIR_WHEEL_COMMAND: ""

.github/workflows/build-wheels-cuda.yaml

Lines changed: 6 additions & 6 deletions
@@ -45,7 +45,7 @@ jobs:
     steps:
       - name: Add MSBuild to PATH
         if: runner.os == 'Windows'
-        uses: microsoft/setup-msbuild@v1.1
+        uses: microsoft/setup-msbuild@v1.3
         with:
           vs-version: '[16.11,16.12)'
 
@@ -114,16 +114,16 @@ jobs:
             $env:LD_LIBRARY_PATH = $env:CONDA_PREFIX + '/lib:' + $env:LD_LIBRARY_PATH
           }
           $env:VERBOSE = '1'
-          $env:CMAKE_ARGS = '-DLLAMA_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=all'
-          $env:CMAKE_ARGS = "-DLLAMA_CUDA_FORCE_MMQ=ON $env:CMAKE_ARGS"
+          $env:CMAKE_ARGS = '-DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=all'
+          $env:CMAKE_ARGS = "-DGGML_CUDA_FORCE_MMQ=ON $env:CMAKE_ARGS"
           # if ($env:AVXVER -eq 'AVX') {
-          $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DLLAMA_AVX2=off -DLLAMA_FMA=off -DLLAMA_F16C=off'
+          $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DGGML_AVX2=off -DGGML_FMA=off -DGGML_F16C=off'
           # }
           # if ($env:AVXVER -eq 'AVX512') {
-          #   $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DLLAMA_AVX512=on'
+          #   $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DGGML_AVX512=on'
           # }
           # if ($env:AVXVER -eq 'basic') {
-          #   $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_FMA=off -DLLAMA_F16C=off'
+          #   $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DGGML_AVX=off -DGGML_AVX2=off -DGGML_FMA=off -DGGML_F16C=off'
           # }
           python -m build --wheel
           # write the build tag to the output
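The PowerShell step above is where the `LLAMA_*` → `GGML_*` rename actually lands in CI. A rough local equivalent (a sketch, not the workflow itself: bash instead of PowerShell, and it assumes a CUDA toolkit and the `build` package are installed):

```bash
# Mirror the workflow's configuration with the renamed GGML_ flags,
# then build a wheel the same way CI does.
VERBOSE=1 \
CMAKE_ARGS="-DGGML_CUDA_FORCE_MMQ=ON -DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=all" \
python -m build --wheel
```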

.github/workflows/build-wheels-metal.yaml

Lines changed: 2 additions & 2 deletions
@@ -30,12 +30,12 @@ jobs:
         python -m pip install -e .[all]
 
       - name: Build wheels
-        uses: pypa/cibuildwheel@v2.19.1
+        uses: pypa/cibuildwheel@v2.19.2
         env:
           # disable repair
           CIBW_REPAIR_WHEEL_COMMAND: ""
           CIBW_ARCHS: "arm64"
-          CIBW_ENVIRONMENT: CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_APPLE_SILICON_PROCESSOR=arm64 -DLLAMA_METAL=on"
+          CIBW_ENVIRONMENT: CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_APPLE_SILICON_PROCESSOR=arm64 -DGGML_METAL=on"
           CIBW_BUILD: "cp39-* cp310-* cp311-* cp312-*"
         with:
           package-dir: .

.github/workflows/test.yaml

Lines changed: 1 addition & 1 deletion
@@ -97,7 +97,7 @@ jobs:
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          CMAKE_ARGS="-DLLAMA_METAL=on" python -m pip install .[all] --verbose
+          CMAKE_ARGS="-DGGML_METAL=on" python -m pip install .[all] --verbose
       - name: Test with pytest
         run: |
           python -m pytest
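The same check can be run locally on an Apple Silicon machine; the commands below are taken verbatim from the updated workflow step:

```bash
python -m pip install --upgrade pip
CMAKE_ARGS="-DGGML_METAL=on" python -m pip install .[all] --verbose
python -m pytest
```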

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [0.2.82]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@7fdb6f73e35605c8dbc39e9f19cd9ed84dbc87f2
+
 ## [0.2.81]
 
 - feat: Update llama.cpp to ggerganov/llama.cpp@968967376dc2c018d29f897c4883d335bbf384fb

CMakeLists.txt

Lines changed: 6 additions & 6 deletions
@@ -59,14 +59,14 @@ if (LLAMA_BUILD)
     if (APPLE AND NOT CMAKE_SYSTEM_PROCESSOR MATCHES "arm64")
         # Need to disable these llama.cpp flags on Apple x86_64,
         # otherwise users may encounter invalid instruction errors
-        set(LLAMA_AVX "Off" CACHE BOOL "llama: enable AVX" FORCE)
-        set(LLAMA_AVX2 "Off" CACHE BOOL "llama: enable AVX2" FORCE)
-        set(LLAMA_FMA "Off" CACHE BOOL "llama: enable FMA" FORCE)
-        set(LLAMA_F16C "Off" CACHE BOOL "llama: enable F16C" FORCE)
+        set(GGML_AVX "Off" CACHE BOOL "ggml: enable AVX" FORCE)
+        set(GGML_AVX2 "Off" CACHE BOOL "ggml: enable AVX2" FORCE)
+        set(GGML_FMA "Off" CACHE BOOL "gml: enable FMA" FORCE)
+        set(GGML_F16C "Off" CACHE BOOL "gml: enable F16C" FORCE)
     endif()
 
     if (APPLE)
-        set(LLAMA_METAL_EMBED_LIBRARY "On" CACHE BOOL "llama: embed metal library" FORCE)
+        set(GGML_METAL_EMBED_LIBRARY "On" CACHE BOOL "llama: embed metal library" FORCE)
     endif()
 
     add_subdirectory(vendor/llama.cpp)
@@ -122,4 +122,4 @@ if (LLAMA_BUILD)
         )
     endif()
 endif()
-endif()
+endif()
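Because these `set(... CACHE BOOL ... FORCE)` calls force the cache entries, they override any conflicting `CMAKE_ARGS` a user passes on Apple x86_64. A minimal sketch for verifying the forced values after a configure run (assumes a checkout with the `vendor/llama.cpp` submodule present; the grep pattern is illustrative):

```bash
cmake -S . -B build -DLLAMA_BUILD=ON    # configure the bindings project
# list cached variables without re-running configure
cmake -L -N build | grep -E '^GGML_(AVX|AVX2|FMA|F16C|METAL_EMBED_LIBRARY)'
```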

Makefile

Lines changed: 9 additions & 9 deletions
@@ -22,28 +22,28 @@ build.debug:
 		--editable .
 
 build.cuda:
-	CMAKE_ARGS="-DLLAMA_CUDA=on" python3 -m pip install --verbose -e .
+	CMAKE_ARGS="-DGGML_CUDA=on" python3 -m pip install --verbose -e .
 
 build.openblas:
-	CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" python3 -m pip install --verbose -e .
+	CMAKE_ARGS="-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS" python3 -m pip install --verbose -e .
 
 build.blis:
-	CMAKE_ARGS="-DLLAMA_BLAS=on -DLLAMA_BLAS_VENDOR=FLAME" python3 -m pip install --verbose -e .
+	CMAKE_ARGS="-DGGML_BLAS=on -DGGML_BLAS_VENDOR=FLAME" python3 -m pip install --verbose -e .
 
 build.metal:
-	CMAKE_ARGS="-DLLAMA_METAL=on" python3 -m pip install --verbose -e .
+	CMAKE_ARGS="-DGGML_METAL=on" python3 -m pip install --verbose -e .
 
 build.vulkan:
-	CMAKE_ARGS="-DLLAMA_VULKAN=on" python3 -m pip install --verbose -e .
+	CMAKE_ARGS="-DGGML_VULKAN=on" python3 -m pip install --verbose -e .
 
 build.kompute:
-	CMAKE_ARGS="-DLLAMA_KOMPUTE=on" python3 -m pip install --verbose -e .
+	CMAKE_ARGS="-DGGML_KOMPUTE=on" python3 -m pip install --verbose -e .
 
 build.sycl:
-	CMAKE_ARGS="-DLLAMA_SYCL=on" python3 -m pip install --verbose -e .
+	CMAKE_ARGS="-DGGML_SYCL=on" python3 -m pip install --verbose -e .
 
 build.rpc:
-	CMAKE_ARGS="-DLLAMA_RPC=on" python3 -m pip install --verbose -e .
+	CMAKE_ARGS="-DGGML_RPC=on" python3 -m pip install --verbose -e .
 
 build.sdist:
 	python3 -m build --sdist
@@ -85,4 +85,4 @@ clean:
 	deploy.pypi \
 	deploy.gh-docs \
 	docker \
-	clean
+	clean
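The target names are unchanged, so existing invocations keep working; only the flags they forward were renamed. For example:

```bash
make build.cuda    # now runs: CMAKE_ARGS="-DGGML_CUDA=on" python3 -m pip install --verbose -e .
make build.metal   # now runs: CMAKE_ARGS="-DGGML_METAL=on" python3 -m pip install --verbose -e .
```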

README.md

Lines changed: 21 additions & 31 deletions
@@ -64,13 +64,13 @@ All `llama.cpp` cmake build options can be set via the `CMAKE_ARGS` environment
 
 ```bash
 # Linux and Mac
-CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" \
+CMAKE_ARGS="-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS" \
   pip install llama-cpp-python
 ```
 
 ```powershell
 # Windows
-$env:CMAKE_ARGS = "-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS"
+$env:CMAKE_ARGS = "-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS"
 pip install llama-cpp-python
 ```
 </details>
@@ -83,13 +83,13 @@ They can also be set via `pip install -C / --config-settings` command and saved
 ```bash
 pip install --upgrade pip # ensure pip is up to date
 pip install llama-cpp-python \
-  -C cmake.args="-DLLAMA_BLAS=ON;-DLLAMA_BLAS_VENDOR=OpenBLAS"
+  -C cmake.args="-DGGML_BLAS=ON;-DGGML_BLAS_VENDOR=OpenBLAS"
 ```
 
 ```txt
 # requirements.txt
 
-llama-cpp-python -C cmake.args="-DLLAMA_BLAS=ON;-DLLAMA_BLAS_VENDOR=OpenBLAS"
+llama-cpp-python -C cmake.args="-DGGML_BLAS=ON;-DGGML_BLAS_VENDOR=OpenBLAS"
 ```
 
 </details>
@@ -101,20 +101,20 @@ Below are some common backends, their build commands and any additional environm
 <details open>
 <summary>OpenBLAS (CPU)</summary>
 
-To install with OpenBLAS, set the `LLAMA_BLAS` and `LLAMA_BLAS_VENDOR` environment variables before installing:
+To install with OpenBLAS, set the `GGML_BLAS` and `GGML_BLAS_VENDOR` environment variables before installing:
 
 ```bash
-CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" pip install llama-cpp-python
+CMAKE_ARGS="-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS" pip install llama-cpp-python
 ```
 </details>
 
 <details>
 <summary>CUDA</summary>
 
-To install with CUDA support, set the `LLAMA_CUDA=on` environment variable before installing:
+To install with CUDA support, set the `GGML_CUDA=on` environment variable before installing:
 
 ```bash
-CMAKE_ARGS="-DLLAMA_CUDA=on" pip install llama-cpp-python
+CMAKE_ARGS="-DGGML_CUDA=on" pip install llama-cpp-python
 ```
 
 **Pre-built Wheel (New)**
@@ -147,10 +147,10 @@ pip install llama-cpp-python \
 <details>
 <summary>Metal</summary>
 
-To install with Metal (MPS), set the `LLAMA_METAL=on` environment variable before installing:
+To install with Metal (MPS), set the `GGML_METAL=on` environment variable before installing:
 
 ```bash
-CMAKE_ARGS="-DLLAMA_METAL=on" pip install llama-cpp-python
+CMAKE_ARGS="-DGGML_METAL=on" pip install llama-cpp-python
 ```
 
 **Pre-built Wheel (New)**
@@ -170,54 +170,44 @@ pip install llama-cpp-python \
 <details>
 <summary>hipBLAS (ROCm)</summary>
 
-To install with hipBLAS / ROCm support for AMD cards, set the `LLAMA_HIPBLAS=on` environment variable before installing:
+To install with hipBLAS / ROCm support for AMD cards, set the `GGML_HIPBLAS=on` environment variable before installing:
 
 ```bash
-CMAKE_ARGS="-DLLAMA_HIPBLAS=on" pip install llama-cpp-python
+CMAKE_ARGS="-DGGML_HIPBLAS=on" pip install llama-cpp-python
 ```
 
 </details>
 
 <details>
 <summary>Vulkan</summary>
 
-To install with Vulkan support, set the `LLAMA_VULKAN=on` environment variable before installing:
+To install with Vulkan support, set the `GGML_VULKAN=on` environment variable before installing:
 
 ```bash
-CMAKE_ARGS="-DLLAMA_VULKAN=on" pip install llama-cpp-python
+CMAKE_ARGS="-DGGML_VULKAN=on" pip install llama-cpp-python
 ```
 
 </details>
 
-<details>
-<summary>Kompute</summary>
-
-To install with Kompute support, set the `LLAMA_KOMPUTE=on` environment variable before installing:
-
-```bash
-CMAKE_ARGS="-DLLAMA_KOMPUTE=on" pip install llama-cpp-python
-```
-</details>
-
 <details>
 <summary>SYCL</summary>
 
-To install with SYCL support, set the `LLAMA_SYCL=on` environment variable before installing:
+To install with SYCL support, set the `GGML_SYCL=on` environment variable before installing:
 
 ```bash
 source /opt/intel/oneapi/setvars.sh
-CMAKE_ARGS="-DLLAMA_SYCL=on -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx" pip install llama-cpp-python
+CMAKE_ARGS="-DGGML_SYCL=on -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx" pip install llama-cpp-python
 ```
 </details>
 
 <details>
 <summary>RPC</summary>
 
-To install with RPC support, set the `LLAMA_RPC=on` environment variable before installing:
+To install with RPC support, set the `GGML_RPC=on` environment variable before installing:
 
 ```bash
 source /opt/intel/oneapi/setvars.sh
-CMAKE_ARGS="-DLLAMA_RPC=on" pip install llama-cpp-python
+CMAKE_ARGS="-DGGML_RPC=on" pip install llama-cpp-python
 ```
 </details>
 
@@ -231,7 +221,7 @@ If you run into issues where it complains it can't find `'nmake'` `'?'` or CMAKE
 
 ```ps
 $env:CMAKE_GENERATOR = "MinGW Makefiles"
-$env:CMAKE_ARGS = "-DLLAMA_OPENBLAS=on -DCMAKE_C_COMPILER=C:/w64devkit/bin/gcc.exe -DCMAKE_CXX_COMPILER=C:/w64devkit/bin/g++.exe"
+$env:CMAKE_ARGS = "-DGGML_OPENBLAS=on -DCMAKE_C_COMPILER=C:/w64devkit/bin/gcc.exe -DCMAKE_CXX_COMPILER=C:/w64devkit/bin/g++.exe"
 ```
 
 See the above instructions and set `CMAKE_ARGS` to the BLAS backend you want to use.
@@ -260,7 +250,7 @@ Otherwise, while installing it will build the llama.cpp x86 version which will b
 Try installing with
 
 ```bash
-CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_APPLE_SILICON_PROCESSOR=arm64 -DLLAMA_METAL=on" pip install --upgrade --verbose --force-reinstall --no-cache-dir llama-cpp-python
+CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_APPLE_SILICON_PROCESSOR=arm64 -DGGML_METAL=on" pip install --upgrade --verbose --force-reinstall --no-cache-dir llama-cpp-python
 ```
 </details>
 
@@ -667,7 +657,7 @@ python3 -m llama_cpp.server --model models/7B/llama-model.gguf
 Similar to Hardware Acceleration section above, you can also install with GPU (cuBLAS) support like this:
 
 ```bash
-CMAKE_ARGS="-DLLAMA_CUDA=on" FORCE_CMAKE=1 pip install 'llama-cpp-python[server]'
+CMAKE_ARGS="-DGGML_CUDA=on" FORCE_CMAKE=1 pip install 'llama-cpp-python[server]'
 python3 -m llama_cpp.server --model models/7B/llama-model.gguf --n_gpu_layers 35
 ```
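One practical consequence of the rename: an install built earlier with the old `LLAMA_*` names won't pick up the new flags until llama.cpp is recompiled. A sketch of forcing a from-source rebuild, reusing the reinstall flags the README already recommends (swap in whichever backend flag applies):

```bash
CMAKE_ARGS="-DGGML_CUDA=on" \
  pip install --upgrade --force-reinstall --no-cache-dir llama-cpp-python
```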

docker/cuda_simple/Dockerfile

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
-ARG CUDA_IMAGE="12.1.1-devel-ubuntu22.04"
+ARG CUDA_IMAGE="12.5.0-devel-ubuntu22.04"
 FROM nvidia/cuda:${CUDA_IMAGE}
 
 # We need to set the host to 0.0.0.0 to allow outside access
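A usage sketch for the updated Dockerfile (the image tag is hypothetical); the `--build-arg` form shows how to pin the previous CUDA base if needed:

```bash
docker build -t llama-cpp-cuda docker/cuda_simple/
docker build --build-arg CUDA_IMAGE="12.1.1-devel-ubuntu22.04" -t llama-cpp-cuda docker/cuda_simple/
```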

docker/open_llama/Dockerfile

Lines changed: 4 additions & 2 deletions
@@ -1,5 +1,5 @@
 # Define the image argument and provide a default value
-ARG IMAGE=python:3-slim-bullseye
+ARG IMAGE=python:3-slim-bookworm
 
 # Use the image as specified
 FROM ${IMAGE}
@@ -12,7 +12,9 @@ RUN apt-get update && apt-get upgrade -y && apt-get install -y --no-install-reco
     python3 \
     python3-pip \
     ninja-build \
-    build-essential
+    build-essential \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
 
 RUN python3 -m pip install --upgrade pip pytest cmake scikit-build setuptools fastapi uvicorn sse-starlette pydantic-settings starlette-context
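Same pattern here (tag name hypothetical): the new bookworm default can be overridden through the `IMAGE` build argument, and the added `apt-get clean` / `rm -rf /var/lib/apt/lists/*` step keeps the apt package index out of the final layer:

```bash
docker build -t open-llama docker/open_llama/
docker build --build-arg IMAGE=python:3-slim-bullseye -t open-llama docker/open_llama/
```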
