Commit 1996d8f

Merge branch 'main' into main
2 parents 86ac1f6 + 7e20e34 · commit 1996d8f

File tree: 17 files changed (+235 −111 lines)


.github/workflows/build-and-release.yaml

Lines changed: 3 additions & 3 deletions
@@ -11,7 +11,7 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        os: [ubuntu-20.04, windows-2019, macos-11]
+        os: [ubuntu-20.04, windows-2019, macos-12]
 
     steps:
       - uses: actions/checkout@v4

@@ -29,7 +29,7 @@ jobs:
           python -m pip install -e .[all]
 
       - name: Build wheels
-        uses: pypa/cibuildwheel@v2.18.0
+        uses: pypa/cibuildwheel@v2.19.1
         env:
           # disable repair
           CIBW_REPAIR_WHEEL_COMMAND: ""

@@ -56,7 +56,7 @@ jobs:
           platforms: linux/arm64
 
       - name: Build wheels
-        uses: pypa/cibuildwheel@v2.18.0
+        uses: pypa/cibuildwheel@v2.19.1
         env:
           CIBW_SKIP: "*musllinux* pp*"
           CIBW_REPAIR_WHEEL_COMMAND: ""

.github/workflows/build-docker.yaml

Lines changed: 1 addition & 1 deletion
@@ -31,7 +31,7 @@ jobs:
 
       - name: Build and push
         id: docker_build
-        uses: docker/build-push-action@v5
+        uses: docker/build-push-action@v6
         with:
           context: .
           file: "docker/simple/Dockerfile"

.github/workflows/build-wheels-cuda.yaml

Lines changed: 9 additions & 3 deletions
@@ -20,7 +20,7 @@ jobs:
         id: set-matrix
         run: |
           $matrix = @{
-              'os' = @('ubuntu-latest', 'windows-latest')
+              'os' = @('ubuntu-latest', 'windows-2019')
               'pyver' = @("3.9", "3.10", "3.11", "3.12")
               'cuda' = @("12.1.1", "12.2.2", "12.3.2", "12.4.1")
               'releasetag' = @("basic")

@@ -43,6 +43,12 @@ jobs:
       AVXVER: ${{ matrix.releasetag }}
 
     steps:
+      - name: Add MSBuild to PATH
+        if: runner.os == 'Windows'
+        uses: microsoft/setup-msbuild@v1.1
+        with:
+          vs-version: '[16.11,16.12)'
+
       - uses: actions/checkout@v4
         with:
           submodules: "recursive"

@@ -85,7 +91,7 @@ jobs:
         if: runner.os == 'Windows'
         run: |
           $y = (gi '.\MSBuildExtensions').fullname + '\*'
-          (gi 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise\MSBuild\Microsoft\VC\*\BuildCustomizations').fullname.foreach({cp $y $_})
+          (gi 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\MSBuild\Microsoft\VC\*\BuildCustomizations').fullname.foreach({cp $y $_})
           $cupath = 'CUDA_PATH_V' + $env:CUDAVER.Remove($env:CUDAVER.LastIndexOf('.')).Replace('.','_')
           echo "$cupath=$env:CONDA_PREFIX" >> $env:GITHUB_ENV
 

@@ -108,7 +114,7 @@ jobs:
             $env:LD_LIBRARY_PATH = $env:CONDA_PREFIX + '/lib:' + $env:LD_LIBRARY_PATH
           }
           $env:VERBOSE = '1'
-          $env:CMAKE_ARGS = '-DLLAMA_CUBLAS=on -DCMAKE_CUDA_ARCHITECTURES=all'
+          $env:CMAKE_ARGS = '-DLLAMA_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=all'
           $env:CMAKE_ARGS = "-DLLAMA_CUDA_FORCE_MMQ=ON $env:CMAKE_ARGS"
           # if ($env:AVXVER -eq 'AVX') {
           $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DLLAMA_AVX2=off -DLLAMA_FMA=off -DLLAMA_F16C=off'
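
The same flag rename applies to local builds (see the Makefile change below). As an illustrative sketch only — not part of this commit — driving the equivalent pip install from Python:

```python
# Illustrative sketch: install llama-cpp-python with the renamed CUDA flag.
# LLAMA_CUBLAS was removed upstream; LLAMA_CUDA is its replacement.
import os
import subprocess
import sys

env = dict(os.environ, CMAKE_ARGS="-DLLAMA_CUDA=on")  # formerly -DLLAMA_CUBLAS=on
subprocess.run(
    [sys.executable, "-m", "pip", "install", "--verbose", "llama-cpp-python"],
    env=env,
    check=True,
)
```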

.github/workflows/build-wheels-metal.yaml

Lines changed: 1 addition & 1 deletion
@@ -30,7 +30,7 @@ jobs:
           python -m pip install -e .[all]
 
       - name: Build wheels
-        uses: pypa/cibuildwheel@v2.18.1
+        uses: pypa/cibuildwheel@v2.19.1
         env:
           # disable repair
           CIBW_REPAIR_WHEEL_COMMAND: ""

.github/workflows/test.yaml

Lines changed: 0 additions & 26 deletions
@@ -81,32 +81,6 @@ jobs:
       run: |
         python -m pytest
 
-  # build-linux-opencl:
-
-  #   runs-on: ubuntu-latest
-
-  #   steps:
-  #   - uses: actions/checkout@v4
-  #     with:
-  #       submodules: "recursive"
-  #   - name: Set up Python 3.8
-  #     uses: actions/setup-python@v5
-  #     with:
-  #       python-version: "3.8"
-  #   - name: Set up OpenCL & CLBlast
-  #     run: |
-  #       wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null
-  #       echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list
-  #       sudo apt-get update
-  #       sudo apt-get install -y --no-install-recommends llvm intel-oneapi-runtime-opencl intel-oneapi-runtime-compilers libclblast-dev
-  #   - name: Install dependencies
-  #     run: |
-  #       python -m pip install --upgrade pip
-  #       CMAKE_ARGS="-DLLAMA_CLBLAST=on" python -m pip install .[all] --verbose
-  #   - name: Test with pytest
-  #     run: |
-  #       python -m pytest
-
 
   build-macos-metal:

CHANGELOG.md

Lines changed: 21 additions & 0 deletions
@@ -7,6 +7,27 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ## [Unreleased]
 
+## [0.2.81]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@968967376dc2c018d29f897c4883d335bbf384fb
+- fix(ci): Fix CUDA wheels, use LLAMA_CUDA instead of removed LLAMA_CUBLAS by @abetlen in 4fb6fc12a02a68884c25dd9f6a421cacec7604c6
+- fix(ci): Fix MacOS release, use macos-12 image instead of removed macos-11 by @abetlen in 3a551eb5263fdbd24b36d7770856374c04e92788
+
+## [0.2.80]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@023b8807e10bc3ade24a255f01c1ad2a01bb4228
+- fix(server): Fix bug in FastAPI streaming response where dependency was released before request completes causing SEGFAULT by @abetlen in 296304b60bb83689659883c9cc24f4c074dd88ff
+- fix(server): Update default config value for embeddings to False to fix error in text generation where logits were not allocated by llama.cpp by @abetlen in bf5e0bb4b151f4ca2f5a21af68eb832a96a79d75
+- fix(ci): Fix the CUDA workflow by @oobabooga in #1551
+- docs: Update readme examples to use newer Qwen2 model by @jncraton in #1544
+
+## [0.2.79]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@9c77ec1d74874ee22bdef8f110e8e8d41389abf2
+- feat(ci): Update workflows and pre-built wheels by @Smartappli in #1416
+- feat: Add .close() method to Llama class to explicitly free model from memory by @jkawamoto in #1513
+- feat: Support SPM infill by @CISC in #1492
+
 ## [0.2.78]
 
 - feat: Update llama.cpp to ggerganov/llama.cpp@fd5ea0f897ecb3659d6c269ef6f3d833e865ead7
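
As a usage note for the 0.2.79 entry above, a minimal sketch of the new explicit-free API (the model path is a placeholder, not a file shipped with this repository):

```python
# Minimal sketch of the .close() method added in 0.2.79 (#1513).
from llama_cpp import Llama

llm = Llama(model_path="./model.gguf", verbose=False)  # placeholder path
out = llm("Q: What is 2 + 2? A:", max_tokens=8)
print(out["choices"][0]["text"])
llm.close()  # explicitly frees the model and context instead of waiting for GC
```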

CMakeLists.txt

Lines changed: 73 additions & 38 deletions
@@ -5,9 +5,56 @@ project(llama_cpp)
 option(LLAMA_BUILD "Build llama.cpp shared library and install alongside python package" ON)
 option(LLAVA_BUILD "Build llava shared library and install alongside python package" ON)
 
+function(llama_cpp_python_install_target target)
+    install(
+        TARGETS ${target}
+        LIBRARY DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp/lib
+        RUNTIME DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp/lib
+        ARCHIVE DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp/lib
+        FRAMEWORK DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp/lib
+        RESOURCE DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp/lib
+    )
+    install(
+        TARGETS ${target}
+        LIBRARY DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp/lib
+        RUNTIME DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp/lib
+        ARCHIVE DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp/lib
+        FRAMEWORK DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp/lib
+        RESOURCE DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp/lib
+    )
+    set_target_properties(${target} PROPERTIES
+        INSTALL_RPATH "$ORIGIN"
+        BUILD_WITH_INSTALL_RPATH TRUE
+    )
+    if(UNIX)
+        if(APPLE)
+            set_target_properties(${target} PROPERTIES
+                INSTALL_RPATH "@loader_path"
+                BUILD_WITH_INSTALL_RPATH TRUE
+            )
+        else()
+            set_target_properties(${target} PROPERTIES
+                INSTALL_RPATH "$ORIGIN"
+                BUILD_WITH_INSTALL_RPATH TRUE
+            )
+        endif()
+    endif()
+endfunction()
+
 if (LLAMA_BUILD)
     set(BUILD_SHARED_LIBS "On")
 
+    set(CMAKE_SKIP_BUILD_RPATH FALSE)
+
+    # When building, don't use the install RPATH already
+    # (but later on when installing)
+    set(CMAKE_BUILD_WITH_INSTALL_RPATH FALSE)
+
+    # Add the automatically determined parts of the RPATH
+    # which point to directories outside the build tree to the install RPATH
+    set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
+    set(CMAKE_SKIP_RPATH FALSE)
+
     # Building llama
     if (APPLE AND NOT CMAKE_SYSTEM_PROCESSOR MATCHES "arm64")
         # Need to disable these llama.cpp flags on Apple x86_64,

@@ -23,32 +70,26 @@ if (LLAMA_BUILD)
     endif()
 
     add_subdirectory(vendor/llama.cpp)
-    install(
-        TARGETS llama
-        LIBRARY DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
-        RUNTIME DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
-        ARCHIVE DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
-        FRAMEWORK DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
-        RESOURCE DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
-    )
-    # Temporary fix for https://github.com/scikit-build/scikit-build-core/issues/374
-    install(
-        TARGETS llama
-        LIBRARY DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
-        RUNTIME DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
-        ARCHIVE DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
-        FRAMEWORK DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
-        RESOURCE DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
-    )
+    llama_cpp_python_install_target(llama)
+    llama_cpp_python_install_target(ggml)
+
     # Workaround for Windows + CUDA https://github.com/abetlen/llama-cpp-python/issues/563
-    if (WIN32 AND (LLAMA_CUDA OR LLAMA_CUBLAS))
+    if (WIN32)
         install(
             FILES $<TARGET_RUNTIME_DLLS:llama>
-            DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
+            DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp/lib
         )
         install(
             FILES $<TARGET_RUNTIME_DLLS:llama>
-            DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
+            DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp/lib
+        )
+        install(
+            FILES $<TARGET_RUNTIME_DLLS:ggml>
+            DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp/lib
+        )
+        install(
+            FILES $<TARGET_RUNTIME_DLLS:ggml>
+            DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp/lib
         )
     endif()
 

@@ -69,22 +110,16 @@ if (LLAMA_BUILD)
         if (WIN32)
             set_target_properties(llava_shared PROPERTIES CUDA_ARCHITECTURES OFF)
         endif()
-        install(
-            TARGETS llava_shared
-            LIBRARY DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
-            RUNTIME DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
-            ARCHIVE DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
-            FRAMEWORK DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
-            RESOURCE DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
-        )
-        # Temporary fix for https://github.com/scikit-build/scikit-build-core/issues/374
-        install(
-            TARGETS llava_shared
-            LIBRARY DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
-            RUNTIME DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
-            ARCHIVE DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
-            FRAMEWORK DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
-            RESOURCE DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
-        )
+        llama_cpp_python_install_target(llava_shared)
+        if (WIN32)
+            install(
+                FILES $<TARGET_RUNTIME_DLLS:llava_shared>
+                DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp/lib
+            )
+            install(
+                FILES $<TARGET_RUNTIME_DLLS:llava_shared>
+                DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp/lib
+            )
+        endif()
     endif()
-endif()
+endif()
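
The shared libraries now install into `llama_cpp/lib`, with `$ORIGIN` / `@loader_path` RPATHs so `libllama` can resolve `libggml` sitting next to it. A hedged sketch of loading from that layout — the package's actual loader may search additional locations:

```python
# Hedged sketch only: load the bundled library from llama_cpp/lib.
# Sibling libraries (e.g. ggml) are found via the $ORIGIN rpath.
import ctypes
import pathlib
import sys

lib_dir = pathlib.Path("llama_cpp") / "lib"  # hypothetical relative package dir
names = {"darwin": "libllama.dylib", "win32": "llama.dll"}
lib = ctypes.CDLL(str(lib_dir / names.get(sys.platform, "libllama.so")))
```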

Makefile

Lines changed: 0 additions & 3 deletions
@@ -24,9 +24,6 @@ build.debug:
 build.cuda:
 	CMAKE_ARGS="-DLLAMA_CUDA=on" python3 -m pip install --verbose -e .
 
-build.opencl:
-	CMAKE_ARGS="-DLLAMA_CLBLAST=on" python3 -m pip install --verbose -e .
-
 build.openblas:
 	CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" python3 -m pip install --verbose -e .

README.md

Lines changed: 2 additions & 13 deletions
@@ -165,17 +165,6 @@ pip install llama-cpp-python \
   --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/metal
 ```
 
-</details>
-<details>
-
-<summary>CLBlast (OpenCL)</summary>
-
-To install with CLBlast, set the `LLAMA_CLBLAST=on` environment variable before installing:
-
-```bash
-CMAKE_ARGS="-DLLAMA_CLBLAST=on" pip install llama-cpp-python
-```
-
 </details>
 
 <details>

@@ -338,7 +327,7 @@ You'll need to install the `huggingface-hub` package to use this feature (`pip install huggingface-hub`)
 
 ```python
 llm = Llama.from_pretrained(
-    repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF",
+    repo_id="Qwen/Qwen2-0.5B-Instruct-GGUF",
     filename="*q8_0.gguf",
     verbose=False
 )

@@ -699,7 +688,7 @@ For possible options, see [llama_cpp/llama_chat_format.py](llama_cpp/llama_chat_format.py)
 If you have `huggingface-hub` installed, you can also use the `--hf_model_repo_id` flag to load a model from the Hugging Face Hub.
 
 ```bash
-python3 -m llama_cpp.server --hf_model_repo_id Qwen/Qwen1.5-0.5B-Chat-GGUF --model '*q8_0.gguf'
+python3 -m llama_cpp.server --hf_model_repo_id Qwen/Qwen2-0.5B-Instruct-GGUF --model '*q8_0.gguf'
 ```
 
 ### Web Server Features

llama_cpp/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
 from .llama_cpp import *
 from .llama import *
 
-__version__ = "0.2.78"
+__version__ = "0.2.81"
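
A quick way to confirm an installed build matches this release:

```python
# Sanity check: the version bump in llama_cpp/__init__.py is what imports report.
import llama_cpp

assert llama_cpp.__version__ == "0.2.81", llama_cpp.__version__
```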
