
Commit fad8f0d

Merge branch 'abetlen:main' into main
2 parents 1996d8f + f7f4fa8 commit fad8f0d


50 files changed: +2433 −1416 lines

.github/workflows/build-and-release.yaml

Lines changed: 2 additions & 2 deletions
@@ -29,7 +29,7 @@ jobs:
         python -m pip install -e .[all]
 
       - name: Build wheels
-        uses: pypa/cibuildwheel@v2.19.1
+        uses: pypa/cibuildwheel@v2.19.2
         env:
           # disable repair
           CIBW_REPAIR_WHEEL_COMMAND: ""
@@ -56,7 +56,7 @@ jobs:
           platforms: linux/arm64
 
       - name: Build wheels
-        uses: pypa/cibuildwheel@v2.19.1
+        uses: pypa/cibuildwheel@v2.19.2
         env:
           CIBW_SKIP: "*musllinux* pp*"
           CIBW_REPAIR_WHEEL_COMMAND: ""

.github/workflows/build-wheels-cuda.yaml

Lines changed: 6 additions & 6 deletions
@@ -45,7 +45,7 @@ jobs:
     steps:
       - name: Add MSBuild to PATH
         if: runner.os == 'Windows'
-        uses: microsoft/setup-msbuild@v1.1
+        uses: microsoft/setup-msbuild@v1.3
         with:
           vs-version: '[16.11,16.12)'
 
@@ -114,16 +114,16 @@ jobs:
             $env:LD_LIBRARY_PATH = $env:CONDA_PREFIX + '/lib:' + $env:LD_LIBRARY_PATH
           }
           $env:VERBOSE = '1'
-          $env:CMAKE_ARGS = '-DLLAMA_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=all'
-          $env:CMAKE_ARGS = "-DLLAMA_CUDA_FORCE_MMQ=ON $env:CMAKE_ARGS"
+          $env:CMAKE_ARGS = '-DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=all'
+          $env:CMAKE_ARGS = "-DGGML_CUDA_FORCE_MMQ=ON $env:CMAKE_ARGS"
           # if ($env:AVXVER -eq 'AVX') {
-          $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DLLAMA_AVX2=off -DLLAMA_FMA=off -DLLAMA_F16C=off'
+          $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DGGML_AVX2=off -DGGML_FMA=off -DGGML_F16C=off'
           # }
           # if ($env:AVXVER -eq 'AVX512') {
-          #   $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DLLAMA_AVX512=on'
+          #   $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DGGML_AVX512=on'
           # }
           # if ($env:AVXVER -eq 'basic') {
-          #   $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_FMA=off -DLLAMA_F16C=off'
+          #   $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DGGML_AVX=off -DGGML_AVX2=off -DGGML_FMA=off -DGGML_F16C=off'
           # }
           python -m build --wheel
           # write the build tag to the output
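The PowerShell step above is where the `LLAMA_*` → `GGML_*` rename actually lands in CI. A rough local equivalent (a sketch, not the workflow itself: bash instead of PowerShell, and it assumes a CUDA toolkit and the `build` package are installed):

```bash
# Mirror the workflow's configuration with the renamed GGML_ flags,
# then build a wheel the same way CI does.
VERBOSE=1 \
CMAKE_ARGS="-DGGML_CUDA_FORCE_MMQ=ON -DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=all" \
python -m build --wheel
```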

.github/workflows/build-wheels-metal.yaml

Lines changed: 2 additions & 2 deletions
@@ -30,12 +30,12 @@ jobs:
         python -m pip install -e .[all]
 
       - name: Build wheels
-        uses: pypa/cibuildwheel@v2.19.1
+        uses: pypa/cibuildwheel@v2.19.2
         env:
           # disable repair
           CIBW_REPAIR_WHEEL_COMMAND: ""
           CIBW_ARCHS: "arm64"
-          CIBW_ENVIRONMENT: CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_APPLE_SILICON_PROCESSOR=arm64 -DLLAMA_METAL=on"
+          CIBW_ENVIRONMENT: CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_APPLE_SILICON_PROCESSOR=arm64 -DGGML_METAL=on"
           CIBW_BUILD: "cp39-* cp310-* cp311-* cp312-*"
         with:
           package-dir: .

.github/workflows/test.yaml

Lines changed: 1 addition & 1 deletion
@@ -97,7 +97,7 @@ jobs:
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          CMAKE_ARGS="-DLLAMA_METAL=on" python -m pip install .[all] --verbose
+          CMAKE_ARGS="-DGGML_METAL=on" python -m pip install .[all] --verbose
       - name: Test with pytest
         run: |
           python -m pytest
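The same check can be run locally on an Apple Silicon machine; the commands below are taken verbatim from the updated workflow step:

```bash
python -m pip install --upgrade pip
CMAKE_ARGS="-DGGML_METAL=on" python -m pip install .[all] --verbose
python -m pytest
```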

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [0.2.82]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@7fdb6f73e35605c8dbc39e9f19cd9ed84dbc87f2
+
 ## [0.2.81]
 
 - feat: Update llama.cpp to ggerganov/llama.cpp@968967376dc2c018d29f897c4883d335bbf384fb

CMakeLists.txt

Lines changed: 6 additions & 6 deletions
@@ -59,14 +59,14 @@ if (LLAMA_BUILD)
     if (APPLE AND NOT CMAKE_SYSTEM_PROCESSOR MATCHES "arm64")
         # Need to disable these llama.cpp flags on Apple x86_64,
         # otherwise users may encounter invalid instruction errors
-        set(LLAMA_AVX "Off" CACHE BOOL "llama: enable AVX" FORCE)
-        set(LLAMA_AVX2 "Off" CACHE BOOL "llama: enable AVX2" FORCE)
-        set(LLAMA_FMA "Off" CACHE BOOL "llama: enable FMA" FORCE)
-        set(LLAMA_F16C "Off" CACHE BOOL "llama: enable F16C" FORCE)
+        set(GGML_AVX "Off" CACHE BOOL "ggml: enable AVX" FORCE)
+        set(GGML_AVX2 "Off" CACHE BOOL "ggml: enable AVX2" FORCE)
+        set(GGML_FMA "Off" CACHE BOOL "gml: enable FMA" FORCE)
+        set(GGML_F16C "Off" CACHE BOOL "gml: enable F16C" FORCE)
     endif()
 
     if (APPLE)
-        set(LLAMA_METAL_EMBED_LIBRARY "On" CACHE BOOL "llama: embed metal library" FORCE)
+        set(GGML_METAL_EMBED_LIBRARY "On" CACHE BOOL "llama: embed metal library" FORCE)
     endif()
 
     add_subdirectory(vendor/llama.cpp)
@@ -122,4 +122,4 @@ if (LLAMA_BUILD)
         )
     endif()
 endif()
-endif()
+endif()
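Because these `set(... CACHE BOOL ... FORCE)` calls force the cache entries, they override any conflicting `CMAKE_ARGS` a user passes on Apple x86_64. A minimal sketch for verifying the forced values after a configure run (assumes a checkout with the `vendor/llama.cpp` submodule present; the grep pattern is illustrative):

```bash
cmake -S . -B build -DLLAMA_BUILD=ON    # configure the bindings project
# list cached variables without re-running configure
cmake -L -N build | grep -E '^GGML_(AVX|AVX2|FMA|F16C|METAL_EMBED_LIBRARY)'
```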

Makefile

Lines changed: 9 additions & 9 deletions
@@ -22,28 +22,28 @@ build.debug:
 		--editable .
 
 build.cuda:
-	CMAKE_ARGS="-DLLAMA_CUDA=on" python3 -m pip install --verbose -e .
+	CMAKE_ARGS="-DGGML_CUDA=on" python3 -m pip install --verbose -e .
 
 build.openblas:
-	CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" python3 -m pip install --verbose -e .
+	CMAKE_ARGS="-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS" python3 -m pip install --verbose -e .
 
 build.blis:
-	CMAKE_ARGS="-DLLAMA_BLAS=on -DLLAMA_BLAS_VENDOR=FLAME" python3 -m pip install --verbose -e .
+	CMAKE_ARGS="-DGGML_BLAS=on -DGGML_BLAS_VENDOR=FLAME" python3 -m pip install --verbose -e .
 
 build.metal:
-	CMAKE_ARGS="-DLLAMA_METAL=on" python3 -m pip install --verbose -e .
+	CMAKE_ARGS="-DGGML_METAL=on" python3 -m pip install --verbose -e .
 
 build.vulkan:
-	CMAKE_ARGS="-DLLAMA_VULKAN=on" python3 -m pip install --verbose -e .
+	CMAKE_ARGS="-DGGML_VULKAN=on" python3 -m pip install --verbose -e .
 
 build.kompute:
-	CMAKE_ARGS="-DLLAMA_KOMPUTE=on" python3 -m pip install --verbose -e .
+	CMAKE_ARGS="-DGGML_KOMPUTE=on" python3 -m pip install --verbose -e .
 
 build.sycl:
-	CMAKE_ARGS="-DLLAMA_SYCL=on" python3 -m pip install --verbose -e .
+	CMAKE_ARGS="-DGGML_SYCL=on" python3 -m pip install --verbose -e .
 
 build.rpc:
-	CMAKE_ARGS="-DLLAMA_RPC=on" python3 -m pip install --verbose -e .
+	CMAKE_ARGS="-DGGML_RPC=on" python3 -m pip install --verbose -e .
 
 build.sdist:
 	python3 -m build --sdist
@@ -85,4 +85,4 @@ clean:
 	deploy.pypi \
 	deploy.gh-docs \
 	docker \
-	clean
+	clean
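The target names are unchanged, so existing invocations keep working; only the flags they forward were renamed. For example:

```bash
make build.cuda    # now runs: CMAKE_ARGS="-DGGML_CUDA=on" python3 -m pip install --verbose -e .
make build.metal   # now runs: CMAKE_ARGS="-DGGML_METAL=on" python3 -m pip install --verbose -e .
```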

README.md

Lines changed: 21 additions & 31 deletions
@@ -64,13 +64,13 @@ All `llama.cpp` cmake build options can be set via the `CMAKE_ARGS` environment
 
 ```bash
 # Linux and Mac
-CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" \
+CMAKE_ARGS="-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS" \
   pip install llama-cpp-python
 ```
 
 ```powershell
 # Windows
-$env:CMAKE_ARGS = "-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS"
+$env:CMAKE_ARGS = "-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS"
 pip install llama-cpp-python
 ```
 </details>
@@ -83,13 +83,13 @@ They can also be set via `pip install -C / --config-settings` command and saved
 ```bash
 pip install --upgrade pip # ensure pip is up to date
 pip install llama-cpp-python \
-  -C cmake.args="-DLLAMA_BLAS=ON;-DLLAMA_BLAS_VENDOR=OpenBLAS"
+  -C cmake.args="-DGGML_BLAS=ON;-DGGML_BLAS_VENDOR=OpenBLAS"
 ```
 
 ```txt
 # requirements.txt
 
-llama-cpp-python -C cmake.args="-DLLAMA_BLAS=ON;-DLLAMA_BLAS_VENDOR=OpenBLAS"
+llama-cpp-python -C cmake.args="-DGGML_BLAS=ON;-DGGML_BLAS_VENDOR=OpenBLAS"
 ```
 
 </details>
@@ -101,20 +101,20 @@ Below are some common backends, their build commands and any additional environm
 <details open>
 <summary>OpenBLAS (CPU)</summary>
 
-To install with OpenBLAS, set the `LLAMA_BLAS` and `LLAMA_BLAS_VENDOR` environment variables before installing:
+To install with OpenBLAS, set the `GGML_BLAS` and `GGML_BLAS_VENDOR` environment variables before installing:
 
 ```bash
-CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" pip install llama-cpp-python
+CMAKE_ARGS="-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS" pip install llama-cpp-python
 ```
 </details>
 
 <details>
 <summary>CUDA</summary>
 
-To install with CUDA support, set the `LLAMA_CUDA=on` environment variable before installing:
+To install with CUDA support, set the `GGML_CUDA=on` environment variable before installing:
 
 ```bash
-CMAKE_ARGS="-DLLAMA_CUDA=on" pip install llama-cpp-python
+CMAKE_ARGS="-DGGML_CUDA=on" pip install llama-cpp-python
 ```
 
 **Pre-built Wheel (New)**
@@ -147,10 +147,10 @@ pip install llama-cpp-python \
 <details>
 <summary>Metal</summary>
 
-To install with Metal (MPS), set the `LLAMA_METAL=on` environment variable before installing:
+To install with Metal (MPS), set the `GGML_METAL=on` environment variable before installing:
 
 ```bash
-CMAKE_ARGS="-DLLAMA_METAL=on" pip install llama-cpp-python
+CMAKE_ARGS="-DGGML_METAL=on" pip install llama-cpp-python
 ```
 
 **Pre-built Wheel (New)**
@@ -170,54 +170,44 @@ pip install llama-cpp-python \
 <details>
 <summary>hipBLAS (ROCm)</summary>
 
-To install with hipBLAS / ROCm support for AMD cards, set the `LLAMA_HIPBLAS=on` environment variable before installing:
+To install with hipBLAS / ROCm support for AMD cards, set the `GGML_HIPBLAS=on` environment variable before installing:
 
 ```bash
-CMAKE_ARGS="-DLLAMA_HIPBLAS=on" pip install llama-cpp-python
+CMAKE_ARGS="-DGGML_HIPBLAS=on" pip install llama-cpp-python
 ```
 
 </details>
 
 <details>
 <summary>Vulkan</summary>
 
-To install with Vulkan support, set the `LLAMA_VULKAN=on` environment variable before installing:
+To install with Vulkan support, set the `GGML_VULKAN=on` environment variable before installing:
 
 ```bash
-CMAKE_ARGS="-DLLAMA_VULKAN=on" pip install llama-cpp-python
+CMAKE_ARGS="-DGGML_VULKAN=on" pip install llama-cpp-python
 ```
 
 </details>
 
-<details>
-<summary>Kompute</summary>
-
-To install with Kompute support, set the `LLAMA_KOMPUTE=on` environment variable before installing:
-
-```bash
-CMAKE_ARGS="-DLLAMA_KOMPUTE=on" pip install llama-cpp-python
-```
-</details>
-
 <details>
 <summary>SYCL</summary>
 
-To install with SYCL support, set the `LLAMA_SYCL=on` environment variable before installing:
+To install with SYCL support, set the `GGML_SYCL=on` environment variable before installing:
 
 ```bash
 source /opt/intel/oneapi/setvars.sh
-CMAKE_ARGS="-DLLAMA_SYCL=on -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx" pip install llama-cpp-python
+CMAKE_ARGS="-DGGML_SYCL=on -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx" pip install llama-cpp-python
 ```
 </details>
 
 <details>
 <summary>RPC</summary>
 
-To install with RPC support, set the `LLAMA_RPC=on` environment variable before installing:
+To install with RPC support, set the `GGML_RPC=on` environment variable before installing:
 
 ```bash
 source /opt/intel/oneapi/setvars.sh
-CMAKE_ARGS="-DLLAMA_RPC=on" pip install llama-cpp-python
+CMAKE_ARGS="-DGGML_RPC=on" pip install llama-cpp-python
 ```
 </details>
 
@@ -231,7 +221,7 @@ If you run into issues where it complains it can't find `'nmake'` `'?'` or CMAKE
 
 ```ps
 $env:CMAKE_GENERATOR = "MinGW Makefiles"
-$env:CMAKE_ARGS = "-DLLAMA_OPENBLAS=on -DCMAKE_C_COMPILER=C:/w64devkit/bin/gcc.exe -DCMAKE_CXX_COMPILER=C:/w64devkit/bin/g++.exe"
+$env:CMAKE_ARGS = "-DGGML_OPENBLAS=on -DCMAKE_C_COMPILER=C:/w64devkit/bin/gcc.exe -DCMAKE_CXX_COMPILER=C:/w64devkit/bin/g++.exe"
 ```
 
 See the above instructions and set `CMAKE_ARGS` to the BLAS backend you want to use.
@@ -260,7 +250,7 @@ Otherwise, while installing it will build the llama.cpp x86 version which will b
 Try installing with
 
 ```bash
-CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_APPLE_SILICON_PROCESSOR=arm64 -DLLAMA_METAL=on" pip install --upgrade --verbose --force-reinstall --no-cache-dir llama-cpp-python
+CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_APPLE_SILICON_PROCESSOR=arm64 -DGGML_METAL=on" pip install --upgrade --verbose --force-reinstall --no-cache-dir llama-cpp-python
 ```
 </details>
 
@@ -667,7 +657,7 @@ python3 -m llama_cpp.server --model models/7B/llama-model.gguf
 Similar to Hardware Acceleration section above, you can also install with GPU (cuBLAS) support like this:
 
 ```bash
-CMAKE_ARGS="-DLLAMA_CUDA=on" FORCE_CMAKE=1 pip install 'llama-cpp-python[server]'
+CMAKE_ARGS="-DGGML_CUDA=on" FORCE_CMAKE=1 pip install 'llama-cpp-python[server]'
 python3 -m llama_cpp.server --model models/7B/llama-model.gguf --n_gpu_layers 35
 ```
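One practical consequence of the rename: an install built earlier with the old `LLAMA_*` names won't pick up the new flags until llama.cpp is recompiled. A sketch of forcing a from-source rebuild, reusing the reinstall flags the README already recommends (swap in whichever backend flag applies):

```bash
CMAKE_ARGS="-DGGML_CUDA=on" \
  pip install --upgrade --force-reinstall --no-cache-dir llama-cpp-python
```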

docker/cuda_simple/Dockerfile

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
-ARG CUDA_IMAGE="12.1.1-devel-ubuntu22.04"
+ARG CUDA_IMAGE="12.5.0-devel-ubuntu22.04"
 FROM nvidia/cuda:${CUDA_IMAGE}
 
 # We need to set the host to 0.0.0.0 to allow outside access
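A usage sketch for the updated Dockerfile (the image tag is hypothetical); the `--build-arg` form shows how to pin the previous CUDA base if needed:

```bash
docker build -t llama-cpp-cuda docker/cuda_simple/
docker build --build-arg CUDA_IMAGE="12.1.1-devel-ubuntu22.04" -t llama-cpp-cuda docker/cuda_simple/
```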

docker/open_llama/Dockerfile

Lines changed: 4 additions & 2 deletions
@@ -1,5 +1,5 @@
 # Define the image argument and provide a default value
-ARG IMAGE=python:3-slim-bullseye
+ARG IMAGE=python:3-slim-bookworm
 
 # Use the image as specified
 FROM ${IMAGE}
@@ -12,7 +12,9 @@ RUN apt-get update && apt-get upgrade -y && apt-get install -y --no-install-reco
     python3 \
     python3-pip \
     ninja-build \
-    build-essential
+    build-essential \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
 
 RUN python3 -m pip install --upgrade pip pytest cmake scikit-build setuptools fastapi uvicorn sse-starlette pydantic-settings starlette-context
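Same pattern here (tag name hypothetical): the new bookworm default can be overridden through the `IMAGE` build argument, and the added `apt-get clean` / `rm -rf /var/lib/apt/lists/*` step keeps the apt package index out of the final layer:

```bash
docker build -t open-llama docker/open_llama/
docker build --build-arg IMAGE=python:3-slim-bullseye -t open-llama docker/open_llama/
```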
