
Commit 2a890ec

Breaking change: unify the Windows and Linux build flags.
To do a full build on Windows you now need `LLAMA_PORTABLE=1 LLAMA_VULKAN=1 LLAMA_CLBLAST=1`.
1 parent 5f8f483 commit 2a890ec
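In practice, the full Windows build invocation now mirrors the updated CI workflows below. A sketch (the `-j` value is machine-dependent; the CI workflows use `${env:NUMBER_OF_PROCESSORS}` in PowerShell):

```sh
# Full-featured, sharable Windows build with the unified flags.
# Pick a -j value for your CPU count; CI uses ${env:NUMBER_OF_PROCESSORS}.
make LLAMA_CLBLAST=1 LLAMA_VULKAN=1 LLAMA_PORTABLE=1 -j 8
```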

File tree

7 files changed: +31 -87 lines changed

.github/workflows/kcpp-build-release-win-cuda.yaml

Lines changed: 0 additions & 34 deletions
This file was deleted.

.github/workflows/kcpp-build-release-win-cuda12.yaml

Lines changed: 0 additions & 34 deletions
This file was deleted.

.github/workflows/kcpp-build-release-win-full-cu12.yaml

Lines changed: 1 addition & 1 deletion
```diff
@@ -45,7 +45,7 @@ jobs:
       - name: Build Non-CUDA
         id: make_build
         run: |
-          make -j ${env:NUMBER_OF_PROCESSORS}
+          make LLAMA_CLBLAST=1 LLAMA_VULKAN=1 LLAMA_PORTABLE=1 -j ${env:NUMBER_OF_PROCESSORS}
 
       - uses: Jimver/[email protected]
         id: cuda-toolkit
```

.github/workflows/kcpp-build-release-win-full.yaml

Lines changed: 1 addition & 1 deletion
```diff
@@ -45,7 +45,7 @@ jobs:
       - name: Build Non-CUDA
         id: make_build
         run: |
-          make -j ${env:NUMBER_OF_PROCESSORS}
+          make LLAMA_CLBLAST=1 LLAMA_VULKAN=1 LLAMA_PORTABLE=1 -j ${env:NUMBER_OF_PROCESSORS}
 
       - uses: Jimver/[email protected]
         id: cuda-toolkit
```

.github/workflows/kcpp-build-release-win-oldcpu-full.yaml

Lines changed: 1 addition & 1 deletion
```diff
@@ -45,7 +45,7 @@ jobs:
       - name: Build Non-CUDA
         id: make_build
         run: |
-          make -j ${env:NUMBER_OF_PROCESSORS} LLAMA_NOAVX2=1
+          make LLAMA_CLBLAST=1 LLAMA_VULKAN=1 LLAMA_PORTABLE=1 -j ${env:NUMBER_OF_PROCESSORS} LLAMA_NOAVX2=1
 
       - uses: Jimver/[email protected]
         id: cuda-toolkit
```

Makefile

Lines changed: 22 additions & 15 deletions
```diff
@@ -5,9 +5,6 @@
 
 default: koboldcpp_default koboldcpp_failsafe koboldcpp_noavx2 koboldcpp_clblast koboldcpp_clblast_noavx2 koboldcpp_cublas koboldcpp_hipblas koboldcpp_vulkan koboldcpp_vulkan_noavx2 finishedmsg
 tools: quantize_gpt2 quantize_gptj quantize_gguf quantize_neox quantize_mpt quantize_clip whispermain sdmain gguf-split
-dev: koboldcpp_default
-dev2: koboldcpp_clblast
-dev3: koboldcpp_vulkan finishedmsg
 
 ifndef UNAME_S
 UNAME_S := $(shell uname -s)
@@ -147,6 +144,7 @@ ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686 amd64))
 # Use all CPU extensions that are available:
 # old library NEEDS mf16c to work. so we must build with it. new one doesnt
 ifeq ($(OS),Windows_NT)
+ifdef LLAMA_PORTABLE
 	CFLAGS +=
 	NONECFLAGS +=
 	SIMPLECFLAGS += -mavx -msse3
@@ -155,8 +153,10 @@ ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686 amd64))
 	else
 	FULLCFLAGS += -mavx2 -msse3 -mfma -mf16c -mavx
 	endif
+else
+	CFLAGS += -march=native -mtune=native
+endif
 else
-# if not on windows, they are clearly building it themselves, so lets just use whatever is supported
 	ifdef LLAMA_PORTABLE
 	CFLAGS +=
 	NONECFLAGS +=
@@ -373,10 +373,17 @@ NOTIFY_MSG =
 
 ifeq ($(OS),Windows_NT)
 	DEFAULT_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.dll $(LDFLAGS)
+	ifdef LLAMA_PORTABLE
 	FAILSAFE_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.dll $(LDFLAGS)
 	NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.dll $(LDFLAGS)
+	endif
+
+	ifdef LLAMA_CLBLAST
 	CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ lib/OpenCL.lib lib/clblast.lib -shared -o $@.dll $(LDFLAGS)
+	endif
+	ifdef LLAMA_VULKAN
 	VULKAN_BUILD = $(CXX) $(CXXFLAGS) $^ lib/vulkan-1.lib -shared -o $@.dll $(LDFLAGS)
+	endif
 
 	ifdef LLAMA_CUBLAS
 	CUBLAS_BUILD = $(CXX) $(CXXFLAGS) $(CUBLAS_FLAGS) $^ -shared -o $@.dll $(CUBLASLD_FLAGS) $(LDFLAGS)
@@ -409,18 +416,18 @@ else
 	ifdef LLAMA_VULKAN
 	VULKAN_BUILD = $(CXX) $(CXXFLAGS) $^ -lvulkan -shared -o $@.so $(LDFLAGS)
 	endif
+endif
 
-ifndef LLAMA_CLBLAST
-ifndef LLAMA_CUBLAS
-ifndef LLAMA_HIPBLAS
-ifndef LLAMA_VULKAN
-ifndef LLAMA_METAL
-NOTIFY_MSG = @echo -e '\n***\nYou did a basic CPU build. For faster speeds, consider installing and linking a GPU BLAS library. For example, set LLAMA_VULKAN=1 to compile with Vulkan support. Read the KoboldCpp Wiki for more information. This is just a reminder, not an error.\n***\n'
-endif
-endif
-endif
-endif
-endif
+ifndef LLAMA_CLBLAST
+ifndef LLAMA_CUBLAS
+ifndef LLAMA_HIPBLAS
+ifndef LLAMA_VULKAN
+ifndef LLAMA_METAL
+NOTIFY_MSG = @echo -e '\n***\nYou did a basic CPU build. For faster speeds, consider installing and linking a GPU BLAS library. For example, set LLAMA_CLBLAST=1 LLAMA_VULKAN=1 to compile with Vulkan and CLBlast support. Add LLAMA_PORTABLE=1 to make a sharable build that other devices can use. Read the KoboldCpp Wiki for more information. This is just a reminder, not an error.\n***\n'
+endif
+endif
+endif
+endif
+endif
 endif
```
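With the gating above, a plain `make` on Windows now produces only the default CPU backend, and the reminder message suggests the new flag set. A minimal sketch of the two invocations (assuming a w64devkit shell; Linux behaves the same, except non-portable builds also pick up `-march=native`):

```sh
# Plain build: the FAILSAFE/NOAVX2/CLBlast/Vulkan build commands stay empty on
# Windows, and the NOTIFY_MSG reminder prints since no GPU backend flag was set.
make

# Restores the previous full Windows build set (per the commit message):
make LLAMA_CLBLAST=1 LLAMA_VULKAN=1 LLAMA_PORTABLE=1
```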
README.md

Lines changed: 6 additions & 1 deletion
```diff
@@ -83,13 +83,16 @@ when you can't use the precompiled binary directly, we provide an automated buil
 - For Debian: Install `libclblast-dev`.
 - You can attempt a CuBLAS build with `LLAMA_CUBLAS=1`, (or `LLAMA_HIPBLAS=1` for AMD). You will need CUDA Toolkit installed. Some have also reported success with the CMake file, though that is more for windows.
 - For a full featured build (all backends), do `make LLAMA_CLBLAST=1 LLAMA_CUBLAS=1 LLAMA_VULKAN=1`. (Note that `LLAMA_CUBLAS=1` will not work on windows, you need visual studio)
+- To make your build sharable and capable of working on other devices, you must use `LLAMA_PORTABLE=1`
 - After all binaries are built, you can run the python script with the command `koboldcpp.py [ggml_model.gguf] [port]`
 
 ### Compiling on Windows
 - You're encouraged to use the .exe released, but if you want to compile your binaries from source at Windows, the easiest way is:
   - Get the latest release of w64devkit (https://github.com/skeeto/w64devkit). Be sure to use the "vanilla one", not i686 or other different stuff. If you try they will conflit with the precompiled libs!
   - Clone the repo with `git clone https://github.com/LostRuins/koboldcpp.git`
-  - Make sure you are using the w64devkit integrated terminal, then run `make` at the KoboldCpp source folder. This will create the .dll files.
+  - Make sure you are using the w64devkit integrated terminal, then run `make` at the KoboldCpp source folder. This will create the .dll files for a pure CPU native build.
+  - For a full featured build (all backends), do `make LLAMA_CLBLAST=1 LLAMA_VULKAN=1`. (Note that `LLAMA_CUBLAS=1` will not work on windows, you need visual studio)
+  - To make your build sharable and capable of working on other devices, you must use `LLAMA_PORTABLE=1`
   - If you want to generate the .exe file, make sure you have the python module PyInstaller installed with pip (`pip install PyInstaller`). Then run the script `make_pyinstaller.bat`
   - The koboldcpp.exe file will be at your dist folder.
 - **Building with CUDA**: Visual Studio, CMake and CUDA Toolkit is required. Clone the repo, then open the CMake file and compile it in Visual Studio. Copy the `koboldcpp_cublas.dll` generated into the same directory as the `koboldcpp.py` file. If you are bundling executables, you may need to include CUDA dynamic libraries (such as `cublasLt64_11.dll` and `cublas64_11.dll`) in order for the executable to work correctly on a different PC.
@@ -104,6 +107,7 @@ when you can't use the precompiled binary directly, we provide an automated buil
 - You can compile your binaries from source. You can clone the repo with `git clone https://github.com/LostRuins/koboldcpp.git`
 - A makefile is provided, simply run `make`.
 - If you want Metal GPU support, instead run `make LLAMA_METAL=1`, note that MacOS metal libraries need to be installed.
+- To make your build sharable and capable of working on other devices, you must use `LLAMA_PORTABLE=1`
 - After all binaries are built, you can run the python script with the command `koboldcpp.py --model [ggml_model.gguf]` (and add `--gpulayers (number of layer)` if you wish to offload layers to GPU).
 
 ### Compiling on Android (Termux Installation)
@@ -114,6 +118,7 @@ when you can't use the precompiled binary directly, we provide an automated buil
 - Clone the repo `git clone https://github.com/LostRuins/koboldcpp.git`
 - Navigate to the koboldcpp folder `cd koboldcpp`
 - Build the project `make`
+- To make your build sharable and capable of working on other devices, you must use `LLAMA_PORTABLE=1`, this disables usage of ARM instrinsics.
 - Grab a small GGUF model, such as `wget https://huggingface.co/concedo/KobbleTinyV2-1.1B-GGUF/resolve/main/KobbleTiny-Q4_K.gguf`
 - Start the python server `python koboldcpp.py --model KobbleTiny-Q4_K.gguf`
 - Connect to `http://localhost:5001` on your mobile browser
```
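Taken together, the documented per-platform invocations after this change look roughly like this (a sketch based on the README text above; CuBLAS needs the CUDA Toolkit, and on Windows it is built separately with Visual Studio/CMake rather than `make`):

```sh
# Linux: full featured build (all backends), made sharable/portable
make LLAMA_CLBLAST=1 LLAMA_CUBLAS=1 LLAMA_VULKAN=1 LLAMA_PORTABLE=1

# Windows (w64devkit): CLBlast + Vulkan; CUDA requires Visual Studio instead
make LLAMA_CLBLAST=1 LLAMA_VULKAN=1 LLAMA_PORTABLE=1

# macOS: Metal build, made sharable/portable
make LLAMA_METAL=1 LLAMA_PORTABLE=1

# Android (Termux): portable build (disables ARM intrinsics)
make LLAMA_PORTABLE=1
```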
