henk717
diff --git a/‎.devops/s390x.Dockerfile‎
Lines changed: 123 additions & 0 deletions b/‎.devops/s390x.Dockerfile‎
Lines changed: 123 additions & 0 deletions
diff --git a/‎.github/workflows/build-amd.yml‎
Lines changed: 52 additions & 0 deletions b/‎.github/workflows/build-amd.yml‎
Lines changed: 52 additions & 0 deletions
diff --git a/‎.github/workflows/kcpp-build-release-arm64.yaml‎
Lines changed: 1 addition & 10 deletions b/‎.github/workflows/kcpp-build-release-arm64.yaml‎
Lines changed: 1 addition & 10 deletions
diff --git a/‎.github/workflows/kcpp-build-release-macos.yaml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/kcpp-build-release-macos.yaml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.windsurf/rules/css-architecture.md‎
Lines changed: 0 additions & 7 deletions b/‎.windsurf/rules/css-architecture.md‎
Lines changed: 0 additions & 7 deletions
diff --git a/‎.windsurf/rules/sveltekit-architecture.md‎
Lines changed: 0 additions & 48 deletions b/‎.windsurf/rules/sveltekit-architecture.md‎
Lines changed: 0 additions & 48 deletions
diff --git a/‎.windsurf/rules/tests.md‎
Lines changed: 0 additions & 9 deletions b/‎.windsurf/rules/tests.md‎
Lines changed: 0 additions & 9 deletions
diff --git a/‎.windsurf/rules/typescript-architecture.md‎
Lines changed: 0 additions & 7 deletions b/‎.windsurf/rules/typescript-architecture.md‎
Lines changed: 0 additions & 7 deletions
diff --git a/‎CMakeLists.txt‎
Lines changed: 4 additions & 1 deletion b/‎CMakeLists.txt‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎Makefile‎
Lines changed: 20 additions & 9 deletions b/‎Makefile‎
Lines changed: 20 additions & 9 deletions
@@ -0,0 +1,123 @@
+ARG GCC_VERSION=15.2.0
+ARG UBUNTU_VERSION=24.04
+
+### Build Llama.cpp stage
+FROM gcc:${GCC_VERSION} AS build
+
+RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
+    --mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
+    apt update -y && \
+    apt upgrade -y && \
+    apt install -y --no-install-recommends \
+        git cmake ccache ninja-build \
+        # WARNING: Do not use libopenblas-openmp-dev. libopenblas-dev is faster.
+        libopenblas-dev libcurl4-openssl-dev && \
+    rm -rf /var/lib/apt/lists/*
+
+WORKDIR /app
+COPY . .
+
+RUN --mount=type=cache,target=/root/.ccache \
+    --mount=type=cache,target=/app/build \
+    cmake -S . -B build -G Ninja \
+        -DCMAKE_BUILD_TYPE=Release \
+        -DCMAKE_C_COMPILER_LAUNCHER=ccache \
+        -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
+        -DLLAMA_BUILD_TESTS=OFF \
+        -DGGML_BACKEND_DL=OFF \
+        -DGGML_NATIVE=OFF \
+        -DGGML_BLAS=ON \
+        -DGGML_BLAS_VENDOR=OpenBLAS && \
+    cmake --build build --config Release -j $(nproc) && \
+    cmake --install build --prefix /opt/llama.cpp
+
+COPY *.py             /opt/llama.cpp/bin
+COPY .devops/tools.sh /opt/llama.cpp/bin
+
+COPY gguf-py          /opt/llama.cpp/gguf-py
+COPY requirements.txt /opt/llama.cpp/gguf-py
+COPY requirements     /opt/llama.cpp/gguf-py/requirements
+
+
+### Collect all llama.cpp binaries, libraries and distro libraries
+FROM scratch AS collector
+
+# Copy llama.cpp binaries and libraries
+COPY --from=build /opt/llama.cpp/bin     /llama.cpp/bin
+COPY --from=build /opt/llama.cpp/lib     /llama.cpp/lib
+COPY --from=build /opt/llama.cpp/gguf-py /llama.cpp/gguf-py
+
+
+### Base image
+FROM ubuntu:${UBUNTU_VERSION} AS base
+
+RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
+    --mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
+    apt update -y && \
+    apt install -y --no-install-recommends \
+        # WARNING: Do not use libopenblas-openmp-dev. libopenblas-dev is faster.
+        # See: https://github.com/ggml-org/llama.cpp/pull/15915#issuecomment-3317166506
+        curl libgomp1 libopenblas-dev && \
+    apt autoremove -y && \
+    apt clean -y && \
+    rm -rf /tmp/* /var/tmp/* && \
+    find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete && \
+    find /var/cache -type f -delete
+
+# Copy llama.cpp libraries
+COPY --from=collector /llama.cpp/lib /usr/lib/s390x-linux-gnu
+
+
+### Full
+FROM base AS full
+
+ENV PATH="/root/.cargo/bin:${PATH}"
+WORKDIR /app
+
+RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
+    --mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
+    apt update -y && \
+    apt install -y \
+        git cmake libjpeg-dev \
+        python3 python3-pip python3-dev && \
+    apt autoremove -y && \
+    apt clean -y && \
+    rm -rf /tmp/* /var/tmp/* && \
+    find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete && \
+    find /var/cache -type f -delete
+
+RUN curl https://sh.rustup.rs -sSf | bash -s -- -y
+
+COPY --from=collector /llama.cpp/bin /app
+COPY --from=collector /llama.cpp/gguf-py /app/gguf-py
+
+RUN pip install --no-cache-dir --break-system-packages \
+        -r /app/gguf-py/requirements.txt
+
+ENTRYPOINT [ "/app/tools.sh" ]
+
+
+### CLI Only
+FROM base AS light
+
+WORKDIR /llama.cpp/bin
+
+# Copy llama.cpp binaries and libraries
+COPY --from=collector /llama.cpp/bin/llama-cli /llama.cpp/bin
+
+ENTRYPOINT [ "/llama.cpp/bin/llama-cli" ]
+
+
+### Server
+FROM base AS server
+
+ENV LLAMA_ARG_HOST=0.0.0.0
+
+WORKDIR /llama.cpp/bin
+
+# Copy llama.cpp binaries and libraries
+COPY --from=collector /llama.cpp/bin/llama-server /llama.cpp/bin
+
+EXPOSE 8080
+
+ENTRYPOINT [ "/llama.cpp/bin/llama-server" ]
@@ -0,0 +1,52 @@
+name: CI (AMD)
+
+on:
+  workflow_dispatch: # allows manual triggering
+  push:
+    branches:
+      - master
+    paths: [
+      '.github/workflows/build-amd.yml',
+      '**/CMakeLists.txt',
+      '**/.cmake',
+      '**/*.h',
+      '**/*.hpp',
+      '**/*.c',
+      '**/*.cpp',
+      '**/*.cu',
+      '**/*.cuh',
+      '**/*.comp'
+    ]
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
+  cancel-in-progress: true
+
+jobs:
+  ggml-ci-x64-amd-vulkan:
+    runs-on: [self-hosted, Linux, X64, AMD]
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: Test
+        id: ggml-ci
+        run: |
+          vulkaninfo --summary
+          GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+
+  ggml-ci-x64-amd-rocm:
+    runs-on: [self-hosted, Linux, X64, AMD]
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: Test
+        id: ggml-ci
+        run: |
+          amd-smi static
+          GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS="gfx1101" bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
@@ -80,16 +80,7 @@ jobs:
               --add-data './json_to_gbnf.py:.' \
               --add-data './LICENSE.md:.'  \
               --add-data './MIT_LICENSE_GGML_SDCPP_LLAMACPP_ONLY.md:.' \
-              --add-data './klite.embd:.' \
-              --add-data './kcpp_docs.embd:.' \
-              --add-data './kcpp_sdui.embd:.' \
-              --add-data './taesd.embd:.' \
-              --add-data './taesd_xl.embd:.' \
-              --add-data './taesd_f.embd:.' \
-              --add-data './taesd_3.embd:.' \
-              --add-data './kokoro_ipa.embd:.' \
-              --add-data './rwkv_vocab.embd:.' \
-              --add-data './rwkv_world_vocab.embd:.' \
+              --add-data './embd_res:./embd_res' \
               --version-file './version.txt' \
               --clean --console koboldcpp.py -n 'koboldcpp-linux-arm64'
             "
 
@@ -36,7 +36,7 @@ jobs:
           make LLAMA_METAL=1 LLAMA_PORTABLE=1
           chmod +x './create_ver_file.sh'
           . create_ver_file.sh
-          pyinstaller --noconfirm --onefile --collect-all customtkinter --collect-all psutil --add-data './koboldcpp_default.so:.' --add-data './ggml-metal-merged.metal:.' --add-data './kcpp_adapters:./kcpp_adapters' --add-data './koboldcpp.py:.' --add-data './json_to_gbnf.py:.' --add-data './LICENSE.md:.' --add-data './MIT_LICENSE_GGML_SDCPP_LLAMACPP_ONLY.md:.' --add-data './klite.embd:.' --add-data './kcpp_docs.embd:.' --add-data './kcpp_sdui.embd:.' --add-data './taesd.embd:.' --add-data './taesd_xl.embd:.' --add-data './taesd_f.embd:.' --add-data './taesd_3.embd:.' --add-data './kokoro_ipa.embd:.' --add-data './rwkv_vocab.embd:.' --add-data './rwkv_world_vocab.embd:.' --version-file './version.txt' --clean --console koboldcpp.py -n "koboldcpp-mac-arm64"
+          pyinstaller --noconfirm --onefile --collect-all customtkinter --collect-all psutil --add-data './koboldcpp_default.so:.' --add-data './ggml-metal-merged.metal:.' --add-data './kcpp_adapters:./kcpp_adapters' --add-data './koboldcpp.py:.' --add-data './json_to_gbnf.py:.' --add-data './LICENSE.md:.' --add-data './MIT_LICENSE_GGML_SDCPP_LLAMACPP_ONLY.md:.' --add-data './embd_res:./embd_res' --version-file './version.txt' --clean --console koboldcpp.py -n "koboldcpp-mac-arm64"
 
       - name: Test
         id: test
 
@@ -130,7 +130,10 @@ if (LLAMA_CUBLAS)
         # 75 == int8 tensor cores
         # 80 == Ampere, asynchronous data loading, faster tensor core instructions
         message("CUDA Toolkit Version: ${CUDAToolkit_VERSION}")
-        if(CUDAToolkit_VERSION VERSION_GREATER 12)
+        if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 13)
+                add_compile_definitions(GGML_CUDA_USE_GRAPHS) #try enable cuda graphs on cu12 build
+                set(CMAKE_CUDA_ARCHITECTURES "75-virtual;80-virtual;86-virtual") # lowest CUDA 13 standard
+        elseif(CUDAToolkit_VERSION VERSION_GREATER 12)
                 add_compile_definitions(GGML_CUDA_USE_GRAPHS) #try enable cuda graphs on cu12 build
                 set(CMAKE_CUDA_ARCHITECTURES "50-virtual;61-virtual;70-virtual;75-virtual;80-virtual") # lowest CUDA 12 standard + lowest for integer intrinsics
         else()
 
@@ -231,6 +231,12 @@ NVCCFLAGS += -Wno-deprecated-gpu-targets \
              -gencode arch=compute_75,code=compute_75 \
              -gencode arch=compute_80,code=compute_80
 
+else ifdef LLAMA_ARCHES_CU13
+NVCCFLAGS += -Wno-deprecated-gpu-targets \
+             -gencode arch=compute_75,code=compute_75 \
+             -gencode arch=compute_80,code=compute_80 \
+             -gencode arch=compute_86,code=compute_86
+
 else
 NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=all
 endif
@@ -465,8 +471,13 @@ endif
 
 ifdef NO_VULKAN_EXTENSIONS
 VKGEN_NOEXT_ADD = -DNO_VULKAN_EXTENSIONS
+VKGEN_SUFFIX = -noext
+else
+VKGEN_SUFFIX =
 endif
 VKGEN_NOEXT_FORCE = -DNO_VULKAN_EXTENSIONS
+VKGEN_HPP = ggml/src/ggml-vulkan-shaders$(VKGEN_SUFFIX).hpp
+VKGEN_CPP = ggml/src/ggml-vulkan-shaders$(VKGEN_SUFFIX).cpp
 
 #
 # Print build information
@@ -679,9 +690,9 @@ ggml_v3-opencl.o: otherarch/ggml_v3-opencl.cpp otherarch/ggml_v3-opencl.h
 	$(CXX) $(CXXFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
 
 #vulkan
-ggml-vulkan.o: ggml/src/ggml-vulkan/ggml-vulkan.cpp ggml/include/ggml-vulkan.h ggml/src/ggml-vulkan-shaders.cpp
+ggml-vulkan.o: ggml/src/ggml-vulkan/ggml-vulkan.cpp ggml/include/ggml-vulkan.h $(VKGEN_CPP)
 	$(CXX) $(CXXFLAGS) $(VKGEN_NOEXT_ADD) $(VULKAN_FLAGS) -c $< -o $@
-ggml-vulkan-shaders.o: ggml/src/ggml-vulkan-shaders.cpp ggml/include/ggml-vulkan.h
+ggml-vulkan-shaders.o: $(VKGEN_CPP) ggml/include/ggml-vulkan.h
 	$(CXX) $(CXXFLAGS) $(VKGEN_NOEXT_ADD) $(VULKAN_FLAGS) -c $< -o $@
 ggml-vulkan-noext.o: ggml/src/ggml-vulkan/ggml-vulkan.cpp ggml/include/ggml-vulkan.h ggml/src/ggml-vulkan-shaders-noext.cpp
 	$(CXX) $(CXXFLAGS) $(VKGEN_NOEXT_FORCE) $(VULKAN_FLAGS) -c $< -o $@
@@ -701,11 +712,11 @@ expose.o: expose.cpp expose.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
 
 # sd.cpp objects
-sdcpp_default.o: otherarch/sdcpp/sdtype_adapter.cpp otherarch/sdcpp/stable-diffusion.h otherarch/sdcpp/stable-diffusion.cpp otherarch/sdcpp/util.cpp otherarch/sdcpp/upscaler.cpp otherarch/sdcpp/model.cpp otherarch/sdcpp/thirdparty/zip.c
+sdcpp_default.o: otherarch/sdcpp/sdtype_adapter.cpp otherarch/sdcpp/stable-diffusion.h otherarch/sdcpp/stable-diffusion.cpp otherarch/sdcpp/util.cpp otherarch/sdcpp/upscaler.cpp otherarch/sdcpp/model.cpp otherarch/sdcpp/tokenize_util.cpp otherarch/sdcpp/thirdparty/zip.c
 	$(CXX) $(CXXFLAGS) -c $< -o $@
-sdcpp_cublas.o: otherarch/sdcpp/sdtype_adapter.cpp otherarch/sdcpp/stable-diffusion.h otherarch/sdcpp/stable-diffusion.cpp otherarch/sdcpp/util.cpp otherarch/sdcpp/upscaler.cpp otherarch/sdcpp/model.cpp otherarch/sdcpp/thirdparty/zip.c
+sdcpp_cublas.o: otherarch/sdcpp/sdtype_adapter.cpp otherarch/sdcpp/stable-diffusion.h otherarch/sdcpp/stable-diffusion.cpp otherarch/sdcpp/util.cpp otherarch/sdcpp/upscaler.cpp otherarch/sdcpp/model.cpp otherarch/sdcpp/tokenize_util.cpp otherarch/sdcpp/thirdparty/zip.c
 	$(CXX) $(CXXFLAGS) $(CUBLAS_FLAGS) $(HIPFLAGS) -c $< -o $@
-sdcpp_vulkan.o: otherarch/sdcpp/sdtype_adapter.cpp otherarch/sdcpp/stable-diffusion.h otherarch/sdcpp/stable-diffusion.cpp otherarch/sdcpp/util.cpp otherarch/sdcpp/upscaler.cpp otherarch/sdcpp/model.cpp otherarch/sdcpp/thirdparty/zip.c
+sdcpp_vulkan.o: otherarch/sdcpp/sdtype_adapter.cpp otherarch/sdcpp/stable-diffusion.h otherarch/sdcpp/stable-diffusion.cpp otherarch/sdcpp/util.cpp otherarch/sdcpp/upscaler.cpp otherarch/sdcpp/model.cpp otherarch/sdcpp/tokenize_util.cpp otherarch/sdcpp/thirdparty/zip.c
 	$(CXX) $(CXXFLAGS) $(VULKAN_FLAGS) -c $< -o $@
 
 
@@ -748,8 +759,8 @@ clean:
 # useful tools
 main: tools/main/main.cpp common/arg.cpp build-info.h ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o ggml-repack.o $(OBJS_FULL) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
-sdmain: otherarch/sdcpp/util.cpp otherarch/sdcpp/main.cpp otherarch/sdcpp/stable-diffusion.cpp otherarch/sdcpp/upscaler.cpp otherarch/sdcpp/model.cpp otherarch/sdcpp/thirdparty/zip.c build-info.h ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o ggml-backend_default.o ggml-backend-reg_default.o ggml-repack.o $(OBJS_FULL) $(OBJS)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
+sdmain: otherarch/sdcpp/util.cpp otherarch/sdcpp/main.cpp otherarch/sdcpp/stable-diffusion.cpp otherarch/sdcpp/upscaler.cpp otherarch/sdcpp/model.cpp otherarch/sdcpp/tokenize_util.cpp otherarch/sdcpp/thirdparty/zip.c build-info.h ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o ggml-backend_default.o ggml-backend-reg_default.o ggml-repack.o $(OBJS_FULL) $(OBJS)
+	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -DKCPP_BAKE_SD_VOCAB -o $@ $(LDFLAGS)
 whispermain: otherarch/whispercpp/main.cpp otherarch/whispercpp/whisper.cpp build-info.h ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o ggml-backend_default.o ggml-backend-reg_default.o ggml-repack.o $(OBJS_FULL) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 ttsmain: tools/tts/tts.cpp common/arg.cpp build-info.h ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o ggml-repack.o $(OBJS_FULL) $(OBJS)
@@ -779,7 +790,7 @@ vulkan-shaders-gen: ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp
 	$(CXX) $(CXXFLAGS) $(VKGEN_NOEXT_ADD) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 ifeq ($(OS),Windows_NT)
 	@echo 'Now rebuilding vulkan shaders for Windows...'
-	$(shell) vulkan-shaders-gen --glslc glslc --input-dir ggml/src/ggml-vulkan/vulkan-shaders --target-hpp ggml/src/ggml-vulkan-shaders.hpp --target-cpp ggml/src/ggml-vulkan-shaders.cpp --output-dir vulkan-spv-tmp
+	$(shell) vulkan-shaders-gen --glslc glslc --input-dir ggml/src/ggml-vulkan/vulkan-shaders --target-hpp $(VKGEN_HPP) --target-cpp $(VKGEN_CPP) --output-dir vulkan-spv-tmp
 	@echo 'Vulkan Shaders Rebuilt for Windows...'
 else
 	@echo 'Now rebuilding vulkan shaders for Linux...'
@@ -813,7 +824,7 @@ else
 		echo "Error: No usable glslc found. Vulkan shaders cannot be compiled!"; \
 	else \
 		echo "Using GLSLC: $$GLSLC_BIN"; \
-		./vulkan-shaders-gen --glslc "$$GLSLC_BIN" --input-dir ggml/src/ggml-vulkan/vulkan-shaders --target-hpp ggml/src/ggml-vulkan-shaders.hpp --target-cpp ggml/src/ggml-vulkan-shaders.cpp --output-dir vulkan-spv-tmp; \
+		./vulkan-shaders-gen --glslc "$$GLSLC_BIN" --input-dir ggml/src/ggml-vulkan/vulkan-shaders --target-hpp $(VKGEN_HPP) --target-cpp $(VKGEN_CPP) --output-dir vulkan-spv-tmp; \
 	fi
 	@echo 'Vulkan Shaders Rebuilt for Linux...'
 endif