Skip to content

Commit 1971f38

Browse files
authored
Merge branch 'LostRuins:concedo_experimental' into concedo_experimental
2 parents 4aa4201 + 7447a36 commit 1971f38

File tree

373 files changed

+1263225
-12910
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

373 files changed

+1263225
-12910
lines changed

.devops/s390x.Dockerfile

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
# syntax=docker/dockerfile:1

# Base-image versions used by the FROM lines below; override them with
# `--build-arg`. They are declared before the first FROM so that they are
# visible in FROM instructions.
ARG GCC_VERSION=15.2.0
ARG UBUNTU_VERSION=24.04

### Build Llama.cpp stage
FROM gcc:${GCC_VERSION} AS build

# Install the build toolchain and the libraries llama.cpp links against.
# apt-get is used instead of apt: apt's command-line interface is not
# guaranteed to be stable for scripted use.
# There is no `rm -rf /var/lib/apt/lists/*` here: that path is a cache mount,
# so its contents never reach an image layer and deleting them would only
# empty the cache.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
    apt-get update -y && \
    apt-get upgrade -y && \
    apt-get install -y --no-install-recommends \
        git cmake ccache ninja-build \
        # WARNING: Do not use libopenblas-openmp-dev. libopenblas-dev is faster.
        libopenblas-dev libcurl4-openssl-dev

WORKDIR /app
COPY . .

# Configure, build and install in a single RUN: /app/build is a cache mount,
# so the build tree is only available while this instruction executes. The
# install step places the results under /opt/llama.cpp.
RUN --mount=type=cache,target=/root/.ccache \
    --mount=type=cache,target=/app/build \
    cmake -S . -B build -G Ninja \
        -DCMAKE_BUILD_TYPE=Release \
        -DCMAKE_C_COMPILER_LAUNCHER=ccache \
        -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
        -DLLAMA_BUILD_TESTS=OFF \
        -DGGML_BACKEND_DL=OFF \
        -DGGML_NATIVE=OFF \
        -DGGML_BLAS=ON \
        -DGGML_BLAS_VENDOR=OpenBLAS && \
    cmake --build build --config Release -j "$(nproc)" && \
    cmake --install build --prefix /opt/llama.cpp

# Destinations end with "/" so they are always treated as directories; this is
# required when a wildcard source can match more than one file.
COPY *.py /opt/llama.cpp/bin/
COPY .devops/tools.sh /opt/llama.cpp/bin/

COPY gguf-py /opt/llama.cpp/gguf-py
COPY requirements.txt /opt/llama.cpp/gguf-py/
COPY requirements /opt/llama.cpp/gguf-py/requirements


### Collect the llama.cpp binaries, libraries and gguf-py files
FROM scratch AS collector

# Gather the installed tree from the build stage under /llama.cpp so that the
# final stages can copy from a single place.
COPY --from=build /opt/llama.cpp/bin /llama.cpp/bin/
COPY --from=build /opt/llama.cpp/lib /llama.cpp/lib/
COPY --from=build /opt/llama.cpp/gguf-py /llama.cpp/gguf-py/


### Base image
FROM ubuntu:${UBUNTU_VERSION} AS base

# Runtime dependencies shared by every final image (full, light, server).
# apt-get is used instead of apt: apt's command-line interface is not
# guaranteed to be stable for scripted use.
# NOTE(review): the cleanup at the end also deletes the files held in the two
# apt cache mounts, so little is reused between builds - confirm whether that
# is intended.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
    apt-get update -y && \
    apt-get install -y --no-install-recommends \
        # WARNING: Do not use libopenblas-openmp-dev. libopenblas-dev is faster.
        # See: https://github.com/ggml-org/llama.cpp/pull/15915#issuecomment-3317166506
        curl libgomp1 libopenblas-dev && \
    apt-get autoremove -y && \
    apt-get clean -y && \
    rm -rf /tmp/* /var/tmp/* && \
    find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete && \
    find /var/cache -type f -delete

# Copy llama.cpp libraries into the s390x multiarch library directory
COPY --from=collector /llama.cpp/lib /usr/lib/s390x-linux-gnu


### Full
FROM base AS full

# Put rustup's default per-user install location for root on PATH so the
# toolchain installed below can be found by later steps.
ENV PATH="/root/.cargo/bin:${PATH}"
WORKDIR /app

# Tooling for the Python scripts shipped in this image.
# apt-get is used instead of apt: apt's command-line interface is not
# guaranteed to be stable for scripted use.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
    apt-get update -y && \
    apt-get install -y \
        git cmake libjpeg-dev \
        python3 python3-pip python3-dev && \
    apt-get autoremove -y && \
    apt-get clean -y && \
    rm -rf /tmp/* /var/tmp/* && \
    find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete && \
    find /var/cache -type f -delete

# Without pipefail the exit status of `curl | bash` is that of bash alone: a
# failed download would feed bash an empty script, exit 0, and silently skip
# the Rust installation. The SHELL setting applies to the remaining RUN
# instructions of this stage.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | bash -s -- -y

COPY --from=collector /llama.cpp/bin /app
COPY --from=collector /llama.cpp/gguf-py /app/gguf-py

RUN pip install --no-cache-dir --break-system-packages \
    -r /app/gguf-py/requirements.txt

ENTRYPOINT [ "/app/tools.sh" ]


### CLI Only
FROM base AS light

WORKDIR /llama.cpp/bin

# Only the llama-cli executable is added on top of the base image
COPY --from=collector /llama.cpp/bin/llama-cli /llama.cpp/bin/

ENTRYPOINT ["/llama.cpp/bin/llama-cli"]


### Server
FROM base AS server

# NOTE(review): presumably read by llama-server as its bind address so the
# service is reachable from outside the container - confirm against the server docs.
ENV LLAMA_ARG_HOST="0.0.0.0"

WORKDIR /llama.cpp/bin

# Only the llama-server executable is added on top of the base image
COPY --from=collector /llama.cpp/bin/llama-server /llama.cpp/bin/

# Documents the port the service is expected to listen on; does not publish it
EXPOSE 8080/tcp

ENTRYPOINT ["/llama.cpp/bin/llama-server"]

.github/workflows/build-amd.yml

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
# CI for AMD GPUs: runs ./ci/run.sh on a self-hosted AMD machine, once with the
# Vulkan build flag and once with the ROCm build flag.
name: CI (AMD)

on:
  workflow_dispatch: # allows manual triggering
  push:
    branches:
      - master
    # Only run when build-relevant files change.
    # '**/*.cmake' matches every CMake module; the previous '**/.cmake' only
    # matched files literally named ".cmake".
    paths: [
      '.github/workflows/build-amd.yml',
      '**/CMakeLists.txt',
      '**/*.cmake',
      '**/*.h',
      '**/*.hpp',
      '**/*.c',
      '**/*.cpp',
      '**/*.cu',
      '**/*.cuh',
      '**/*.comp'
    ]

# Runs that have a head_ref share a group keyed by their ref and cancel each
# other; all other runs use their unique run_id and are never cancelled.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true

jobs:
  ggml-ci-x64-amd-vulkan:
    runs-on: [self-hosted, Linux, X64, AMD]

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4

      - name: Test
        id: ggml-ci
        run: |
          vulkaninfo --summary
          GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp

  ggml-ci-x64-amd-rocm:
    runs-on: [self-hosted, Linux, X64, AMD]

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4

      - name: Test
        id: ggml-ci
        run: |
          amd-smi static
          GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS="gfx1101" bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp

.github/workflows/kcpp-build-release-arm64.yaml

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -80,16 +80,7 @@ jobs:
8080
--add-data './json_to_gbnf.py:.' \
8181
--add-data './LICENSE.md:.' \
8282
--add-data './MIT_LICENSE_GGML_SDCPP_LLAMACPP_ONLY.md:.' \
83-
--add-data './klite.embd:.' \
84-
--add-data './kcpp_docs.embd:.' \
85-
--add-data './kcpp_sdui.embd:.' \
86-
--add-data './taesd.embd:.' \
87-
--add-data './taesd_xl.embd:.' \
88-
--add-data './taesd_f.embd:.' \
89-
--add-data './taesd_3.embd:.' \
90-
--add-data './kokoro_ipa.embd:.' \
91-
--add-data './rwkv_vocab.embd:.' \
92-
--add-data './rwkv_world_vocab.embd:.' \
83+
--add-data './embd_res:./embd_res' \
9384
--version-file './version.txt' \
9485
--clean --console koboldcpp.py -n 'koboldcpp-linux-arm64'
9586
"

.github/workflows/kcpp-build-release-macos.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ jobs:
3636
make LLAMA_METAL=1 LLAMA_PORTABLE=1
3737
chmod +x './create_ver_file.sh'
3838
. create_ver_file.sh
39-
pyinstaller --noconfirm --onefile --collect-all customtkinter --collect-all psutil --add-data './koboldcpp_default.so:.' --add-data './ggml-metal-merged.metal:.' --add-data './kcpp_adapters:./kcpp_adapters' --add-data './koboldcpp.py:.' --add-data './json_to_gbnf.py:.' --add-data './LICENSE.md:.' --add-data './MIT_LICENSE_GGML_SDCPP_LLAMACPP_ONLY.md:.' --add-data './klite.embd:.' --add-data './kcpp_docs.embd:.' --add-data './kcpp_sdui.embd:.' --add-data './taesd.embd:.' --add-data './taesd_xl.embd:.' --add-data './taesd_f.embd:.' --add-data './taesd_3.embd:.' --add-data './kokoro_ipa.embd:.' --add-data './rwkv_vocab.embd:.' --add-data './rwkv_world_vocab.embd:.' --version-file './version.txt' --clean --console koboldcpp.py -n "koboldcpp-mac-arm64"
39+
pyinstaller --noconfirm --onefile --collect-all customtkinter --collect-all psutil --add-data './koboldcpp_default.so:.' --add-data './ggml-metal-merged.metal:.' --add-data './kcpp_adapters:./kcpp_adapters' --add-data './koboldcpp.py:.' --add-data './json_to_gbnf.py:.' --add-data './LICENSE.md:.' --add-data './MIT_LICENSE_GGML_SDCPP_LLAMACPP_ONLY.md:.' --add-data './embd_res:./embd_res' --version-file './version.txt' --clean --console koboldcpp.py -n "koboldcpp-mac-arm64"
4040
4141
- name: Test
4242
id: test

.windsurf/rules/css-architecture.md

Lines changed: 0 additions & 7 deletions
This file was deleted.

.windsurf/rules/sveltekit-architecture.md

Lines changed: 0 additions & 48 deletions
This file was deleted.

.windsurf/rules/tests.md

Lines changed: 0 additions & 9 deletions
This file was deleted.

.windsurf/rules/typescript-architecture.md

Lines changed: 0 additions & 7 deletions
This file was deleted.

CMakeLists.txt

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,10 @@ if (LLAMA_CUBLAS)
130130
# 75 == int8 tensor cores
131131
# 80 == Ampere, asynchronous data loading, faster tensor core instructions
132132
message("CUDA Toolkit Version: ${CUDAToolkit_VERSION}")
133-
if(CUDAToolkit_VERSION VERSION_GREATER 12)
133+
if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 13)
134+
add_compile_definitions(GGML_CUDA_USE_GRAPHS) #try enable cuda graphs on cu13 build
135+
set(CMAKE_CUDA_ARCHITECTURES "75-virtual;80-virtual;86-virtual") # lowest CUDA 13 standard
136+
elseif(CUDAToolkit_VERSION VERSION_GREATER 12)
134137
add_compile_definitions(GGML_CUDA_USE_GRAPHS) #try enable cuda graphs on cu12 build
135138
set(CMAKE_CUDA_ARCHITECTURES "50-virtual;61-virtual;70-virtual;75-virtual;80-virtual") # lowest CUDA 12 standard + lowest for integer intrinsics
136139
else()

Makefile

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,12 @@ NVCCFLAGS += -Wno-deprecated-gpu-targets \
231231
-gencode arch=compute_75,code=compute_75 \
232232
-gencode arch=compute_80,code=compute_80
233233

234+
else ifdef LLAMA_ARCHES_CU13
235+
NVCCFLAGS += -Wno-deprecated-gpu-targets \
236+
-gencode arch=compute_75,code=compute_75 \
237+
-gencode arch=compute_80,code=compute_80 \
238+
-gencode arch=compute_86,code=compute_86
239+
234240
else
235241
NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=all
236242
endif
@@ -465,8 +471,13 @@ endif
465471

466472
ifdef NO_VULKAN_EXTENSIONS
467473
VKGEN_NOEXT_ADD = -DNO_VULKAN_EXTENSIONS
474+
VKGEN_SUFFIX = -noext
475+
else
476+
VKGEN_SUFFIX =
468477
endif
469478
VKGEN_NOEXT_FORCE = -DNO_VULKAN_EXTENSIONS
479+
VKGEN_HPP = ggml/src/ggml-vulkan-shaders$(VKGEN_SUFFIX).hpp
480+
VKGEN_CPP = ggml/src/ggml-vulkan-shaders$(VKGEN_SUFFIX).cpp
470481

471482
#
472483
# Print build information
@@ -679,9 +690,9 @@ ggml_v3-opencl.o: otherarch/ggml_v3-opencl.cpp otherarch/ggml_v3-opencl.h
679690
$(CXX) $(CXXFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
680691

681692
#vulkan
682-
ggml-vulkan.o: ggml/src/ggml-vulkan/ggml-vulkan.cpp ggml/include/ggml-vulkan.h ggml/src/ggml-vulkan-shaders.cpp
693+
ggml-vulkan.o: ggml/src/ggml-vulkan/ggml-vulkan.cpp ggml/include/ggml-vulkan.h $(VKGEN_CPP)
683694
$(CXX) $(CXXFLAGS) $(VKGEN_NOEXT_ADD) $(VULKAN_FLAGS) -c $< -o $@
684-
ggml-vulkan-shaders.o: ggml/src/ggml-vulkan-shaders.cpp ggml/include/ggml-vulkan.h
695+
ggml-vulkan-shaders.o: $(VKGEN_CPP) ggml/include/ggml-vulkan.h
685696
$(CXX) $(CXXFLAGS) $(VKGEN_NOEXT_ADD) $(VULKAN_FLAGS) -c $< -o $@
686697
ggml-vulkan-noext.o: ggml/src/ggml-vulkan/ggml-vulkan.cpp ggml/include/ggml-vulkan.h ggml/src/ggml-vulkan-shaders-noext.cpp
687698
$(CXX) $(CXXFLAGS) $(VKGEN_NOEXT_FORCE) $(VULKAN_FLAGS) -c $< -o $@
@@ -701,11 +712,11 @@ expose.o: expose.cpp expose.h
701712
$(CXX) $(CXXFLAGS) -c $< -o $@
702713

703714
# sd.cpp objects
704-
sdcpp_default.o: otherarch/sdcpp/sdtype_adapter.cpp otherarch/sdcpp/stable-diffusion.h otherarch/sdcpp/stable-diffusion.cpp otherarch/sdcpp/util.cpp otherarch/sdcpp/upscaler.cpp otherarch/sdcpp/model.cpp otherarch/sdcpp/thirdparty/zip.c
715+
sdcpp_default.o: otherarch/sdcpp/sdtype_adapter.cpp otherarch/sdcpp/stable-diffusion.h otherarch/sdcpp/stable-diffusion.cpp otherarch/sdcpp/util.cpp otherarch/sdcpp/upscaler.cpp otherarch/sdcpp/model.cpp otherarch/sdcpp/tokenize_util.cpp otherarch/sdcpp/thirdparty/zip.c
705716
$(CXX) $(CXXFLAGS) -c $< -o $@
706-
sdcpp_cublas.o: otherarch/sdcpp/sdtype_adapter.cpp otherarch/sdcpp/stable-diffusion.h otherarch/sdcpp/stable-diffusion.cpp otherarch/sdcpp/util.cpp otherarch/sdcpp/upscaler.cpp otherarch/sdcpp/model.cpp otherarch/sdcpp/thirdparty/zip.c
717+
sdcpp_cublas.o: otherarch/sdcpp/sdtype_adapter.cpp otherarch/sdcpp/stable-diffusion.h otherarch/sdcpp/stable-diffusion.cpp otherarch/sdcpp/util.cpp otherarch/sdcpp/upscaler.cpp otherarch/sdcpp/model.cpp otherarch/sdcpp/tokenize_util.cpp otherarch/sdcpp/thirdparty/zip.c
707718
$(CXX) $(CXXFLAGS) $(CUBLAS_FLAGS) $(HIPFLAGS) -c $< -o $@
708-
sdcpp_vulkan.o: otherarch/sdcpp/sdtype_adapter.cpp otherarch/sdcpp/stable-diffusion.h otherarch/sdcpp/stable-diffusion.cpp otherarch/sdcpp/util.cpp otherarch/sdcpp/upscaler.cpp otherarch/sdcpp/model.cpp otherarch/sdcpp/thirdparty/zip.c
719+
sdcpp_vulkan.o: otherarch/sdcpp/sdtype_adapter.cpp otherarch/sdcpp/stable-diffusion.h otherarch/sdcpp/stable-diffusion.cpp otherarch/sdcpp/util.cpp otherarch/sdcpp/upscaler.cpp otherarch/sdcpp/model.cpp otherarch/sdcpp/tokenize_util.cpp otherarch/sdcpp/thirdparty/zip.c
709720
$(CXX) $(CXXFLAGS) $(VULKAN_FLAGS) -c $< -o $@
710721

711722

@@ -748,8 +759,8 @@ clean:
748759
# useful tools
749760
main: tools/main/main.cpp common/arg.cpp build-info.h ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o ggml-repack.o $(OBJS_FULL) $(OBJS)
750761
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
751-
sdmain: otherarch/sdcpp/util.cpp otherarch/sdcpp/main.cpp otherarch/sdcpp/stable-diffusion.cpp otherarch/sdcpp/upscaler.cpp otherarch/sdcpp/model.cpp otherarch/sdcpp/thirdparty/zip.c build-info.h ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o ggml-backend_default.o ggml-backend-reg_default.o ggml-repack.o $(OBJS_FULL) $(OBJS)
752-
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
762+
sdmain: otherarch/sdcpp/util.cpp otherarch/sdcpp/main.cpp otherarch/sdcpp/stable-diffusion.cpp otherarch/sdcpp/upscaler.cpp otherarch/sdcpp/model.cpp otherarch/sdcpp/tokenize_util.cpp otherarch/sdcpp/thirdparty/zip.c build-info.h ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o ggml-backend_default.o ggml-backend-reg_default.o ggml-repack.o $(OBJS_FULL) $(OBJS)
763+
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -DKCPP_BAKE_SD_VOCAB -o $@ $(LDFLAGS)
753764
whispermain: otherarch/whispercpp/main.cpp otherarch/whispercpp/whisper.cpp build-info.h ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o ggml-backend_default.o ggml-backend-reg_default.o ggml-repack.o $(OBJS_FULL) $(OBJS)
754765
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
755766
ttsmain: tools/tts/tts.cpp common/arg.cpp build-info.h ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o ggml-repack.o $(OBJS_FULL) $(OBJS)
@@ -779,7 +790,7 @@ vulkan-shaders-gen: ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp
779790
$(CXX) $(CXXFLAGS) $(VKGEN_NOEXT_ADD) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
780791
ifeq ($(OS),Windows_NT)
781792
@echo 'Now rebuilding vulkan shaders for Windows...'
782-
$(shell) vulkan-shaders-gen --glslc glslc --input-dir ggml/src/ggml-vulkan/vulkan-shaders --target-hpp ggml/src/ggml-vulkan-shaders.hpp --target-cpp ggml/src/ggml-vulkan-shaders.cpp --output-dir vulkan-spv-tmp
793+
$(shell) vulkan-shaders-gen --glslc glslc --input-dir ggml/src/ggml-vulkan/vulkan-shaders --target-hpp $(VKGEN_HPP) --target-cpp $(VKGEN_CPP) --output-dir vulkan-spv-tmp
783794
@echo 'Vulkan Shaders Rebuilt for Windows...'
784795
else
785796
@echo 'Now rebuilding vulkan shaders for Linux...'
@@ -813,7 +824,7 @@ else
813824
echo "Error: No usable glslc found. Vulkan shaders cannot be compiled!"; \
814825
else \
815826
echo "Using GLSLC: $$GLSLC_BIN"; \
816-
./vulkan-shaders-gen --glslc "$$GLSLC_BIN" --input-dir ggml/src/ggml-vulkan/vulkan-shaders --target-hpp ggml/src/ggml-vulkan-shaders.hpp --target-cpp ggml/src/ggml-vulkan-shaders.cpp --output-dir vulkan-spv-tmp; \
827+
./vulkan-shaders-gen --glslc "$$GLSLC_BIN" --input-dir ggml/src/ggml-vulkan/vulkan-shaders --target-hpp $(VKGEN_HPP) --target-cpp $(VKGEN_CPP) --output-dir vulkan-spv-tmp; \
817828
fi
818829
@echo 'Vulkan Shaders Rebuilt for Linux...'
819830
endif

0 commit comments

Comments
 (0)