
Commit b68eda1

Merge pull request #45 from l3utterfly/merge: merge from upstream
2 parents: 5d9a182 + bce287c

File tree

273 files changed: +52546 additions, -21454 deletions


.devops/llama-cli-cann.Dockerfile
Lines changed: 2 additions & 2 deletions

@@ -1,6 +1,6 @@
 ARG ASCEND_VERSION=8.0.rc2.alpha003-910b-openeuler22.03-py3.8

-FROM cosdt/cann:$ASCEND_VERSION AS build
+FROM ascendai/cann:$ASCEND_VERSION AS build

 WORKDIR /app

@@ -26,7 +26,7 @@ RUN echo "Building with static libs" && \
     cmake --build build --config Release --target llama-cli

 # TODO: use image with NNRT
-FROM cosdt/cann:$ASCEND_VERSION AS runtime
+FROM ascendai/cann:$ASCEND_VERSION AS runtime
 COPY --from=build /app/build/bin/llama-cli /llama-cli

 ENV LC_ALL=C.utf8
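Note: both the build and runtime stages now pull the CANN base image from the ascendai organization on Docker Hub instead of cosdt; the tag itself is unchanged. A minimal local build sketch, assuming the repository root as the Docker context (the llama-cli-cann tag is illustrative):

    docker build -t llama-cli-cann -f .devops/llama-cli-cann.Dockerfile .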

.devops/llama-cli-cuda.Dockerfile
Lines changed: 5 additions & 4 deletions

@@ -23,15 +23,16 @@ RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
     export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
     fi && \
     cmake -B build -DGGML_CUDA=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
-    cmake --build build --config Release --target llama-cli -j$(nproc)
+    cmake --build build --config Release --target llama-cli -j$(nproc) && \
+    mkdir -p /app/lib && \
+    find build -name "*.so" -exec cp {} /app/lib \;

 FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime

 RUN apt-get update && \
     apt-get install -y libgomp1

-COPY --from=build /app/build/ggml/src/libggml.so /libggml.so
-COPY --from=build /app/build/src/libllama.so /libllama.so
-COPY --from=build /app/build/bin/llama-cli /llama-cli
+COPY --from=build /app/lib/ /
+COPY --from=build /app/build/bin/llama-cli /

 ENTRYPOINT [ "/llama-cli" ]
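Note: rather than copying libggml.so and libllama.so from hard-coded build paths, the build stage now gathers every shared object under build/ into /app/lib, and the runtime stage copies that directory in a single step, so the image keeps working as ggml splits its backends into more shared libraries. A smoke-test sketch, assuming the llama-cli-cuda tag (illustrative) and that the --version flag prints build info and exits:

    docker build -t llama-cli-cuda -f .devops/llama-cli-cuda.Dockerfile .
    docker run --rm --gpus all llama-cli-cuda --version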

.devops/llama-cli-intel.Dockerfile
Lines changed: 1 addition & 1 deletion

@@ -1,4 +1,4 @@
-ARG ONEAPI_VERSION=2024.1.1-devel-ubuntu22.04
+ARG ONEAPI_VERSION=2025.0.0-0-devel-ubuntu22.04

 FROM intel/oneapi-basekit:$ONEAPI_VERSION AS build

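Note: both Intel Dockerfiles bump the oneAPI basekit base image from 2024.1.1 to 2025.0.0-0 (the new tag scheme carries an extra build-number segment). To prefetch the new base image, a sketch:

    docker pull intel/oneapi-basekit:2025.0.0-0-devel-ubuntu22.04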
.devops/llama-cli-musa.Dockerfile
Lines changed: 4 additions & 3 deletions

@@ -16,15 +16,16 @@ WORKDIR /app
 COPY . .

 RUN cmake -B build -DGGML_MUSA=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
-    cmake --build build --config Release --target llama-cli -j$(nproc)
+    cmake --build build --config Release --target llama-cli -j$(nproc) && \
+    mkdir -p /app/lib && \
+    find build -name "*.so" -exec cp {} /app/lib \;

 FROM ${BASE_MUSA_RUN_CONTAINER} AS runtime

 RUN apt-get update && \
     apt-get install -y libgomp1

-COPY --from=build /app/build/ggml/src/libggml.so /libggml.so
-COPY --from=build /app/build/src/libllama.so /libllama.so
+COPY --from=build /app/lib/ /
 COPY --from=build /app/build/bin/llama-cli /llama-cli

 ENTRYPOINT [ "/llama-cli" ]

.devops/llama-server-cuda.Dockerfile
Lines changed: 4 additions & 3 deletions

@@ -23,15 +23,16 @@ RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
     export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
     fi && \
     cmake -B build -DGGML_CUDA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
-    cmake --build build --config Release --target llama-server -j$(nproc)
+    cmake --build build --config Release --target llama-server -j$(nproc) && \
+    mkdir -p /app/lib && \
+    find build -name "*.so" -exec cp {} /app/lib \;

 FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime

 RUN apt-get update && \
     apt-get install -y libcurl4-openssl-dev libgomp1 curl

-COPY --from=build /app/build/ggml/src/libggml.so /libggml.so
-COPY --from=build /app/build/src/libllama.so /libllama.so
+COPY --from=build /app/lib/ /
 COPY --from=build /app/build/bin/llama-server /llama-server

 # Must be set to 0.0.0.0 so it can listen to requests from host machine
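Note: the server images receive the same shared-library collection as the CLI images above. A minimal run sketch (the tag, port mapping, and model path are illustrative; llama-server listens on port 8080 by default):

    docker build -t llama-server-cuda -f .devops/llama-server-cuda.Dockerfile .
    docker run --rm --gpus all -p 8080:8080 -v "$PWD/models":/models llama-server-cuda -m /models/model.gguf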

.devops/llama-server-intel.Dockerfile
Lines changed: 1 addition & 1 deletion

@@ -1,4 +1,4 @@
-ARG ONEAPI_VERSION=2024.1.1-devel-ubuntu22.04
+ARG ONEAPI_VERSION=2025.0.0-0-devel-ubuntu22.04

 FROM intel/oneapi-basekit:$ONEAPI_VERSION AS build

.devops/llama-server-musa.Dockerfile
Lines changed: 4 additions & 3 deletions

@@ -16,15 +16,16 @@ WORKDIR /app
 COPY . .

 RUN cmake -B build -DGGML_MUSA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
-    cmake --build build --config Release --target llama-server -j$(nproc)
+    cmake --build build --config Release --target llama-server -j$(nproc) && \
+    mkdir -p /app/lib && \
+    find build -name "*.so" -exec cp {} /app/lib \;

 FROM ${BASE_MUSA_RUN_CONTAINER} AS runtime

 RUN apt-get update && \
     apt-get install -y libcurl4-openssl-dev libgomp1 curl

-COPY --from=build /app/build/ggml/src/libggml.so /libggml.so
-COPY --from=build /app/build/src/libllama.so /libllama.so
+COPY --from=build /app/lib/ /
 COPY --from=build /app/build/bin/llama-server /llama-server

 # Must be set to 0.0.0.0 so it can listen to requests from host machine

.devops/nix/package.nix
Lines changed: 3 additions & 3 deletions

@@ -126,9 +126,9 @@ effectiveStdenv.mkDerivation (finalAttrs: {
   };

   postPatch = ''
-    substituteInPlace ./ggml/src/ggml-metal.m \
+    substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
       --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
-    substituteInPlace ./ggml/src/ggml-metal.m \
+    substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
       --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
   '';

@@ -173,7 +173,7 @@ effectiveStdenv.mkDerivation (finalAttrs: {
       (cmakeBool "GGML_NATIVE" false)
       (cmakeBool "GGML_BLAS" useBlas)
       (cmakeBool "GGML_CUDA" useCuda)
-      (cmakeBool "GGML_HIPBLAS" useRocm)
+      (cmakeBool "GGML_HIP" useRocm)
       (cmakeBool "GGML_METAL" useMetalKit)
       (cmakeBool "GGML_VULKAN" useVulkan)
       (cmakeBool "GGML_STATIC" enableStatic)
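Note: two renames land here: ggml-metal.m moved into the per-backend directory ggml/src/ggml-metal/, and the CMake option GGML_HIPBLAS became GGML_HIP (matching the workflow changes below). The same rename applies to manual ROCm builds outside Nix, where the old flag no longer has any effect; a configure sketch mirroring the CI invocation:

    cmake -B build -S . -DGGML_HIP=ON
    cmake --build build --config Release -j $(nproc)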

.editorconfig
Lines changed: 10 additions & 0 deletions

@@ -24,6 +24,16 @@ insert_final_newline = unset
 [examples/server/public/*]
 indent_size = 2

+[examples/server/public/deps_*]
+trim_trailing_whitespace = unset
+indent_style = unset
+indent_size = unset
+
+[examples/server/deps_*]
+trim_trailing_whitespace = unset
+indent_style = unset
+indent_size = unset
+
 [examples/llama.swiftui/llama.swiftui.xcodeproj/*]
 indent_style = tab
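Note: the new sections use unset to clear the trailing-whitespace and indentation rules for the deps_* files under examples/server, so editors that honor .editorconfig stop reformatting those vendored bundles; unset removes a property that an earlier, broader section set.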

.github/workflows/build.yml
Lines changed: 42 additions & 8 deletions

@@ -55,7 +55,13 @@ jobs:
           sysctl -a
           mkdir build
           cd build
-          cmake -DLLAMA_FATAL_WARNINGS=ON -DGGML_METAL_EMBED_LIBRARY=ON -DLLAMA_CURL=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=OFF ..
+          cmake .. \
+            -DLLAMA_FATAL_WARNINGS=ON \
+            -DLLAMA_CURL=ON \
+            -DGGML_METAL_USE_BF16=ON \
+            -DGGML_METAL_EMBED_LIBRARY=ON \
+            -DGGML_RPC=ON \
+            -DBUILD_SHARED_LIBS=OFF
           cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)

       - name: Test
@@ -113,7 +119,12 @@
           sysctl -a
           # Metal is disabled due to intermittent failures with Github runners not having a GPU:
           # https://github.com/ggerganov/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313
-          cmake -B build -DLLAMA_FATAL_WARNINGS=ON -DGGML_METAL=OFF -DLLAMA_CURL=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=OFF
+          cmake -B build \
+            -DLLAMA_FATAL_WARNINGS=ON \
+            -DLLAMA_CURL=ON \
+            -DGGML_METAL=OFF \
+            -DGGML_RPC=ON \
+            -DBUILD_SHARED_LIBS=OFF
           cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)

       - name: Test
@@ -394,15 +405,36 @@
       - name: Build with native CMake HIP support
         id: cmake_build
         run: |
-          cmake -B build -S . -DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" -DGGML_HIPBLAS=ON
+          cmake -B build -S . -DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" -DGGML_HIP=ON
           cmake --build build --config Release -j $(nproc)

       - name: Build with legacy HIP support
         id: cmake_build_legacy_hip
         run: |
-          cmake -B build2 -S . -DCMAKE_C_COMPILER=hipcc -DCMAKE_CXX_COMPILER=hipcc -DGGML_HIPBLAS=ON
+          cmake -B build2 -S . -DCMAKE_C_COMPILER=hipcc -DCMAKE_CXX_COMPILER=hipcc -DGGML_HIP=ON
           cmake --build build2 --config Release -j $(nproc)

+  ubuntu-22-cmake-musa:
+    runs-on: ubuntu-22.04
+    container: mthreads/musa:rc3.1.0-devel-ubuntu22.04
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: Dependencies
+        id: depends
+        run: |
+          apt-get update
+          apt-get install -y build-essential git cmake libcurl4-openssl-dev
+
+      - name: Build with native CMake MUSA support
+        id: cmake_build
+        run: |
+          cmake -B build -S . -DGGML_MUSA=ON
+          cmake --build build --config Release -j $(nproc)
+
   ubuntu-22-cmake-sycl:
     runs-on: ubuntu-22.04

@@ -569,6 +601,7 @@
           mkdir build
           cd build
           cmake -G Xcode .. \
+            -DGGML_METAL_USE_BF16=ON \
             -DGGML_METAL_EMBED_LIBRARY=ON \
             -DLLAMA_BUILD_EXAMPLES=OFF \
             -DLLAMA_BUILD_TESTS=OFF \
@@ -599,6 +632,7 @@
           mkdir build
           cd build
           cmake -G Xcode .. \
+            -DGGML_METAL_USE_BF16=ON \
             -DGGML_METAL_EMBED_LIBRARY=ON \
             -DLLAMA_BUILD_EXAMPLES=OFF \
             -DLLAMA_BUILD_TESTS=OFF \
@@ -734,7 +768,7 @@
         id: clone_kompute
         if: ${{ matrix.build == 'kompute-x64' }}
         run: |
-          git submodule update --init ggml/src/kompute
+          git submodule update --init ggml/src/ggml-kompute/kompute

       - name: Download OpenBLAS
         id: get_openblas
@@ -917,7 +951,7 @@
     shell: bash

     env:
-      WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/7dff44ba-e3af-4448-841c-0d616c8da6e7/w_BaseKit_p_2024.1.0.595_offline.exe
+      WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/b380d914-366b-4b77-a74a-05e3c38b3514/intel-oneapi-base-toolkit-2025.0.0.882_offline.exe
       WINDOWS_DPCPP_MKL: intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel
       ONEAPI_ROOT: "C:/Program Files (x86)/Intel/oneAPI"
     steps:
@@ -1001,7 +1035,7 @@
         run: |
           $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
           $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
-          cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DGGML_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DGGML_RPC=ON
+          cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DGGML_HIP=ON -DCMAKE_BUILD_TYPE=Release -DGGML_RPC=ON
           cmake --build build -j ${env:NUMBER_OF_PROCESSORS}

   windows-latest-cmake-hip-release:
@@ -1037,7 +1071,7 @@
         run: |
           $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
           $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
-          cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DGGML_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS=${{ matrix.gpu_target }} -DGGML_RPC=ON
+          cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DGGML_HIP=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS=${{ matrix.gpu_target }} -DGGML_RPC=ON
           cmake --build build -j ${env:NUMBER_OF_PROCESSORS}
           md "build\bin\rocblas\library\"
           cp "${env:HIP_PATH}\bin\hipblas.dll" "build\bin\"

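Note: the workflow changes track the rest of the commit: GGML_HIPBLAS becomes GGML_HIP throughout, the kompute submodule path follows the source move to ggml/src/ggml-kompute/, the Windows SYCL job downloads the 2025.0.0 oneAPI installer, the Xcode-generator builds enable GGML_METAL_USE_BF16, and a new ubuntu-22-cmake-musa job builds inside the mthreads/musa container. The MUSA job can be reproduced locally with roughly the following sketch (the bind mount of the repository checkout is illustrative):

    docker run --rm -v "$PWD":/ws -w /ws mthreads/musa:rc3.1.0-devel-ubuntu22.04 bash -c '
      apt-get update && apt-get install -y build-essential git cmake libcurl4-openssl-dev &&
      cmake -B build -S . -DGGML_MUSA=ON &&
      cmake --build build --config Release -j $(nproc)'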