Skip to content

Commit 2e74492

Browse files
author
prima
committed
Merge remote-tracking branch 'origin/concedo_experimental' into remoteManagement
2 parents 9ec887d + e4c4040 commit 2e74492

File tree

317 files changed

+789832
-9209
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

317 files changed

+789832
-9209
lines changed

.devops/s390x.Dockerfile

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
ARG GCC_VERSION=15.2.0
2+
ARG UBUNTU_VERSION=24.04
3+
4+
### Build Llama.cpp stage
5+
FROM gcc:${GCC_VERSION} AS build
6+
7+
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
8+
--mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
9+
apt update -y && \
10+
apt upgrade -y && \
11+
apt install -y --no-install-recommends \
12+
git cmake ccache ninja-build \
13+
# WARNING: Do not use libopenblas-openmp-dev. libopenblas-dev is faster.
14+
libopenblas-dev libcurl4-openssl-dev && \
15+
rm -rf /var/lib/apt/lists/*
16+
17+
WORKDIR /app
18+
COPY . .
19+
20+
RUN --mount=type=cache,target=/root/.ccache \
21+
--mount=type=cache,target=/app/build \
22+
cmake -S . -B build -G Ninja \
23+
-DCMAKE_BUILD_TYPE=Release \
24+
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
25+
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
26+
-DLLAMA_BUILD_TESTS=OFF \
27+
-DGGML_BACKEND_DL=OFF \
28+
-DGGML_NATIVE=OFF \
29+
-DGGML_BLAS=ON \
30+
-DGGML_BLAS_VENDOR=OpenBLAS && \
31+
cmake --build build --config Release -j $(nproc) && \
32+
cmake --install build --prefix /opt/llama.cpp
33+
34+
COPY *.py /opt/llama.cpp/bin
35+
COPY .devops/tools.sh /opt/llama.cpp/bin
36+
37+
COPY gguf-py /opt/llama.cpp/gguf-py
38+
COPY requirements.txt /opt/llama.cpp/gguf-py
39+
COPY requirements /opt/llama.cpp/gguf-py/requirements
40+
41+
42+
### Collect all llama.cpp binaries, libraries and distro libraries
43+
FROM scratch AS collector
44+
45+
# Copy llama.cpp binaries and libraries
46+
COPY --from=build /opt/llama.cpp/bin /llama.cpp/bin
47+
COPY --from=build /opt/llama.cpp/lib /llama.cpp/lib
48+
COPY --from=build /opt/llama.cpp/gguf-py /llama.cpp/gguf-py
49+
50+
51+
### Base image
52+
FROM ubuntu:${UBUNTU_VERSION} AS base
53+
54+
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
55+
--mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
56+
apt update -y && \
57+
apt install -y --no-install-recommends \
58+
# WARNING: Do not use libopenblas-openmp-dev. libopenblas-dev is faster.
59+
# See: https://github.com/ggml-org/llama.cpp/pull/15915#issuecomment-3317166506
60+
curl libgomp1 libopenblas-dev && \
61+
apt autoremove -y && \
62+
apt clean -y && \
63+
rm -rf /tmp/* /var/tmp/* && \
64+
find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete && \
65+
find /var/cache -type f -delete
66+
67+
# Copy llama.cpp libraries
68+
COPY --from=collector /llama.cpp/lib /usr/lib/s390x-linux-gnu
69+
70+
71+
### Full
72+
FROM base AS full
73+
74+
ENV PATH="/root/.cargo/bin:${PATH}"
75+
WORKDIR /app
76+
77+
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
78+
--mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
79+
apt update -y && \
80+
apt install -y \
81+
git cmake libjpeg-dev \
82+
python3 python3-pip python3-dev && \
83+
apt autoremove -y && \
84+
apt clean -y && \
85+
rm -rf /tmp/* /var/tmp/* && \
86+
find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete && \
87+
find /var/cache -type f -delete
88+
89+
RUN curl https://sh.rustup.rs -sSf | bash -s -- -y
90+
91+
COPY --from=collector /llama.cpp/bin /app
92+
COPY --from=collector /llama.cpp/gguf-py /app/gguf-py
93+
94+
RUN pip install --no-cache-dir --break-system-packages \
95+
-r /app/gguf-py/requirements.txt
96+
97+
ENTRYPOINT [ "/app/tools.sh" ]
98+
99+
100+
### CLI Only
101+
FROM base AS light
102+
103+
WORKDIR /llama.cpp/bin
104+
105+
# Copy llama.cpp binaries and libraries
106+
COPY --from=collector /llama.cpp/bin/llama-cli /llama.cpp/bin
107+
108+
ENTRYPOINT [ "/llama.cpp/bin/llama-cli" ]
109+
110+
111+
### Server
112+
FROM base AS server
113+
114+
ENV LLAMA_ARG_HOST=0.0.0.0
115+
116+
WORKDIR /llama.cpp/bin
117+
118+
# Copy llama.cpp binaries and libraries
119+
COPY --from=collector /llama.cpp/bin/llama-server /llama.cpp/bin
120+
121+
EXPOSE 8080
122+
123+
ENTRYPOINT [ "/llama.cpp/bin/llama-server" ]

.github/workflows/build-amd.yml

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
name: CI (AMD)

on:
  workflow_dispatch: # allows manual triggering
  push:
    branches:
      - master
    paths: [
      '.github/workflows/build-amd.yml',
      '**/CMakeLists.txt',
      # fixed: was '**/.cmake', which only matches files literally named
      # ".cmake"; '**/*.cmake' matches CMake module/script files as intended
      '**/*.cmake',
      '**/*.h',
      '**/*.hpp',
      '**/*.c',
      '**/*.cpp',
      '**/*.cu',
      '**/*.cuh',
      '**/*.comp'
    ]

# Cancel superseded runs of the same ref; fall back to run_id so pushes to
# master (no head_ref) never cancel each other.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true

jobs:
  # Vulkan backend CI on a self-hosted AMD x64 runner
  ggml-ci-x64-amd-vulkan:
    runs-on: [self-hosted, Linux, X64, AMD]

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4

      - name: Test
        id: ggml-ci
        run: |
          vulkaninfo --summary
          GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp

  # ROCm backend CI on a self-hosted AMD x64 runner (gfx1101 target)
  ggml-ci-x64-amd-rocm:
    runs-on: [self-hosted, Linux, X64, AMD]

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4

      - name: Test
        id: ggml-ci
        run: |
          amd-smi static
          GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS="gfx1101" bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp

.windsurf/rules/css-architecture.md

Lines changed: 0 additions & 7 deletions
This file was deleted.

.windsurf/rules/sveltekit-architecture.md

Lines changed: 0 additions & 48 deletions
This file was deleted.

.windsurf/rules/tests.md

Lines changed: 0 additions & 9 deletions
This file was deleted.

.windsurf/rules/typescript-architecture.md

Lines changed: 0 additions & 7 deletions
This file was deleted.

CMakeLists.txt

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,10 @@ if (LLAMA_CUBLAS)
135135
# 75 == int8 tensor cores
136136
# 80 == Ampere, asynchronous data loading, faster tensor core instructions
137137
message("CUDA Toolkit Version: ${CUDAToolkit_VERSION}")
138-
if(CUDAToolkit_VERSION VERSION_GREATER 12)
138+
if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 13)
139+
add_compile_definitions(GGML_CUDA_USE_GRAPHS) #try enable cuda graphs on cu13 build
140+
set(CMAKE_CUDA_ARCHITECTURES "75-virtual;80-virtual;86-virtual") # lowest CUDA 13 standard
141+
elseif(CUDAToolkit_VERSION VERSION_GREATER 12)
139142
add_compile_definitions(GGML_CUDA_USE_GRAPHS) #try enable cuda graphs on cu12 build
140143
set(CMAKE_CUDA_ARCHITECTURES "50-virtual;61-virtual;70-virtual;75-virtual;80-virtual") # lowest CUDA 12 standard + lowest for integer intrinsics
141144
else()

Makefile

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,12 @@ NVCCFLAGS += -Wno-deprecated-gpu-targets \
231231
-gencode arch=compute_75,code=compute_75 \
232232
-gencode arch=compute_80,code=compute_80
233233

234+
else ifdef LLAMA_ARCHES_CU13
235+
NVCCFLAGS += -Wno-deprecated-gpu-targets \
236+
-gencode arch=compute_75,code=compute_75 \
237+
-gencode arch=compute_80,code=compute_80 \
238+
-gencode arch=compute_86,code=compute_86
239+
234240
else
235241
NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=all
236242
endif
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
# Cross-compilation toolchain file for riscv64 Linux targets.
# Requires the RISCV_ROOT_PATH environment variable to point at the root of a
# riscv64-unknown-linux-gnu GCC toolchain (unless already building on riscv).
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_PROCESSOR riscv64)
set(CMAKE_SYSTEM_VERSION 1)

if (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "^(riscv)")
    # Native riscv host: no cross toolchain needed.
    message(STATUS "HOST SYSTEM ${CMAKE_HOST_SYSTEM_PROCESSOR}")
else()
    set(GNU_MACHINE riscv64-unknown-linux-gnu CACHE STRING "GNU compiler triple")
    if (DEFINED ENV{RISCV_ROOT_PATH})
        # Normalize to CMake-style forward-slash path (matters on Windows hosts).
        file(TO_CMAKE_PATH $ENV{RISCV_ROOT_PATH} RISCV_ROOT_PATH)
    else()
        message(FATAL_ERROR "RISCV_ROOT_PATH env must be defined")
    endif()

    set(RISCV_ROOT_PATH ${RISCV_ROOT_PATH} CACHE STRING "root path to riscv toolchain")
    set(CMAKE_C_COMPILER ${RISCV_ROOT_PATH}/bin/riscv64-unknown-linux-gnu-gcc)
    set(CMAKE_CXX_COMPILER ${RISCV_ROOT_PATH}/bin/riscv64-unknown-linux-gnu-g++)
    set(CMAKE_STRIP ${RISCV_ROOT_PATH}/bin/riscv64-unknown-linux-gnu-strip)
    set(CMAKE_FIND_ROOT_PATH "${RISCV_ROOT_PATH}/riscv64-unknown-linux-gnu")
    set(CMAKE_SYSROOT "${RISCV_ROOT_PATH}/sysroot")
endif()

# Search only the target sysroot for libraries/headers/packages, but never for
# build-time programs (those must run on the host).
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
# rv64gcv + half-precision FP, address generation and cache-prefetch extensions.
set(CMAKE_C_FLAGS "-march=rv64gcv_zfh_zba_zicbop -mabi=lp64d ${CMAKE_C_FLAGS}")
# Fixed: previously expanded the undefined ${CXX_FLAGS}, which silently
# discarded any user-provided CMAKE_CXX_FLAGS.
set(CMAKE_CXX_FLAGS "-march=rv64gcv_zfh_zba_zicbop -mabi=lp64d ${CMAKE_CXX_FLAGS}")
# libatomic is needed for some atomic ops not lowered inline on riscv64.
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -latomic")

0 commit comments

Comments
 (0)