
Commit 1392cd5

Merge branch 'ggml-org:master' into master

2 parents: 58dbb06 + 264f1b5

79 files changed: +1320 / -2088 lines (only a subset of the changed files is shown below)

.clang-tidy

Lines changed: 1 addition & 0 deletions

@@ -17,6 +17,7 @@ Checks: >
     clang-analyzer-*,
     -clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling,
     performance-*,
+    -performance-enum-size,
     portability-*,
     -portability-simd-intrinsics,
     misc-*,
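
The only functional change here is that the performance-enum-size check joins the list of disabled clang-tidy checks. For context (not part of the commit): clang-tidy picks up the nearest .clang-tidy file automatically when run against a compilation database, so a local invocation along these lines would already skip the newly disabled check. The build directory and source file below are only illustrative.

# Generate a compilation database so clang-tidy can resolve include paths and flags.
cmake -S . -B build -DCMAKE_EXPORT_COMPILE_COMMANDS=ON

# clang-tidy reads the repository's .clang-tidy (including the disabled
# performance-enum-size entry) on its own; no extra flags are needed.
clang-tidy -p build src/llama.cpp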

.devops/s390x.Dockerfile

Lines changed: 122 additions & 0 deletions

@@ -0,0 +1,122 @@ (new file)

ARG GCC_VERSION=15.2.0
ARG UBUNTU_VERSION=24.04

### Build Llama.cpp stage
FROM --platform=linux/s390x gcc:${GCC_VERSION} AS build

RUN --mount=type=cache,target=/var/cache/apt \
    --mount=type=cache,target=/var/lib/apt/lists \
    apt update -y && \
    apt upgrade -y && \
    apt install -y --no-install-recommends \
        git cmake ccache ninja-build \
        # WARNING: Do not use libopenblas-openmp-dev. libopenblas-dev is faster.
        libopenblas-dev libcurl4-openssl-dev && \
    rm -rf /var/lib/apt/lists/*

WORKDIR /app
COPY . .

RUN --mount=type=cache,target=/root/.ccache \
    --mount=type=cache,target=/app/build \
    cmake -S . -B build -G Ninja \
        -DCMAKE_BUILD_TYPE=Release \
        -DCMAKE_C_COMPILER_LAUNCHER=ccache \
        -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
        -DLLAMA_BUILD_TESTS=OFF \
        -DGGML_BACKEND_DL=OFF \
        -DGGML_NATIVE=OFF \
        -DGGML_BLAS=ON \
        -DGGML_BLAS_VENDOR=OpenBLAS && \
    cmake --build build --config Release -j $(nproc) && \
    cmake --install build --prefix /opt/llama.cpp

COPY *.py /opt/llama.cpp/bin
COPY .devops/tools.sh /opt/llama.cpp/bin

COPY gguf-py /opt/llama.cpp/gguf-py
COPY requirements.txt /opt/llama.cpp/gguf-py
COPY requirements /opt/llama.cpp/gguf-py/requirements


### Collect all llama.cpp binaries, libraries and distro libraries
FROM --platform=linux/s390x scratch AS collector

# Copy llama.cpp binaries and libraries
COPY --from=build /opt/llama.cpp/bin /llama.cpp/bin
COPY --from=build /opt/llama.cpp/lib /llama.cpp/lib
COPY --from=build /opt/llama.cpp/gguf-py /llama.cpp/gguf-py


### Base image
FROM --platform=linux/s390x ubuntu:${UBUNTU_VERSION} AS base

RUN --mount=type=cache,target=/var/cache/apt \
    --mount=type=cache,target=/var/lib/apt/lists \
    apt update -y && \
    apt install -y --no-install-recommends \
        # WARNING: Do not use libopenblas-openmp-dev. libopenblas-dev is faster.
        curl libgomp1 libopenblas-dev && \
    apt autoremove -y && \
    apt clean -y && \
    rm -rf /tmp/* /var/tmp/* && \
    find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete && \
    find /var/cache -type f -delete

# Copy llama.cpp libraries
COPY --from=collector /llama.cpp/lib /usr/lib/s390x-linux-gnu


### Full
FROM --platform=linux/s390x base AS full

ENV PATH="/root/.cargo/bin:${PATH}"
WORKDIR /app

RUN --mount=type=cache,target=/var/cache/apt \
    --mount=type=cache,target=/var/lib/apt/lists \
    apt update -y && \
    apt install -y \
        git cmake libjpeg-dev \
        python3 python3-pip python3-dev && \
    apt autoremove -y && \
    apt clean -y && \
    rm -rf /tmp/* /var/tmp/* && \
    find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete && \
    find /var/cache -type f -delete

RUN curl https://sh.rustup.rs -sSf | bash -s -- -y

COPY --from=collector /llama.cpp/bin /app
COPY --from=collector /llama.cpp/gguf-py /app/gguf-py

RUN pip install --no-cache-dir --break-system-packages \
    -r /app/gguf-py/requirements.txt

ENTRYPOINT [ "/app/tools.sh" ]


### CLI Only
FROM --platform=linux/s390x base AS light

WORKDIR /llama.cpp/bin

# Copy llama.cpp binaries and libraries
COPY --from=collector /llama.cpp/bin/llama-cli /llama.cpp/bin

ENTRYPOINT [ "/llama.cpp/bin/llama-cli" ]


### Server
FROM --platform=linux/s390x base AS server

ENV LLAMA_ARG_HOST=0.0.0.0

WORKDIR /llama.cpp/bin

# Copy llama.cpp binaries and libraries
COPY --from=collector /llama.cpp/bin/llama-server /llama.cpp/bin

EXPOSE 8080

ENTRYPOINT [ "/llama.cpp/bin/llama-server" ]
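
The Dockerfile produces three final images from shared stages: full (tools plus the Python conversion scripts), light (llama-cli only) and server (llama-server only). As a rough sketch, not part of the commit, of how the server target could be built and run locally: the image name and model path below are hypothetical, and building linux/s390x images on a non-s390x host generally requires QEMU/binfmt emulation with buildx.

# Build only the "server" stage for linux/s390x (image tag is illustrative).
docker buildx build --platform linux/s390x \
    -f .devops/s390x.Dockerfile --target server \
    -t llama.cpp:server-s390x .

# The image exposes port 8080 and sets LLAMA_ARG_HOST=0.0.0.0, so mapping the
# port and mounting a local model directory is enough to serve a GGUF model.
docker run --rm -p 8080:8080 -v "$PWD/models:/models" \
    llama.cpp:server-s390x -m /models/model.gguf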

.github/workflows/build.yml

Lines changed: 0 additions & 2 deletions

@@ -1396,7 +1396,6 @@ jobs:
       - name: Test
         id: ggml-ci
         run: |
-          vulkaninfo
           GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
 
   ggml-ci-x64-amd-v710-rocm:
@@ -1410,7 +1409,6 @@ jobs:
       - name: Test
         id: ggml-ci
         run: |
-          vulkaninfo
           GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS="gfx1101" bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
 
   ggml-ci-mac-metal:

.github/workflows/docker.yml

Lines changed: 1 addition & 0 deletions

@@ -44,6 +44,7 @@ jobs:
           - { tag: "musa", dockerfile: ".devops/musa.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true }
           - { tag: "intel", dockerfile: ".devops/intel.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true }
           - { tag: "vulkan", dockerfile: ".devops/vulkan.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false }
+          - { tag: "s390x", dockerfile: ".devops/s390x.Dockerfile", platforms: "linux/s390x", full: true, light: true, server: true, free_disk_space: false }
           # Note: the rocm images are failing due to a compiler error and are disabled until this is fixed to allow the workflow to complete
           #- {tag: "rocm", dockerfile: ".devops/rocm.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, free_disk_space: true }
     steps:
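
The new matrix entry enables full, light and server image builds from the s390x Dockerfile on the linux/s390x platform. Assuming the workflow keeps the tag naming used for the existing entries, the published s390x variants could be pulled roughly as shown below; the registry path and tag names are an assumption, not confirmed by this diff, and depend on the workflow's publish step.

# Hypothetical pulls, following the naming pattern of the existing image tags.
docker pull ghcr.io/ggml-org/llama.cpp:full-s390x
docker pull ghcr.io/ggml-org/llama.cpp:light-s390x
docker pull ghcr.io/ggml-org/llama.cpp:server-s390x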

CODEOWNERS

Lines changed: 104 additions & 10 deletions

@@ -1,12 +1,106 @@
 # collaborators can optionally add themselves here to indicate their availability for reviewing related PRs
+# multiplie collaborators per item can be specified
 
-/ci/ @ggerganov
-/.devops/*.Dockerfile @ngxson
-/tools/server/ @ngxson
-/ggml/src/ggml-cuda/fattn* @JohannesGaessler
-/ggml/src/ggml-cuda/mmq.* @JohannesGaessler
-/ggml/src/ggml-cuda/mmvq.* @JohannesGaessler
-/ggml/src/ggml-opt.cpp @JohannesGaessler
-/ggml/src/gguf.cpp @JohannesGaessler
-/ggml/src/ggml-vulkan/ @0cc4m
-/ggml/src/ggml-zdnn/ @taronaeo
+/.devops/*.Dockerfile @ngxson
+/.github/actions/ @slaren
+/.github/workflows/ @CISC
+/.github/workflows/release.yml @slaren
+/.github/workflows/winget.yml @slaren
+/ci/ @ggerganov
+/cmake/ @ggerganov
+/common/CMakeLists.txt @ggerganov
+/common/arg.* @ggerganov @ericcurtin
+/common/base64.hpp.* @ggerganov
+/common/build-info.* @ggerganov
+/common/common.* @ggerganov
+/common/console.* @ggerganov
+/common/llguidance.* @ggerganov
+/common/log.* @ggerganov
+/common/sampling.* @ggerganov
+/common/speculative.* @ggerganov
+/convert_*.py @CISC
+/examples/batched.swift/ @ggerganov
+/examples/batched/ @ggerganov
+/examples/convert-llama2c-to-ggml/ @ggerganov
+/examples/deprecation-warning/ @ggerganov
+/examples/diffusion/ @am17an
+/examples/embedding/ @ggerganov
+/examples/eval-callback/ @ggerganov
+/examples/export-docs/ @ggerganov
+/examples/gen-docs/ @ggerganov
+/examples/gguf/ @ggerganov
+/examples/llama.android/ @ggerganov
+/examples/llama.swiftui/ @ggerganov
+/examples/llama.vim @ggerganov
+/examples/lookahead/ @ggerganov
+/examples/lookup/ @JohannesGaessler
+/examples/model-conversion/ @danbev
+/examples/parallel/ @ggerganov
+/examples/passkey/ @ggerganov
+/examples/retrieval/ @ggerganov
+/examples/save-load-state/ @ggerganov
+/examples/simple-chat/ @slaren
+/examples/simple/ @slaren
+/examples/speculative-simple/ @ggerganov
+/examples/speculative/ @ggerganov
+/ggml/cmake/ @ggerganov
+/ggml/include/ @ggerganov @slaren
+/ggml/src/ggml-alloc.c @slaren
+/ggml/src/ggml-backend* @slaren
+/ggml/src/ggml-blas/ @slaren
+/ggml/src/ggml-common.h @ggerganov @slaren
+/ggml/src/ggml-cpu/ @ggerganov @slaren
+/ggml/src/ggml-cuda/common.cuh @slaren
+/ggml/src/ggml-cuda/fattn* @JohannesGaessler
+/ggml/src/ggml-cuda/ggml-cuda.cu @slaren
+/ggml/src/ggml-cuda/mmf.* @JohannesGaessler
+/ggml/src/ggml-cuda/mmq.* @JohannesGaessler
+/ggml/src/ggml-cuda/mmvf.* @JohannesGaessler
+/ggml/src/ggml-cuda/mmvq.* @JohannesGaessler
+/ggml/src/ggml-impl.h @ggerganov @slaren
+/ggml/src/ggml-metal/ @ggerganov
+/ggml/src/ggml-opt.cpp @JohannesGaessler
+/ggml/src/ggml-quants.* @ggerganov
+/ggml/src/ggml-threading.* @ggerganov @slaren
+/ggml/src/ggml-vulkan/ @0cc4m
+/ggml/src/ggml-zdnn/ @taronaeo
+/ggml/src/ggml.c @ggerganov @slaren
+/ggml/src/ggml.cpp @ggerganov @slaren
+/ggml/src/gguf.cpp @JohannesGaessler @Green-Sky
+/gguf-py/ @CISC
+/media/ @ggerganov
+/scripts/gen* @ggerganov
+/scripts/get* @ggerganov
+/scripts/sync* @ggerganov
+/src/ @ggerganov
+/src/llama-adapter.* @CISC
+/src/llama-arch.* @CISC
+/src/llama-chat.* @ngxson
+/src/llama-graph.* @CISC
+/src/llama-model-loader.* @slaren
+/src/llama-model.* @CISC
+/src/llama-vocab.* @CISC
+/tests/ @ggerganov
+/tests/test-backend-ops.cpp @slaren
+/tests/test-thread-safety.cpp @slaren
+/tools/batched-bench/ @ggerganov
+/tools/llama-bench/ @slaren
+/tools/main/ @ggerganov
+/tools/mtmd/ @ngxson
+/tools/perplexity/ @ggerganov
+/tools/quantize/ @ggerganov
+/tools/run/ @ericcurtin
+/tools/server/* @ngxson @ggerganov @ericcurtin # no subdir
+/tools/server/webui/ @allozaur
+/tools/tokenize/ @ggerganov
+/tools/tts/ @ggerganov
+/vendor/ @ggerganov
+.clang-format @slaren
+.clang-tidy @slaren
+AUTHORS @ggerganov
+CMakeLists.txt @ggerganov
+CONTRIBUTING.md @ggerganov
+LICENSE @ggerganov
+README.md @ggerganov
+SECURITY.md @ggerganov
+requirements*.txt @CISC

CONTRIBUTING.md

Lines changed: 29 additions & 5 deletions

@@ -1,4 +1,12 @@
-# Pull requests (for contributors)
+# Contributors
+
+The project differentiates between 3 levels of contributors:
+
+- Contributors: people who have contributed before (no special privileges)
+- Collaborators (Triage): people with significant contributions, who may be responsible for some parts of the code, and are expected to maintain and review contributions for the code they own
+- Maintainers: responsible for reviewing and merging PRs, after approval from the code owners
+
+# Pull requests (for contributors & collaborators)
 
 - llama.cpp uses the ggml tensor library for model evaluation. If you are unfamiliar with ggml, consider taking a look at the [examples in the ggml repository](https://github.com/ggml-org/ggml/tree/master/examples/). [simple](https://github.com/ggml-org/ggml/tree/master/examples/simple) shows the bare minimum for using ggml. [gpt-2](https://github.com/ggml-org/ggml/tree/master/examples/gpt-2) has minimal implementations for language model inference using GPT-2. [mnist](https://github.com/ggml-org/ggml/tree/master/examples/mnist) demonstrates how to train and evaluate a simple image classifier
 - Test your changes:
@@ -9,15 +17,16 @@
 - Create separate PRs for each feature or fix. Avoid combining unrelated changes in a single PR
 - Consider allowing write access to your branch for faster reviews, as reviewers can push commits directly
 - If your PR becomes stale, don't hesitate to ping the maintainers in the comments
+- Maintainers will rely on your insights and approval when making a final decision to approve and merge a PR
+- Consider adding yourself to [CODEOWNERS](CODEOWNERS) to indicate your availability for reviewing related PRs
 
-# Pull requests (for collaborators)
+# Pull requests (for maintainers)
 
 - Squash-merge PRs
 - Use the following format for the squashed commit title: `<module> : <commit title> (#<issue_number>)`. For example: `utils : fix typo in utils.py (#1234)`
 - Optionally pick a `<module>` from here: https://github.com/ggml-org/llama.cpp/wiki/Modules
-- Consider adding yourself to [CODEOWNERS](CODEOWNERS)
-- Let authors, who are also collaborators, merge their own PRs
-- When merging a PR by a contributor, make sure you have a good understanding of the changes
+- Let other maintainers, merge their own PRs
+- When merging a PR, make sure you have a good understanding of the changes
 - Be mindful of maintenance: most of the work going into a feature happens after the PR is merged. If the PR author is not committed to contribute long-term, someone else needs to take responsibility (you)
 
 # Coding guidelines
@@ -117,6 +126,21 @@
 #endif // FOO
 ```
 
+# Code maintenance
+
+- Existing code should have designated collaborators and/or maintainers specified in the [CODEOWNERS](CODEOWNERS) file reponsible for:
+  - Reviewing and merging related PRs
+  - Fixing related bugs
+  - Providing developer guidance/support
+
+- When adding or modifying a large piece of code:
+  - If you are a collaborator, make sure to add yourself to [CODEOWNERS](CODEOWNERS) to indicate your availability for reviewing related PRs
+  - If you are a contributor, find an existing collaborator who is willing to review and maintain your code long-term
+  - Provide the necessary CI workflow (and hardware) to test your changes (see [ci/README.md](https://github.com/ggml-org/llama.cpp/tree/master/ci))
+
+- New code should follow the guidelines (coding, naming, etc.) outlined in this document. Exceptions are allowed in isolated, backend-specific parts of the code that do not interface directly with the `ggml` interfaces.
+  _(NOTE: for legacy reasons, existing code is not required to follow this guideline)_
+
 # Documentation
 
 - Documentation is a community effort

README.md

Lines changed: 2 additions & 1 deletion

@@ -274,6 +274,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
 | [Vulkan](docs/build.md#vulkan) | GPU |
 | [CANN](docs/build.md#cann) | Ascend NPU |
 | [OpenCL](docs/backend/OPENCL.md) | Adreno GPU |
+| [IBM zDNN](docs/backend/zDNN.md) | IBM Z & LinuxONE |
 | [WebGPU [In Progress]](docs/build.md#webgpu) | All |
 | [RPC](https://github.com/ggml-org/llama.cpp/tree/master/tools/rpc) | All |
@@ -520,8 +521,8 @@ To learn more about model quantization, [read this documentation](tools/quantize
 ## Contributing
 
 - Contributors can open PRs
-- Collaborators can push to branches in the `llama.cpp` repo and merge PRs into the `master` branch
 - Collaborators will be invited based on contributions
+- Maintainers can push to branches in the `llama.cpp` repo and merge PRs into the `master` branch
 - Any help with managing issues, PRs and projects is very appreciated!
 - See [good first issues](https://github.com/ggml-org/llama.cpp/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22) for tasks suitable for first contributions
 - Read the [CONTRIBUTING.md](CONTRIBUTING.md) for more information
