
Commit 1392cd5

Merge branch 'ggml-org:master' into master

2 parents: 58dbb06 + 264f1b5

79 files changed: +1320 / -2088 lines (only a subset of the changed files is shown below)

.clang-tidy

Lines changed: 1 addition & 0 deletions

@@ -17,6 +17,7 @@ Checks: >
     clang-analyzer-*,
     -clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling,
     performance-*,
+    -performance-enum-size,
     portability-*,
     -portability-simd-intrinsics,
     misc-*,
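
The only functional change here is that the performance-enum-size check joins the list of disabled clang-tidy checks. For context (not part of the commit): clang-tidy picks up the nearest .clang-tidy file automatically when run against a compilation database, so a local invocation along these lines would already skip the newly disabled check. The build directory and source file below are only illustrative.

# Generate a compilation database so clang-tidy can resolve include paths and flags.
cmake -S . -B build -DCMAKE_EXPORT_COMPILE_COMMANDS=ON

# clang-tidy reads the repository's .clang-tidy (including the disabled
# performance-enum-size entry) on its own; no extra flags are needed.
clang-tidy -p build src/llama.cpp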

.devops/s390x.Dockerfile

Lines changed: 122 additions & 0 deletions

@@ -0,0 +1,122 @@ (new file)

ARG GCC_VERSION=15.2.0
ARG UBUNTU_VERSION=24.04

### Build Llama.cpp stage
FROM --platform=linux/s390x gcc:${GCC_VERSION} AS build

RUN --mount=type=cache,target=/var/cache/apt \
    --mount=type=cache,target=/var/lib/apt/lists \
    apt update -y && \
    apt upgrade -y && \
    apt install -y --no-install-recommends \
        git cmake ccache ninja-build \
        # WARNING: Do not use libopenblas-openmp-dev. libopenblas-dev is faster.
        libopenblas-dev libcurl4-openssl-dev && \
    rm -rf /var/lib/apt/lists/*

WORKDIR /app
COPY . .

RUN --mount=type=cache,target=/root/.ccache \
    --mount=type=cache,target=/app/build \
    cmake -S . -B build -G Ninja \
        -DCMAKE_BUILD_TYPE=Release \
        -DCMAKE_C_COMPILER_LAUNCHER=ccache \
        -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
        -DLLAMA_BUILD_TESTS=OFF \
        -DGGML_BACKEND_DL=OFF \
        -DGGML_NATIVE=OFF \
        -DGGML_BLAS=ON \
        -DGGML_BLAS_VENDOR=OpenBLAS && \
    cmake --build build --config Release -j $(nproc) && \
    cmake --install build --prefix /opt/llama.cpp

COPY *.py /opt/llama.cpp/bin
COPY .devops/tools.sh /opt/llama.cpp/bin

COPY gguf-py /opt/llama.cpp/gguf-py
COPY requirements.txt /opt/llama.cpp/gguf-py
COPY requirements /opt/llama.cpp/gguf-py/requirements


### Collect all llama.cpp binaries, libraries and distro libraries
FROM --platform=linux/s390x scratch AS collector

# Copy llama.cpp binaries and libraries
COPY --from=build /opt/llama.cpp/bin /llama.cpp/bin
COPY --from=build /opt/llama.cpp/lib /llama.cpp/lib
COPY --from=build /opt/llama.cpp/gguf-py /llama.cpp/gguf-py


### Base image
FROM --platform=linux/s390x ubuntu:${UBUNTU_VERSION} AS base

RUN --mount=type=cache,target=/var/cache/apt \
    --mount=type=cache,target=/var/lib/apt/lists \
    apt update -y && \
    apt install -y --no-install-recommends \
        # WARNING: Do not use libopenblas-openmp-dev. libopenblas-dev is faster.
        curl libgomp1 libopenblas-dev && \
    apt autoremove -y && \
    apt clean -y && \
    rm -rf /tmp/* /var/tmp/* && \
    find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete && \
    find /var/cache -type f -delete

# Copy llama.cpp libraries
COPY --from=collector /llama.cpp/lib /usr/lib/s390x-linux-gnu


### Full
FROM --platform=linux/s390x base AS full

ENV PATH="/root/.cargo/bin:${PATH}"
WORKDIR /app

RUN --mount=type=cache,target=/var/cache/apt \
    --mount=type=cache,target=/var/lib/apt/lists \
    apt update -y && \
    apt install -y \
        git cmake libjpeg-dev \
        python3 python3-pip python3-dev && \
    apt autoremove -y && \
    apt clean -y && \
    rm -rf /tmp/* /var/tmp/* && \
    find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete && \
    find /var/cache -type f -delete

RUN curl https://sh.rustup.rs -sSf | bash -s -- -y

COPY --from=collector /llama.cpp/bin /app
COPY --from=collector /llama.cpp/gguf-py /app/gguf-py

RUN pip install --no-cache-dir --break-system-packages \
    -r /app/gguf-py/requirements.txt

ENTRYPOINT [ "/app/tools.sh" ]


### CLI Only
FROM --platform=linux/s390x base AS light

WORKDIR /llama.cpp/bin

# Copy llama.cpp binaries and libraries
COPY --from=collector /llama.cpp/bin/llama-cli /llama.cpp/bin

ENTRYPOINT [ "/llama.cpp/bin/llama-cli" ]


### Server
FROM --platform=linux/s390x base AS server

ENV LLAMA_ARG_HOST=0.0.0.0

WORKDIR /llama.cpp/bin

# Copy llama.cpp binaries and libraries
COPY --from=collector /llama.cpp/bin/llama-server /llama.cpp/bin

EXPOSE 8080

ENTRYPOINT [ "/llama.cpp/bin/llama-server" ]
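
The Dockerfile produces three final images from shared stages: full (tools plus the Python conversion scripts), light (llama-cli only) and server (llama-server only). As a rough sketch, not part of the commit, of how the server target could be built and run locally: the image name and model path below are hypothetical, and building linux/s390x images on a non-s390x host generally requires QEMU/binfmt emulation with buildx.

# Build only the "server" stage for linux/s390x (image tag is illustrative).
docker buildx build --platform linux/s390x \
    -f .devops/s390x.Dockerfile --target server \
    -t llama.cpp:server-s390x .

# The image exposes port 8080 and sets LLAMA_ARG_HOST=0.0.0.0, so mapping the
# port and mounting a local model directory is enough to serve a GGUF model.
docker run --rm -p 8080:8080 -v "$PWD/models:/models" \
    llama.cpp:server-s390x -m /models/model.gguf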

.github/workflows/build.yml

Lines changed: 0 additions & 2 deletions

@@ -1396,7 +1396,6 @@ jobs:
       - name: Test
         id: ggml-ci
         run: |
-          vulkaninfo
           GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
 
   ggml-ci-x64-amd-v710-rocm:
@@ -1410,7 +1409,6 @@ jobs:
       - name: Test
         id: ggml-ci
         run: |
-          vulkaninfo
           GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS="gfx1101" bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
 
   ggml-ci-mac-metal:

.github/workflows/docker.yml

Lines changed: 1 addition & 0 deletions

@@ -44,6 +44,7 @@ jobs:
           - { tag: "musa", dockerfile: ".devops/musa.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true }
           - { tag: "intel", dockerfile: ".devops/intel.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true }
           - { tag: "vulkan", dockerfile: ".devops/vulkan.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false }
+          - { tag: "s390x", dockerfile: ".devops/s390x.Dockerfile", platforms: "linux/s390x", full: true, light: true, server: true, free_disk_space: false }
           # Note: the rocm images are failing due to a compiler error and are disabled until this is fixed to allow the workflow to complete
           #- {tag: "rocm", dockerfile: ".devops/rocm.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, free_disk_space: true }
     steps:
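
The new matrix entry enables full, light and server image builds from the s390x Dockerfile on the linux/s390x platform. Assuming the workflow keeps the tag naming used for the existing entries, the published s390x variants could be pulled roughly as shown below; the registry path and tag names are an assumption, not confirmed by this diff, and depend on the workflow's publish step.

# Hypothetical pulls, following the naming pattern of the existing image tags.
docker pull ghcr.io/ggml-org/llama.cpp:full-s390x
docker pull ghcr.io/ggml-org/llama.cpp:light-s390x
docker pull ghcr.io/ggml-org/llama.cpp:server-s390x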

CODEOWNERS

Lines changed: 104 additions & 10 deletions

@@ -1,12 +1,106 @@
 # collaborators can optionally add themselves here to indicate their availability for reviewing related PRs
+# multiplie collaborators per item can be specified
 
-/ci/ @ggerganov
-/.devops/*.Dockerfile @ngxson
-/tools/server/ @ngxson
-/ggml/src/ggml-cuda/fattn* @JohannesGaessler
-/ggml/src/ggml-cuda/mmq.* @JohannesGaessler
-/ggml/src/ggml-cuda/mmvq.* @JohannesGaessler
-/ggml/src/ggml-opt.cpp @JohannesGaessler
-/ggml/src/gguf.cpp @JohannesGaessler
-/ggml/src/ggml-vulkan/ @0cc4m
-/ggml/src/ggml-zdnn/ @taronaeo
+/.devops/*.Dockerfile @ngxson
+/.github/actions/ @slaren
+/.github/workflows/ @CISC
+/.github/workflows/release.yml @slaren
+/.github/workflows/winget.yml @slaren
+/ci/ @ggerganov
+/cmake/ @ggerganov
+/common/CMakeLists.txt @ggerganov
+/common/arg.* @ggerganov @ericcurtin
+/common/base64.hpp.* @ggerganov
+/common/build-info.* @ggerganov
+/common/common.* @ggerganov
+/common/console.* @ggerganov
+/common/llguidance.* @ggerganov
+/common/log.* @ggerganov
+/common/sampling.* @ggerganov
+/common/speculative.* @ggerganov
+/convert_*.py @CISC
+/examples/batched.swift/ @ggerganov
+/examples/batched/ @ggerganov
+/examples/convert-llama2c-to-ggml/ @ggerganov
+/examples/deprecation-warning/ @ggerganov
+/examples/diffusion/ @am17an
+/examples/embedding/ @ggerganov
+/examples/eval-callback/ @ggerganov
+/examples/export-docs/ @ggerganov
+/examples/gen-docs/ @ggerganov
+/examples/gguf/ @ggerganov
+/examples/llama.android/ @ggerganov
+/examples/llama.swiftui/ @ggerganov
+/examples/llama.vim @ggerganov
+/examples/lookahead/ @ggerganov
+/examples/lookup/ @JohannesGaessler
+/examples/model-conversion/ @danbev
+/examples/parallel/ @ggerganov
+/examples/passkey/ @ggerganov
+/examples/retrieval/ @ggerganov
+/examples/save-load-state/ @ggerganov
+/examples/simple-chat/ @slaren
+/examples/simple/ @slaren
+/examples/speculative-simple/ @ggerganov
+/examples/speculative/ @ggerganov
+/ggml/cmake/ @ggerganov
+/ggml/include/ @ggerganov @slaren
+/ggml/src/ggml-alloc.c @slaren
+/ggml/src/ggml-backend* @slaren
+/ggml/src/ggml-blas/ @slaren
+/ggml/src/ggml-common.h @ggerganov @slaren
+/ggml/src/ggml-cpu/ @ggerganov @slaren
+/ggml/src/ggml-cuda/common.cuh @slaren
+/ggml/src/ggml-cuda/fattn* @JohannesGaessler
+/ggml/src/ggml-cuda/ggml-cuda.cu @slaren
+/ggml/src/ggml-cuda/mmf.* @JohannesGaessler
+/ggml/src/ggml-cuda/mmq.* @JohannesGaessler
+/ggml/src/ggml-cuda/mmvf.* @JohannesGaessler
+/ggml/src/ggml-cuda/mmvq.* @JohannesGaessler
+/ggml/src/ggml-impl.h @ggerganov @slaren
+/ggml/src/ggml-metal/ @ggerganov
+/ggml/src/ggml-opt.cpp @JohannesGaessler
+/ggml/src/ggml-quants.* @ggerganov
+/ggml/src/ggml-threading.* @ggerganov @slaren
+/ggml/src/ggml-vulkan/ @0cc4m
+/ggml/src/ggml-zdnn/ @taronaeo
+/ggml/src/ggml.c @ggerganov @slaren
+/ggml/src/ggml.cpp @ggerganov @slaren
+/ggml/src/gguf.cpp @JohannesGaessler @Green-Sky
+/gguf-py/ @CISC
+/media/ @ggerganov
+/scripts/gen* @ggerganov
+/scripts/get* @ggerganov
+/scripts/sync* @ggerganov
+/src/ @ggerganov
+/src/llama-adapter.* @CISC
+/src/llama-arch.* @CISC
+/src/llama-chat.* @ngxson
+/src/llama-graph.* @CISC
+/src/llama-model-loader.* @slaren
+/src/llama-model.* @CISC
+/src/llama-vocab.* @CISC
+/tests/ @ggerganov
+/tests/test-backend-ops.cpp @slaren
+/tests/test-thread-safety.cpp @slaren
+/tools/batched-bench/ @ggerganov
+/tools/llama-bench/ @slaren
+/tools/main/ @ggerganov
+/tools/mtmd/ @ngxson
+/tools/perplexity/ @ggerganov
+/tools/quantize/ @ggerganov
+/tools/run/ @ericcurtin
+/tools/server/* @ngxson @ggerganov @ericcurtin # no subdir
+/tools/server/webui/ @allozaur
+/tools/tokenize/ @ggerganov
+/tools/tts/ @ggerganov
+/vendor/ @ggerganov
+.clang-format @slaren
+.clang-tidy @slaren
+AUTHORS @ggerganov
+CMakeLists.txt @ggerganov
+CONTRIBUTING.md @ggerganov
+LICENSE @ggerganov
+README.md @ggerganov
+SECURITY.md @ggerganov
+requirements*.txt @CISC

CONTRIBUTING.md

Lines changed: 29 additions & 5 deletions

@@ -1,4 +1,12 @@
-# Pull requests (for contributors)
+# Contributors
+
+The project differentiates between 3 levels of contributors:
+
+- Contributors: people who have contributed before (no special privileges)
+- Collaborators (Triage): people with significant contributions, who may be responsible for some parts of the code, and are expected to maintain and review contributions for the code they own
+- Maintainers: responsible for reviewing and merging PRs, after approval from the code owners
+
+# Pull requests (for contributors & collaborators)
 
 - llama.cpp uses the ggml tensor library for model evaluation. If you are unfamiliar with ggml, consider taking a look at the [examples in the ggml repository](https://github.com/ggml-org/ggml/tree/master/examples/). [simple](https://github.com/ggml-org/ggml/tree/master/examples/simple) shows the bare minimum for using ggml. [gpt-2](https://github.com/ggml-org/ggml/tree/master/examples/gpt-2) has minimal implementations for language model inference using GPT-2. [mnist](https://github.com/ggml-org/ggml/tree/master/examples/mnist) demonstrates how to train and evaluate a simple image classifier
 - Test your changes:
@@ -9,15 +17,16 @@
 - Create separate PRs for each feature or fix. Avoid combining unrelated changes in a single PR
 - Consider allowing write access to your branch for faster reviews, as reviewers can push commits directly
 - If your PR becomes stale, don't hesitate to ping the maintainers in the comments
+- Maintainers will rely on your insights and approval when making a final decision to approve and merge a PR
+- Consider adding yourself to [CODEOWNERS](CODEOWNERS) to indicate your availability for reviewing related PRs
 
-# Pull requests (for collaborators)
+# Pull requests (for maintainers)
 
 - Squash-merge PRs
 - Use the following format for the squashed commit title: `<module> : <commit title> (#<issue_number>)`. For example: `utils : fix typo in utils.py (#1234)`
 - Optionally pick a `<module>` from here: https://github.com/ggml-org/llama.cpp/wiki/Modules
-- Consider adding yourself to [CODEOWNERS](CODEOWNERS)
-- Let authors, who are also collaborators, merge their own PRs
-- When merging a PR by a contributor, make sure you have a good understanding of the changes
+- Let other maintainers, merge their own PRs
+- When merging a PR, make sure you have a good understanding of the changes
 - Be mindful of maintenance: most of the work going into a feature happens after the PR is merged. If the PR author is not committed to contribute long-term, someone else needs to take responsibility (you)
 
 # Coding guidelines
@@ -117,6 +126,21 @@
 #endif // FOO
 ```
 
+# Code maintenance
+
+- Existing code should have designated collaborators and/or maintainers specified in the [CODEOWNERS](CODEOWNERS) file reponsible for:
+  - Reviewing and merging related PRs
+  - Fixing related bugs
+  - Providing developer guidance/support
+
+- When adding or modifying a large piece of code:
+  - If you are a collaborator, make sure to add yourself to [CODEOWNERS](CODEOWNERS) to indicate your availability for reviewing related PRs
+  - If you are a contributor, find an existing collaborator who is willing to review and maintain your code long-term
+  - Provide the necessary CI workflow (and hardware) to test your changes (see [ci/README.md](https://github.com/ggml-org/llama.cpp/tree/master/ci))
+
+- New code should follow the guidelines (coding, naming, etc.) outlined in this document. Exceptions are allowed in isolated, backend-specific parts of the code that do not interface directly with the `ggml` interfaces.
+  _(NOTE: for legacy reasons, existing code is not required to follow this guideline)_
+
 # Documentation
 
 - Documentation is a community effort

README.md

Lines changed: 2 additions & 1 deletion

@@ -274,6 +274,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
 | [Vulkan](docs/build.md#vulkan) | GPU |
 | [CANN](docs/build.md#cann) | Ascend NPU |
 | [OpenCL](docs/backend/OPENCL.md) | Adreno GPU |
+| [IBM zDNN](docs/backend/zDNN.md) | IBM Z & LinuxONE |
 | [WebGPU [In Progress]](docs/build.md#webgpu) | All |
 | [RPC](https://github.com/ggml-org/llama.cpp/tree/master/tools/rpc) | All |
@@ -520,8 +521,8 @@ To learn more about model quantization, [read this documentation](tools/quantize
 ## Contributing
 
 - Contributors can open PRs
-- Collaborators can push to branches in the `llama.cpp` repo and merge PRs into the `master` branch
 - Collaborators will be invited based on contributions
+- Maintainers can push to branches in the `llama.cpp` repo and merge PRs into the `master` branch
 - Any help with managing issues, PRs and projects is very appreciated!
 - See [good first issues](https://github.com/ggml-org/llama.cpp/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22) for tasks suitable for first contributions
 - Read the [CONTRIBUTING.md](CONTRIBUTING.md) for more information
