Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
1036 commits
Select commit Hold shift + click to select a range
4afb0a7
server : Support multimodal completion and embeddings prompts in JSON…
65a Aug 22, 2025
ad5c975
ggml-cpu: Support Q5_0 and Q5_1 on s390x (#15486)
taronaeo Aug 22, 2025
9ebebef
llama : remove KV cache defragmentation logic (#15473)
ggerganov Aug 22, 2025
b1ab918
cuda : add Pad Reflect 1D support (#14659)
YavorGIvanov Aug 22, 2025
92f7f0a
ggml: add `conv3d` op (#15182)
rmatif Aug 22, 2025
32732f2
model : gpt-oss add response_format support (#15494)
aldehir Aug 22, 2025
4536363
ggml WebGPU: add support for quantization types (#15440)
reeselevine Aug 22, 2025
e92734d
test-opt: allow slight inprecision (#15503)
JohannesGaessler Aug 22, 2025
330c3d2
vulkan: optimize mul_mat_id loading row ids into shared memory (#15427)
jeffbolznv Aug 23, 2025
0a9b43e
vulkan : support ggml_mean (#15393)
Acly Aug 23, 2025
b55f06e
vulkan.Dockerfile: install vulkan SDK using tarball (#15282)
yeahdongcn Aug 23, 2025
289bf41
vulkan: Rewrite synchronization to allow some overlap between nodes (…
jeffbolznv Aug 23, 2025
21dc4dd
chat : fix debug build assertion in trim function (#15520)
LaffeyNyaa Aug 23, 2025
9ef5369
scripts: fix compare-llama-bench.py (#15521)
JohannesGaessler Aug 23, 2025
b1afcab
model : add support for Seed-OSS (#15490)
pwilkin Aug 23, 2025
611f419
vulkan: optimize rms_norm, and allow the work to spread across multip…
jeffbolznv Aug 23, 2025
710dfc4
CUDA: fix half2 -> half conversion for HIP (#15529)
JohannesGaessler Aug 23, 2025
e78cf0d
vulkan: workaround MoltenVK compile failure in multi_add (#15506)
jeffbolznv Aug 24, 2025
a9c6ffc
vulkan: enable Conv2D for Apple after MoltenVK fixed the bug (#15526)
0cc4m Aug 24, 2025
c9a24fb
vulkan: Support FA with any multiple of 8 head sizes (#15537)
jeffbolznv Aug 24, 2025
b730706
kv-cache : support layer reuse (#15504)
ggerganov Aug 24, 2025
043fb27
vulkan: apply MUL_MAT_ID subgroup optimization to non-coopmat devices…
0cc4m Aug 24, 2025
c247d06
CANN: ROPE cache sin/cos repeat (#15501)
noemotiovon Aug 25, 2025
7da9fed
convert : support interns1-mini (#15412)
RunningLeon Aug 25, 2025
b0ba31f
metal : add FA kernels for HS=40 (#15559)
ggerganov Aug 25, 2025
0d5a470
convert : update Ernie 4.5 dense architecture name (#15555)
ownia Aug 25, 2025
6b64f74
batched-bench : fix unified KV cache handling + pp timing (#15562)
ggerganov Aug 25, 2025
5a6bc6b
model-conversion : add model card template for embeddings [no ci] (#1…
danbev Aug 25, 2025
dfd9b5f
model-conversion : set pooling type to none in logits.cpp (#15564)
danbev Aug 25, 2025
5eff6ec
CUDA: MoE helper in device code, better tile sizes (#15525)
JohannesGaessler Aug 25, 2025
111f8d0
metal: fix regression when no metal devices are present (#15531)
booxter Aug 25, 2025
886b97a
tests: Generate unique input values for count_equal (#15487)
jeffbolznv Aug 25, 2025
4d917cd
vulkan: fix min subgroup 16 condition for mmid subgroup optimization …
0cc4m Aug 25, 2025
f7207b0
opencl: fix support ops condition for `rms_norm` (#15560)
lhez Aug 25, 2025
74f52f7
CUDA: Accelerate MXFP4 table lookup using `__byte_perm` (#15451)
Qeeweew Aug 25, 2025
34bdbbd
vulkan: Remove splitting for mul_mat_id (#15568)
jeffbolznv Aug 26, 2025
4c37636
Add a warning for special devices (#15563)
pt13762104 Aug 26, 2025
0fd90db
metal : remove contiguous assertion for src0 in IM2COL (#15577)
CISC Aug 26, 2025
39842a7
gguf-py : remove erroneous FFN_GATE entry (#15583)
CISC Aug 26, 2025
c4e9239
model : support MiniCPM-V 4.5 (#15575)
tc-mb Aug 26, 2025
1d8d83d
metal : improve `MUL_MAT_ID` (#15541)
ggerganov Aug 26, 2025
85cc1ae
context : print graph stats for memory-less contexts (#15586)
ggerganov Aug 26, 2025
79a5462
mtmd : support Kimi VL model (#15458)
ngxson Aug 26, 2025
b3964c1
metal : optimize FA vec for large sequences and BS <= 8 (#15566)
ggerganov Aug 26, 2025
8f5afa9
CUDA: return -1 for nonexistent compiled arch (#15587)
JohannesGaessler Aug 26, 2025
62cef26
model-conversion : add qat-q4 quantization targets (#15588)
danbev Aug 26, 2025
0373486
graph : fix assert in memory-less build_attn (#15590)
ggerganov Aug 26, 2025
a6a58d6
llamafile: PowerPC Sgemm Optimization (#15558)
shalinib-ibm Aug 26, 2025
44b1efa
tests: add performance test for mul mat id (#15543)
netrunnereve Aug 26, 2025
8ce3ff1
mtmd : fix mtmd ios build (#15579)
fidoriel Aug 26, 2025
8b69686
SYCL: fix rms_norm_mul_add for tensor dim not a multiple of sg_size (…
qnixsynapse Aug 26, 2025
bcbddcd
tests : fix test-opt with GGML_BACKEND_DL (#15599)
slaren Aug 26, 2025
86076f9
OpenCL: add fused group_norm/norm, mul, add (#15314)
rmatif Aug 27, 2025
fcca218
common : add -m to bash completion for --model [no ci] (#15591)
danbev Aug 27, 2025
1cf123a
ggml-cpu : add basic RVV support for vector f32 ops (#15057)
xctan Aug 27, 2025
1e74897
CANN: refactor mask handling and improve performance in FA (#15561)
noemotiovon Aug 27, 2025
1bded5a
kv-cache : better estimate of n_kv for multi-sequence batches (#15610)
ggerganov Aug 27, 2025
4737327
HIP: Enable support for ggml_backend_cuda_register_host_buffer (#15615)
IMbackK Aug 27, 2025
da54f9f
presets : add qwen3-30B-a3b FIM (#15616)
ggerganov Aug 27, 2025
fbef0fa
server: higher timeout for tests (#15621)
JohannesGaessler Aug 27, 2025
5a0e3ef
cuda: Add cublasLt_static linking when GGML_STATIC is enabled (#15622)
matiaslin Aug 28, 2025
46d9caa
model-conversion : add mmproj conversion target (#15628)
danbev Aug 28, 2025
d35a1e8
cli : change log to warning to explain reason for stopping (#15604)
jrincayc Aug 28, 2025
64387f6
gguf-py: byteswapping improvements (#12851)
AlekseiNikiforovIBM Aug 28, 2025
8a4280c
kv-cache : remove LLAMA_SET_ROWS checks (#15505)
ggerganov Aug 28, 2025
55042b3
scripts: add sqlite3 check for compare-commits.sh (#15633)
am17an Aug 28, 2025
84ab83c
model : jina-embeddings-v3 support (#13693)
CISC Aug 28, 2025
c8d0d14
kv-cache : fix find_slot to not search for continuous slot (#15638)
ggerganov Aug 28, 2025
7380414
ggml : fix SSM_SCAN for n_groups > 1 (#15625)
compilade Aug 28, 2025
6c442f4
ggml-cpu: fix invalid hsum build in debug s390x (#15634)
taronaeo Aug 28, 2025
c97dc09
CUDA: add conv2d (#15635)
mnehete32 Aug 28, 2025
a8bca68
fix: Compute the full sum in llama-eval-callback, not just the sum of…
gabe-l-hart Aug 28, 2025
e8d99dd
nvidia nemotron nano v2 (nemotronh) (#15507)
gabe-l-hart Aug 29, 2025
009b709
CUDA: fuse adds, fuse add with rms norm (#15631)
am17an Aug 29, 2025
60e5eee
chat : Seed OSS thinking + tool call support (#15552)
pwilkin Aug 29, 2025
8101786
CUDA: fix bug in rms_norm fusion (#15660)
am17an Aug 29, 2025
792b44f
server : add documentation for `parallel_tool_calls` param (#15647)
ExtReMLapin Aug 29, 2025
3d16b29
scripts: strip "AMD Instinct" from GPU name (#15668)
JohannesGaessler Aug 29, 2025
d82f6aa
server : removed obsolete doc (#15670)
l29ah Aug 29, 2025
ef47691
CANN: FIx compiler warnings (#15661)
noemotiovon Aug 30, 2025
696fccf
vulkan: Skip syncing for prealloc_y when it is reused (#15544)
jeffbolznv Aug 30, 2025
38ad381
CUDA: use FP32 arithmetic for conv2d (#15683)
JohannesGaessler Aug 30, 2025
e81b8e4
llama: use FA + max. GPU layers by default (#15434)
JohannesGaessler Aug 30, 2025
dd89255
Update build.md to remove MSVC arm64 notes (#15684)
slaren Aug 30, 2025
4d74393
ggml: update kleidiai to v1.13.0 (#15663)
chaxu01 Aug 30, 2025
94e82c7
vulkan: clamp matmul and FA results to the max finite value (#15652)
jeffbolznv Aug 31, 2025
b97c9ed
vulkan: Allow fallback to sysmem memory when vidmem is full (#15649)
jeffbolznv Aug 31, 2025
5c16b9c
vulkan : remove unused portability_enumeration_ext variable (#15679)
danbev Aug 31, 2025
c37052a
vulkan: mul_mat_id coopmat2 optimizations (#15546)
jeffbolznv Aug 31, 2025
bbbf5ec
vulkan: handle large sizes for get_rows (#15686)
jeffbolznv Aug 31, 2025
7d3c9f2
ci : explicitly set fa off or on (#15692)
CISC Aug 31, 2025
9777032
llama : separate compute buffer reserve from fattn check (#15696)
slaren Aug 31, 2025
2749662
llama : fix fattn reserve call n_seqs parameter (#15699)
slaren Aug 31, 2025
4efd5a8
metal : fix checks for available FA kernels (#15700)
ggerganov Aug 31, 2025
0d161f0
server : enable /slots by default and make it secure (#15630)
ggerganov Aug 31, 2025
e92d53b
sampling : optimize samplers by reusing bucket sort (#15665)
ggerganov Aug 31, 2025
3dc7397
CANN: fix RoPE cache issue on multi-device (#15629)
hipudding Sep 1, 2025
b9382c3
CANN: Optimize MUL_MAT_ID (#15658)
hipudding Sep 1, 2025
b66df9d
CUDA: fix build error from ambiguous __half conversions in conv2d (#1…
qnixsynapse Sep 1, 2025
4795c91
docs : add Hunyuan to models section (#15707)
DamonFool Sep 1, 2025
77dee9d
ggml : WebGPU add TRANSPOSE and RESHAPE to supported ops (#15695)
danbev Sep 1, 2025
02c1813
Vulkan: Add Integer Dot Product mul_mat_vec shader for legacy quants …
0cc4m Sep 1, 2025
4b20d8b
convert : remove redundant code (#15708)
DamonFool Sep 1, 2025
a0c2b20
ggml: aarch64: Implement SVE F16 kernels for vector functions (#15115)
Vithulep Sep 1, 2025
078ce23
ggml: SVE support for exponential functions (#15145)
s-goto-11 Sep 1, 2025
fec7911
vulkan: disable large mmv subgroups on older Nvidia GPUs (#15717)
0cc4m Sep 1, 2025
35a42ed
vulkan: add missing clamps in new mul_mat_id paths (#15702)
jeffbolznv Sep 1, 2025
d4d8dbe
vulkan: use memory budget extension to read memory usage (#15545)
giladgd Sep 1, 2025
5d804a4
ggml-backend: raise GGML_MAX_SPLIT_INPUTS (#15722)
JohannesGaessler Sep 1, 2025
ef2af57
CANN: Support ext_factor in rope (#15710)
hipudding Sep 2, 2025
2f85368
CANN: Support eager execution mode under ACL graph compilation (#15712)
noemotiovon Sep 2, 2025
97669e4
opencl: add attn sinks support for FA kernels (#15706)
rmatif Sep 2, 2025
25f1045
vulkan: Fix macro parameter order for f32 matmul shaders (#15716)
jeffbolznv Sep 2, 2025
9961d24
CANN: Resolve soft_max precision issue (#15730)
hipudding Sep 2, 2025
0a2a384
vulkan: fix shaders gen when no integer dot is available (#15740)
0cc4m Sep 2, 2025
c466abe
llama: -fa 1/0/-1 aliases for -fa on/off/auto (#15746)
JohannesGaessler Sep 2, 2025
69db8a5
chore: Update `.clang-format` to use `BinPackArguments=true` (#15744)
ORippler Sep 2, 2025
3de0082
fix: resolve unsigned int initialization warning for n_dims/size in g…
skrandy Sep 2, 2025
8a2234e
CANN: Fix type float_t to float (#15736)
noemotiovon Sep 3, 2025
f6da8cb
CANN: Mask unsupported TRANSPOSE_1D operator (#15733)
hipudding Sep 3, 2025
8c3fdf4
model-conversion : add missing curl script [no ci] (#15761)
danbev Sep 3, 2025
05c0380
ggml-cpu : optimize RVV kernels (#15720)
xctan Sep 3, 2025
5eae934
CANN: Add RoPE contiguous check for 310I DUP device (#15735)
hipudding Sep 3, 2025
40a751e
model-conversion : remove hardcoded /bin/bash shebangs [no ci] (#15765)
danbev Sep 3, 2025
2c8dac7
llama : fix incorrect model type for Gemma 270M (#15764)
danbev Sep 3, 2025
cdedb70
sampling : optimize dist sampler (#15704)
ggerganov Sep 3, 2025
407c237
model-conversion : fix pyright errors (#15770)
danbev Sep 3, 2025
661ae31
CUDA: Optimize `rms_norm_f32` kernel and its fused variants, giving 1…
ORippler Sep 3, 2025
0014fb4
ggml vulkan: add hardsigmoid and hardswish operations (#15762)
relent95 Sep 3, 2025
8227695
vulkan : update ggml_vk_instance_validation_ext_available (#15666)
danbev Sep 3, 2025
0fce7a1
vulkan: don't use std::string in load_shaders, to improve compile tim…
jeffbolznv Sep 3, 2025
dff7551
vulkan: fix mmv subgroup16 selection (#15775)
0cc4m Sep 3, 2025
239b60e
CANN: fix acl_rstd allocation size in ggml_cann_rms_norm (#15760)
noemotiovon Sep 4, 2025
820bc98
opencl: add hs=40 to FA (#15758)
rmatif Sep 4, 2025
5421f63
CANN: Fix precision issue on 310I DUO multi-devices (#15784)
hipudding Sep 4, 2025
0a1b398
ggml: add ops for WAN video model (cuda && cpu) (#15669)
leejet Sep 4, 2025
badb80c
Document the new max GPU layers default in help (#15771)
ericcurtin Sep 4, 2025
a68d914
server: add exceed_context_size_error type (#15780)
ngxson Sep 4, 2025
c1c354e
CANN: Refactor ND to NZ workspace to be per-device (#15763)
noemotiovon Sep 4, 2025
d1e2adb
llama : set n_outputs to 1 to avoid 0 outputs mean-pooling (#15791)
danbev Sep 4, 2025
856ed09
metal : Add template specialization for mul_mm_id w/ ne20 == 10 (#15799)
gabe-l-hart Sep 4, 2025
fb15d64
llama : add support for EmbeddingGemma 300m (#15798)
danbev Sep 4, 2025
9e2b1e8
scripts : add Jinja tester PySide6 simple app (#15756)
pwilkin Sep 4, 2025
b2426e4
chat : nemotron thinking & toolcalling support (#15676)
pwilkin Sep 4, 2025
4fd1242
chat : fixed crash when Hermes 2 <tool_call> had a newline before it …
ExtReMLapin Sep 4, 2025
5d6688d
model-conversion : add --embeddings flag to modelcard.template [no ci…
danbev Sep 5, 2025
c610b6c
kv-cache : fix SWA checks + disable cacheless iSWA (#15811)
ggerganov Sep 5, 2025
a812838
gguf: gguf_writer refactor (#15691)
Green-Sky Sep 5, 2025
3a550b5
tests : add --list-ops and --show-coverage options (#15745)
danbev Sep 5, 2025
5143fa8
CUDA: fastdiv, launch bounds for mmvq + q8_1 quant (#15802)
JohannesGaessler Sep 5, 2025
408ff52
Implement --log-colors with always/never/auto (#15792)
ericcurtin Sep 5, 2025
5fac79c
Thinking model disabled assistant prefill (#15404)
gabe-l-hart Sep 5, 2025
4281c7b
ci : exempt correct research label (#15825)
CISC Sep 5, 2025
fd62188
aLoRA Support (#15327)
gabe-l-hart Sep 5, 2025
186415d
ggml-cpu: drop support for nnpa intrinsics (#15821)
taronaeo Sep 6, 2025
01806e7
ggml-cpu: document use of "free" memory [no ci] (#15834)
JohannesGaessler Sep 6, 2025
61bdfd5
server : implement prompt processing progress report in stream mode (…
ngxson Sep 6, 2025
3c3635d
server : speed up tests (#15836)
ngxson Sep 6, 2025
c4df49a
kleidiai: generalize compute_forward_kv_cache to compute_forward_fp16…
chaxu01 Sep 6, 2025
79bc429
CUDA: faster tile FA (Pascal/AMD), headsize 256 (#15769)
JohannesGaessler Sep 6, 2025
3b15924
ggml WebGPU: remove userdata from request adapter callback (#15527)
danbev Sep 7, 2025
267e998
vulkan: Use larger loads in scalar/coopmat1 matmul (#15729)
jeffbolznv Sep 7, 2025
c97b5e5
vulkan: Support pad_ext (#15794)
jeffbolznv Sep 7, 2025
d36e61c
ggml-cpu: clean up s390x SIMD (#15855)
taronaeo Sep 7, 2025
3976dfb
vulkan: support im2col_3d (#15795)
jeffbolznv Sep 7, 2025
85ca66a
CANN: Stream sync between devices for acl_graph (#15809)
noemotiovon Sep 8, 2025
d413dca
tests: large sizes for get_rows (#15687)
jeffbolznv Sep 8, 2025
cf0e3ba
model : avoid ggml_cont_3d for fused QKV weights (#15662)
ggerganov Sep 8, 2025
663027f
context : fix n_outputs during reserve (#15858)
ggerganov Sep 8, 2025
a885dcf
batched-bench : fix llama_synchronize usage during prompt processing …
ggerganov Sep 8, 2025
233d773
convert : force setting sliding_window from original config (#15867)
danbev Sep 8, 2025
5ef22d2
CUDA: non-contiguous src0 not supported for PAD (#15869)
CISC Sep 8, 2025
9fcb29f
ggml: allow casting between f32 and i32 (#15783)
ngxson Sep 8, 2025
f28d4f4
metal : refactor + optimize (#15857)
ggerganov Sep 8, 2025
b0d5299
cuda : fix supports_op condition for get_rows when number of blocks i…
ggerganov Sep 8, 2025
56920f5
server : bring back timings_per_token (#15879)
ngxson Sep 8, 2025
8802156
chat : Deepseek V3.1 reasoning and tool calling support (OpenAI Style…
createthis Sep 8, 2025
0a16bf5
CUDA: generate_cu_files.py - add missing mxfp4 (#15880)
am17an Sep 8, 2025
e68aa10
vulkan: sort graph to allow more parallel execution (#15850)
jeffbolznv Sep 8, 2025
fe1c92c
media : add llama1 icon (#15878)
06kellyjac Sep 8, 2025
7057faf
json : support `enum` values within `allOf` (#15830)
aldehir Sep 8, 2025
acc1b00
model-conversion : add extra debugging support for model conversion (…
pwilkin Sep 9, 2025
70cd37d
requirements : update transformers/torch for Embedding Gemma (#15828)
danbev Sep 9, 2025
c252ce6
contrib : add notes about merging PRs (#15881)
ggerganov Sep 9, 2025
550cf72
CUDA: fix GET_ROWS for large tensors (#15882)
JohannesGaessler Sep 9, 2025
a972fae
CUDA: Add mul_mat_id support for the mmf kernel (#15767)
am17an Sep 9, 2025
ed54e32
Workaround for subgroup arithmetic failing on MoltenVK with AMD GPUs …
lksj92hs Sep 9, 2025
17bc5a8
HIP: use v_dot2_f32_f16 instruction for FA (#15884)
JohannesGaessler Sep 9, 2025
4f63cd7
vulkan: Fix OOB accesses in soft_max_back (#15861)
jeffbolznv Sep 9, 2025
ae355f6
vulkan: throw the oom error instead of no memory type found (#15905)
0cc4m Sep 9, 2025
ff02caf
ci : cache ROCm installation in windows-latest-cmake-hip (#15887)
danbev Sep 10, 2025
86587da
llama : check returned fn ptrs from ggml_backend_reg_get_proc_address…
danbev Sep 10, 2025
28b5f19
CANN: implement LRU cache for ACL graphs (#15814)
noemotiovon Sep 10, 2025
10d8b2b
CANN: Add ROPE sin/cos cache for reuse (#15912)
noemotiovon Sep 10, 2025
09e72a0
gitignore : Ignore vim swap files in tests (#15901)
createthis Sep 10, 2025
2cfef4d
media : add transparent icon svg and png [no ci] (#15891)
06kellyjac Sep 10, 2025
e7b6d83
tests : filter out no-ops from coverage report (#15900)
danbev Sep 10, 2025
33daece
ci : add caching for ROCm installation in release workflow (#15924)
danbev Sep 10, 2025
0f0a3c2
metal : make the backend async (#15906)
ggerganov Sep 10, 2025
9de447d
ggml-cpu : fix padding in ggml_timestep_embedding (#15917)
danbev Sep 10, 2025
6ab397e
graph : support non-contiguous Q in build_attn_mha (#15908)
CISC Sep 10, 2025
4f65885
llama : support T5 models with unequal number of encoder-decoder laye…
DamonFool Sep 10, 2025
00681df
CUDA: Add `fastdiv` to `k_bin_bcast*`, giving 1-3% E2E performance (#…
ORippler Sep 10, 2025
c0389db
CANN: Disable acl_graph for prefill stage (#15933)
hipudding Sep 11, 2025
2b3efea
kleidiai: fix GGML_ASSERT(*cur_backend_id != -1) failed (#15614)
chaxu01 Sep 11, 2025
24a6734
ggml-cpu : add check for ARM MATMUL_INT8/i8mm support (#15922)
danbev Sep 11, 2025
df082f5
nitpick : correct MB to MiB (#15934)
ddh0 Sep 11, 2025
0e6ff00
CUDA: larger SRAM reads for tile FA, AMD FP16 dot (#15927)
JohannesGaessler Sep 11, 2025
360d653
ggml-backend : add GGML_BACKEND_DEVICE_TYPE_IGPU device type (#15797)
slaren Sep 11, 2025
704d90c
Revert "sycl: add usage of enqueue_functions extension (#14244)" (#1…
NeoZhangJianyu Sep 12, 2025
6c88ad8
vulkan: Make device memory check more portable (#15939)
mbaudier Sep 12, 2025
304ac56
Vulkan iGPU device selection overhaul and PCI ID API support (#15947)
0cc4m Sep 12, 2025
f088b6a
server : adjust prompt similarity thold + add logs (#15913)
ggerganov Sep 12, 2025
f4e664f
context : remove redundant explicit casting to the same type (#15948)
haiyuewa Sep 12, 2025
4bf5549
Add docker protocol support for llama-server model loading (#15790)
ericcurtin Sep 12, 2025
40be511
ggml-zdnn: fix #15414, activate FP16 and BF16 acceleration and incorr…
taronaeo Sep 12, 2025
84d7b2f
metal : fix memory leaks (#15962)
ggerganov Sep 13, 2025
f161463
metal : allow ops to run concurrently (#15929)
ggerganov Sep 13, 2025
55758b0
metal : refactor kernel loading (#15964)
ggerganov Sep 13, 2025
50f4281
llama : allow using iGPUs with --device (#15951)
slaren Sep 13, 2025
b9c9c9f
vulkan: initialize vulkan-hpp to allow using extension function point…
jeffbolznv Sep 13, 2025
aa0c461
vulkan: fix failing dequant shaders (#15862)
jeffbolznv Sep 13, 2025
6380d6a
ggml-zdnn: rm user mapped buffers (#15965)
taronaeo Sep 14, 2025
d1c6f11
doc : update documentation for --tensor-split (#15980)
rgerganov Sep 14, 2025
9ecb884
releases : update ROCM, add gfx1200, gfx1201, gfx1151 (#15972)
slaren Sep 14, 2025
918b26f
rpc : fix regression when --device is used (#15981)
rgerganov Sep 14, 2025
a14bd35
metal : fix kernel requirements (#15983)
ggerganov Sep 14, 2025
a0e13dc
build: fix the build failures of Windows HIP release job (#15984)
lcy0321 Sep 14, 2025
261e6a2
Vulkan: Clean up mul_mm shader (#15987)
0cc4m Sep 14, 2025
0fa154e
rocm.Dockerfile: added gfx1200,gfx1201 architectures to support AMD …
channeladam Sep 14, 2025
9dcd200
metal : remove memory pools (#15966)
ggerganov Sep 14, 2025
6c019cb
server : only attempt to enable thinking if using jinja (#15967)
CISC Sep 14, 2025
b8e09f0
model : add grok-2 support (#15539)
CISC Sep 14, 2025
a68f31e
fix KLD percentile output (#15999)
ddh0 Sep 15, 2025
1062205
CUDA: some micro-optimizations in mmf.cuh for mul_mat_id (#15926)
am17an Sep 15, 2025
28c39da
llama-run: Fix model download on Windows (#15988)
npopov-vst Sep 15, 2025
b907255
SYCL: Add COUNT_EQUAL operator support (#15991)
yael-works Sep 15, 2025
10d1974
releases : switch to rocWMMA develop branch, add gfx1151 (#15992)
slaren Sep 15, 2025
dc381aa
docker : enable rocWMMA in ROCm images, add gfx1151 (#15997)
slaren Sep 15, 2025
3d4053f
CUDA: fix im2col_3d to respect non-contiguous inputs (views) (#15956)
jakekarnes42 Sep 15, 2025
6d75883
Add LLaDA-7b-MoE diffusion model (#16003)
am17an Sep 16, 2025
07808eb
cmake : Do not install tools on iOS targets (#15903)
ykhrustalev Sep 16, 2025
51abc96
ci : update macos-latest* jobs to use macos-latest (#15938)
danbev Sep 16, 2025
d853890
Merge remote-tracking branch 'upstream/master' into update-master, on…
Sep 17, 2025
a17e36f
-Fixed issue for llama-cli. Tested on posix & FPGA
Sep 17, 2025
7a6ce92
-Disabled CI, flake8 Lint ,editor config, python lint workflow defaul…
Sep 19, 2025
1432366
-Disabled riscv-native, editorconfig, python-type-check, server check…
Sep 19, 2025
b91626c
-disabled python-lint. CI check
Sep 19, 2025
c9a365e
-Disabled all automatic checks for commits
Sep 19, 2025
4193b56
-fix(build): resolve GLIBC compatibility issues for TSI binaries, Swi…
Sep 22, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
11 changes: 7 additions & 4 deletions .clang-format
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ AllowShortLambdasOnASingleLine: Inline
AllowShortLoopsOnASingleLine: false
AlwaysBreakBeforeMultilineStrings: true
BinPackArguments: true
BinPackParameters: true # OnePerLine
BinPackParameters: false # OnePerLine
BitFieldColonSpacing: Both
BreakBeforeBraces: Custom # Attach
BraceWrapping:
Expand Down Expand Up @@ -70,15 +70,18 @@ ExperimentalAutoDetectBinPacking: false
FixNamespaceComments: true
IncludeBlocks: Regroup
IncludeCategories:
- Regex: '^<.*\.h>'
- Regex: '".*"'
Priority: 1
SortPriority: 0
- Regex: '^<.*'
- Regex: '^<.*\.h>'
Priority: 2
SortPriority: 0
- Regex: '.*'
- Regex: '^<.*'
Priority: 3
SortPriority: 0
- Regex: '.*'
Priority: 4
SortPriority: 0
IncludeIsMainRegex: '([-_](test|unittest))?$'
IncludeIsMainSourceRegex: ''
IndentAccessModifiers: false
Expand Down
130 changes: 130 additions & 0 deletions .devops/cann.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
# ==============================================================================
# ARGUMENTS
# ==============================================================================

# Define the CANN base image for easier version updates later
ARG CANN_BASE_IMAGE=quay.io/ascend/cann:8.1.rc1-910b-openeuler22.03-py3.10

# ==============================================================================
# BUILD STAGE
# Compile all binary files and libraries
# ==============================================================================
FROM ${CANN_BASE_IMAGE} AS build

# Define the Ascend chip model for compilation. Default is Ascend910B3
ARG ASCEND_SOC_TYPE=Ascend910B3

# `source` (used below for set_env.sh) is a bashism; make the RUN shell explicit
# instead of relying on /bin/sh resolving to bash in the base image.
SHELL ["/bin/bash", "-c"]

# -- Install build dependencies --
RUN yum install -y gcc g++ cmake make git libcurl-devel python3 python3-pip && \
    yum clean all && \
    rm -rf /var/cache/yum

# -- Set the working directory --
WORKDIR /app

# -- Copy project files --
COPY . .

# -- Set CANN environment variables (required for compilation) --
# Using ENV instead of `source` allows environment variables to persist across the entire image layer
ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${LD_LIBRARY_PATH}
ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${PATH}
ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/runtime/lib64/stub:$LD_LIBRARY_PATH
# NOTE(review): set_env.sh may export additional variables needed by some SOC
# types — confirm against the toolkit's set_env.sh if a build fails.

# -- Build llama.cpp --
# Use the passed ASCEND_SOC_TYPE argument and add general build options
RUN source /usr/local/Ascend/ascend-toolkit/set_env.sh --force \
    && \
    cmake -B build \
        -DGGML_CANN=ON \
        -DCMAKE_BUILD_TYPE=Release \
        -DSOC_TYPE=${ASCEND_SOC_TYPE} \
        . && \
    cmake --build build --config Release -j$(nproc)

# -- Organize build artifacts for copying in later stages --
# Create a lib directory to store all .so files
RUN mkdir -p /app/lib && \
    find build -name "*.so" -exec cp {} /app/lib \;

# Create a full directory to store all executables, Python scripts, and the
# tools.sh dispatcher that the `full` target uses as its entrypoint.
RUN mkdir -p /app/full && \
    cp build/bin/* /app/full/ && \
    cp *.py /app/full/ && \
    cp -r gguf-py /app/full/ && \
    cp -r requirements /app/full/ && \
    cp requirements.txt /app/full/ && \
    cp .devops/tools.sh /app/full/tools.sh && \
    chmod +x /app/full/tools.sh

# ==============================================================================
# BASE STAGE
# Create a minimal base image with CANN runtime and common libraries
# ==============================================================================
FROM ${CANN_BASE_IMAGE} AS base

# -- Install runtime dependencies --
# curl is also required by the server target's HEALTHCHECK below
RUN yum install -y libgomp curl && \
    yum clean all && \
    rm -rf /var/cache/yum

# -- Set CANN environment variables (required for runtime) --
# /app is prepended so the .so files copied below are found first
ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
ENV LD_LIBRARY_PATH=/app:${ASCEND_TOOLKIT_HOME}/lib64:${LD_LIBRARY_PATH}
ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${PATH}
ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp

WORKDIR /app

# Copy compiled .so files from the build stage
COPY --from=build /app/lib/ /app

# ==============================================================================
# FINAL STAGES (TARGETS)
# ==============================================================================

### Target: full
# Complete image with all tools, Python bindings, and dependencies
# ==============================================================================
FROM base AS full

COPY --from=build /app/full /app

# Install Python dependencies
RUN yum install -y git python3 python3-pip && \
    pip3 install --no-cache-dir --upgrade pip setuptools wheel && \
    pip3 install --no-cache-dir -r requirements.txt && \
    yum clean all && \
    rm -rf /var/cache/yum

# tools.sh (copied into /app/full in the build stage) dispatches to
# llama-cli / llama-server / the conversion scripts
ENTRYPOINT ["/app/tools.sh"]

### Target: light
# Lightweight image containing only llama-cli
# ==============================================================================
FROM base AS light

COPY --from=build /app/full/llama-cli /app

ENTRYPOINT [ "/app/llama-cli" ]

### Target: server
# Dedicated server image containing only llama-server
# ==============================================================================
FROM base AS server

ENV LLAMA_ARG_HOST=0.0.0.0

COPY --from=build /app/full/llama-server /app

HEALTHCHECK --interval=5m CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/app/llama-server" ]
22 changes: 0 additions & 22 deletions .devops/cloud-v-pipeline

This file was deleted.

6 changes: 1 addition & 5 deletions .devops/cpu.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,15 @@ FROM ubuntu:$UBUNTU_VERSION AS build

ARG TARGETARCH

ARG GGML_CPU_ARM_ARCH=armv8-a

RUN apt-get update && \
apt-get install -y build-essential git cmake libcurl4-openssl-dev

WORKDIR /app

COPY . .

RUN if [ "$TARGETARCH" = "amd64" ]; then \
RUN if [ "$TARGETARCH" = "amd64" ] || [ "$TARGETARCH" = "arm64" ]; then \
cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DLLAMA_BUILD_TESTS=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON; \
elif [ "$TARGETARCH" = "arm64" ]; then \
cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DLLAMA_BUILD_TESTS=OFF -DGGML_CPU_ARM_ARCH=${GGML_CPU_ARM_ARCH}; \
else \
echo "Unsupported architecture"; \
exit 1; \
Expand Down
2 changes: 1 addition & 1 deletion .devops/cuda.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ RUN apt-get update \
python3 \
python3-pip \
&& pip install --upgrade pip setuptools wheel \
&& pip install -r requirements.txt \
&& pip install --break-system-packages -r requirements.txt \
&& apt autoremove -y \
&& apt clean -y \
&& rm -rf /tmp/* /var/tmp/* \
Expand Down
30 changes: 17 additions & 13 deletions .devops/intel.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -49,19 +49,23 @@ COPY --from=build /app/full /app

WORKDIR /app

RUN apt-get update \
&& apt-get install -y \
git \
python3 \
python3-pip \
&& pip install --upgrade pip setuptools wheel \
&& pip install -r requirements.txt \
&& apt autoremove -y \
&& apt clean -y \
&& rm -rf /tmp/* /var/tmp/* \
&& find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
&& find /var/cache -type f -delete

RUN apt-get update && \
apt-get install -y \
git \
python3 \
python3-pip \
python3-venv && \
python3 -m venv /opt/venv && \
. /opt/venv/bin/activate && \
pip install --upgrade pip setuptools wheel && \
pip install -r requirements.txt && \
apt autoremove -y && \
apt clean -y && \
rm -rf /tmp/* /var/tmp/* && \
find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete && \
find /var/cache -type f -delete

ENV PATH="/opt/venv/bin:$PATH"

ENTRYPOINT ["/app/tools.sh"]

Expand Down
6 changes: 3 additions & 3 deletions .devops/musa.Dockerfile
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG MUSA_VERSION=rc4.0.1
ARG MUSA_VERSION=rc4.2.0
# Target the MUSA build image
ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-mudnn-devel-ubuntu${UBUNTU_VERSION}
ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}-amd64

ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-mudnn-runtime-ubuntu${UBUNTU_VERSION}
ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}-amd64

FROM ${BASE_MUSA_DEV_CONTAINER} AS build

Expand Down
3 changes: 2 additions & 1 deletion .devops/nix/package.nix
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ let
inherit (lib)
cmakeBool
cmakeFeature
optionalAttrs
optionals
strings
;
Expand Down Expand Up @@ -197,7 +198,7 @@ effectiveStdenv.mkDerivation (finalAttrs: {
];

# Environment variables needed for ROCm
env = optionals useRocm {
env = optionalAttrs useRocm {
ROCM_PATH = "${rocmPackages.clr}";
HIP_DEVICE_LIB_PATH = "${rocmPackages.rocm-device-libs}/amdgcn/bitcode";
};
Expand Down
27 changes: 16 additions & 11 deletions .devops/rocm.Dockerfile
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
ARG UBUNTU_VERSION=24.04

# This needs to generally match the container host's environment.
ARG ROCM_VERSION=6.3
ARG AMDGPU_VERSION=6.3
ARG ROCM_VERSION=6.4
ARG AMDGPU_VERSION=6.4

# Target the CUDA build image
# Target the ROCm build image
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete

### Build image
Expand All @@ -15,16 +15,13 @@ FROM ${BASE_ROCM_DEV_CONTAINER} AS build
# This is mostly tied to rocBLAS supported archs.
# gfx803, gfx900, gfx1032, gfx1101, gfx1102,not officially supported
# gfx906 is deprecated
#check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.2.4/reference/system-requirements.html
#check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.4.1/reference/system-requirements.html

ARG ROCM_DOCKER_ARCH='gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102'
#ARG ROCM_DOCKER_ARCH=gfx1100
ARG ROCM_DOCKER_ARCH='gfx803;gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1010;gfx1030;gfx1032;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201;gfx1151'
#ARG ROCM_DOCKER_ARCH='gfx1151'

# Set nvcc architectured
# Set ROCm architectures
ENV AMDGPU_TARGETS=${ROCM_DOCKER_ARCH}
# Enable ROCm
# ENV CC=/opt/rocm/llvm/bin/clang
# ENV CXX=/opt/rocm/llvm/bin/clang++

RUN apt-get update \
&& apt-get install -y \
Expand All @@ -39,8 +36,16 @@ WORKDIR /app

COPY . .

RUN git clone https://github.com/rocm/rocwmma --branch develop --depth 1

RUN HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \
cmake -S . -B build -DGGML_HIP=ON -DAMDGPU_TARGETS=$ROCM_DOCKER_ARCH -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DCMAKE_BUILD_TYPE=Release -DLLAMA_BUILD_TESTS=OFF \
cmake -S . -B build \
-DGGML_HIP=ON \
-DGGML_HIP_ROCWMMA_FATTN=ON \
-DCMAKE_HIP_FLAGS="-I$(pwd)/rocwmma/library/include/" \
-DAMDGPU_TARGETS="$ROCM_DOCKER_ARCH" \
-DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON \
-DCMAKE_BUILD_TYPE=Release -DLLAMA_BUILD_TESTS=OFF \
&& cmake --build build --config Release -j$(nproc)

RUN mkdir -p /app/lib \
Expand Down
2 changes: 1 addition & 1 deletion .devops/tools.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/bin/bash
#!/usr/bin/env bash
set -e

# Read the first argument into a variable
Expand Down
30 changes: 23 additions & 7 deletions .devops/vulkan.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,30 @@ ARG UBUNTU_VERSION=24.04

FROM ubuntu:$UBUNTU_VERSION AS build

# Install build tools
RUN apt update && apt install -y git build-essential cmake wget
# Ref: https://vulkan.lunarg.com/doc/sdk/latest/linux/getting_started.html

# Install Vulkan SDK and cURL
RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-noble.list https://packages.lunarg.com/vulkan/lunarg-vulkan-noble.list && \
apt update -y && \
apt-get install -y vulkan-sdk libcurl4-openssl-dev curl
# Install build tools
RUN apt update && apt install -y git build-essential cmake wget xz-utils

# Install Vulkan SDK
ARG VULKAN_VERSION=1.4.321.1
RUN ARCH=$(uname -m) && \
wget -qO /tmp/vulkan-sdk.tar.xz https://sdk.lunarg.com/sdk/download/${VULKAN_VERSION}/linux/vulkan-sdk-linux-${ARCH}-${VULKAN_VERSION}.tar.xz && \
mkdir -p /opt/vulkan && \
tar -xf /tmp/vulkan-sdk.tar.xz -C /tmp --strip-components=1 && \
mv /tmp/${ARCH}/* /opt/vulkan/ && \
rm -rf /tmp/*

# Install cURL and Vulkan SDK dependencies
RUN apt install -y libcurl4-openssl-dev curl \
libxcb-xinput0 libxcb-xinerama0 libxcb-cursor-dev

# Set environment variables
ENV VULKAN_SDK=/opt/vulkan
ENV PATH=$VULKAN_SDK/bin:$PATH
ENV LD_LIBRARY_PATH=$VULKAN_SDK/lib:$LD_LIBRARY_PATH
ENV CMAKE_PREFIX_PATH=$VULKAN_SDK:$CMAKE_PREFIX_PATH
ENV PKG_CONFIG_PATH=$VULKAN_SDK/lib/pkgconfig:$PKG_CONFIG_PATH

# Build it
WORKDIR /app
Expand Down
4 changes: 4 additions & 0 deletions .editorconfig
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,7 @@ end_of_line = unset
charset = unset
trim_trailing_whitespace = unset
insert_final_newline = unset

[vendor/miniaudio/miniaudio.h]
trim_trailing_whitespace = unset
insert_final_newline = unset
Loading