
Commit f8fe358

anyshu authored and committed
Merge branch 'master' into diffusion-server
* master: (113 commits)
  webui: updated the chat service to only include max_tokens in the req… (ggml-org#16489)
  cpu : optimize the ggml NORM operation (ggml-org#15953)
  server : host-memory prompt caching (ggml-org#16391)
  No markdown in cot (ggml-org#16483)
  model-conversion : add support for SentenceTransformers (ggml-org#16387)
  ci: add ARM64 Kleidiai build and test support (ggml-org#16462)
  CANN: Improve ACL graph matching (ggml-org#16166)
  kleidiai: kernel interface refactoring (ggml-org#16460)
  [SYCL] refactor soft_max, add soft_max_back (ggml-org#16472)
  model: EmbeddingGemma Adding Support for SentenceTransformers Dense Modules (ggml-org#16367)
  refactor: centralize CoT parsing in backend for streaming mode (ggml-org#16394)
  Disable CUDA host buffers on integrated GPUs (ggml-org#16308)
  server : fix cancel pending task (ggml-org#16467)
  metal : mark FA blocks (ggml-org#16372)
  server : improve context checkpoint logic (ggml-org#16440)
  ggml webgpu: profiling, CI updates, reworking of command submission (ggml-org#16452)
  llama : support LiquidAI LFM2-MoE hybrid model (ggml-org#16464)
  server : add `/v1/health` endpoint (ggml-org#16461)
  webui : added download action (ggml-org#13552) (ggml-org#16282)
  presets : fix pooling param for embedding models (ggml-org#16455)
  ...
2 parents b50856b + 1faa13a commit f8fe358

File tree

329 files changed (+15872, -4577 lines)


.devops/intel.Dockerfile

Lines changed: 3 additions & 3 deletions
@@ -1,8 +1,8 @@
-ARG ONEAPI_VERSION=2025.1.1-0-devel-ubuntu24.04
+ARG ONEAPI_VERSION=2025.2.2-0-devel-ubuntu24.04
 
 ## Build Image
 
-FROM intel/oneapi-basekit:$ONEAPI_VERSION AS build
+FROM intel/deep-learning-essentials:$ONEAPI_VERSION AS build
 
 ARG GGML_SYCL_F16=OFF
 RUN apt-get update && \
@@ -31,7 +31,7 @@ RUN mkdir -p /app/full \
     && cp requirements.txt /app/full \
     && cp .devops/tools.sh /app/full/tools.sh
 
-FROM intel/oneapi-basekit:$ONEAPI_VERSION AS base
+FROM intel/deep-learning-essentials:$ONEAPI_VERSION AS base
 
 RUN apt-get update \
     && apt-get install -y libgomp1 curl\

.devops/musa.Dockerfile

Lines changed: 2 additions & 2 deletions
@@ -2,9 +2,9 @@ ARG UBUNTU_VERSION=22.04
 # This needs to generally match the container host's environment.
 ARG MUSA_VERSION=rc4.3.0
 # Target the MUSA build image
-ARG BASE_MUSA_DEV_CONTAINER=sh-harbor.mthreads.com/haive/mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}-amd64
+ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}-amd64
 
-ARG BASE_MUSA_RUN_CONTAINER=sh-harbor.mthreads.com/haive/mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}-amd64
+ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}-amd64
 
 FROM ${BASE_MUSA_DEV_CONTAINER} AS build
 

.devops/nix/package.nix

Lines changed: 0 additions & 4 deletions
@@ -128,10 +128,6 @@ effectiveStdenv.mkDerivation (finalAttrs: {
   };
 
   postPatch = ''
-    substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
-      --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
-    substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
-      --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
   '';
 
   # With PR#6015 https://github.com/ggml-org/llama.cpp/pull/6015,

.devops/rocm.Dockerfile

Lines changed: 4 additions & 8 deletions
@@ -1,8 +1,8 @@
 ARG UBUNTU_VERSION=24.04
 
 # This needs to generally match the container host's environment.
-ARG ROCM_VERSION=6.4
-ARG AMDGPU_VERSION=6.4
+ARG ROCM_VERSION=7.0
+ARG AMDGPU_VERSION=7.0
 
 # Target the ROCm build image
 ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete
@@ -13,9 +13,8 @@ FROM ${BASE_ROCM_DEV_CONTAINER} AS build
 # Unless otherwise specified, we make a fat build.
 # List from https://github.com/ggml-org/llama.cpp/pull/1087#issuecomment-1682807878
 # This is mostly tied to rocBLAS supported archs.
-# gfx803, gfx900, gfx1032, gfx1101, gfx1102,not officialy supported
-# gfx906 is deprecated
-#check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.4.1/reference/system-requirements.html
+# gfx803, gfx900, gfx906, gfx1032, gfx1101, gfx1102,not officialy supported
+# check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.4.1/reference/system-requirements.html
 
 ARG ROCM_DOCKER_ARCH='gfx803;gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1010;gfx1030;gfx1032;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201;gfx1151'
 #ARG ROCM_DOCKER_ARCH='gfx1151'
@@ -36,13 +35,10 @@ WORKDIR /app
 
 COPY . .
 
-RUN git clone https://github.com/rocm/rocwmma --branch develop --depth 1
-
 RUN HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \
     cmake -S . -B build \
         -DGGML_HIP=ON \
         -DGGML_HIP_ROCWMMA_FATTN=ON \
-        -DCMAKE_HIP_FLAGS="-I$(pwd)/rocwmma/library/include/" \
        -DAMDGPU_TARGETS="$ROCM_DOCKER_ARCH" \
        -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON \
        -DCMAKE_BUILD_TYPE=Release -DLLAMA_BUILD_TESTS=OFF \
New file: composite action "Install exe"

Lines changed: 36 additions & 0 deletions
@@ -0,0 +1,36 @@
+name: "Install exe"
+description: "Download and install exe"
+inputs:
+  url:
+    description: "URL of the exe installer"
+    required: true
+  args:
+    description: "Installer arguments"
+    required: true
+  timeout:
+    description: "Timeout (in ms)"
+    required: false
+    default: "600000"
+
+runs:
+  using: "composite"
+  steps:
+    - name: Install EXE
+      shell: pwsh
+      run: |
+        $ErrorActionPreference = "Stop"
+        write-host "Downloading Installer EXE"
+        Invoke-WebRequest -Uri "${{ inputs.url }}" -OutFile "${env:RUNNER_TEMP}\temp-install.exe"
+        write-host "Installing"
+        $proc = Start-Process "${env:RUNNER_TEMP}\temp-install.exe" -ArgumentList '${{ inputs.args }}' -NoNewWindow -PassThru
+        $completed = $proc.WaitForExit(${{ inputs.timeout }})
+        if (-not $completed) {
+            Write-Error "Installer timed out. Killing the process"
+            $proc.Kill()
+            exit 1
+        }
+        if ($proc.ExitCode -ne 0) {
+            Write-Error "Installer failed with exit code $($proc.ExitCode)"
+            exit 1
+        }
+        write-host "Completed installation"
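
Usage note: the "Windows - Setup ROCm" action added further down in this commit consumes this helper via uses: ./.github/actions/install-exe. A minimal sketch of a workflow step calling it directly follows; the installer URL, silent-install argument, and timeout value are illustrative assumptions, not values taken from this commit.

# Hypothetical workflow step (YAML); url, args and timeout below are placeholder assumptions
steps:
  - name: Install example tool
    uses: ./.github/actions/install-exe
    with:
      url: https://example.com/example-installer.exe   # assumed installer URL
      args: -s                                          # assumed silent-install flag for that installer
      timeout: "900000"                                 # optional; the action defaults to "600000" (10 minutes)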
New file: composite action "Linux - Setup SpacemiT Toolchain"

Lines changed: 20 additions & 0 deletions
@@ -0,0 +1,20 @@
+name: "Linux - Setup SpacemiT Toolchain"
+description: "Setup SpacemiT Toolchain for Linux"
+inputs:
+  path:
+    description: "Installation path"
+    required: true
+  version:
+    description: "SpacemiT toolchain version"
+    required: true
+
+runs:
+  using: "composite"
+  steps:
+    - name: Setup SpacemiT Toolchain
+      id: setup
+      uses: ./.github/actions/unarchive-tar
+      with:
+        url: https://archive.spacemit.com/toolchain/spacemit-toolchain-linux-glibc-x86_64-v${{ inputs.version }}.tar.xz
+        path: ${{ inputs.path }}
+        strip: 1
New file: composite action "Linux - Setup Vulkan SDK"

Lines changed: 20 additions & 0 deletions
@@ -0,0 +1,20 @@
+name: "Linux - Setup Vulkan SDK"
+description: "Setup Vulkan SDK for Linux"
+inputs:
+  path:
+    description: "Installation path"
+    required: true
+  version:
+    description: "Vulkan SDK version"
+    required: true
+
+runs:
+  using: "composite"
+  steps:
+    - name: Setup Vulkan SDK
+      id: setup
+      uses: ./.github/actions/unarchive-tar
+      with:
+        url: https://sdk.lunarg.com/sdk/download/${{ inputs.version }}/linux/vulkan_sdk.tar.xz
+        path: ${{ inputs.path }}
+        strip: 1
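
For illustration, a build job could provision the SDK with this action before compiling the Vulkan backend. The sketch below assumes the action is checked in under .github/actions/linux-setup-vulkan (the directory name is not visible here), and the install path and version string are likewise assumptions.

# Hypothetical usage (YAML); action directory, install path and version are assumptions
steps:
  - name: Setup Vulkan SDK
    uses: ./.github/actions/linux-setup-vulkan   # assumed directory for this composite action
    with:
      path: ${{ runner.temp }}/vulkan-sdk        # assumed installation path
      version: "1.4.313.0"                       # assumed SDK version string accepted by sdk.lunarg.com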
New file: composite action "Unarchive tar"

Lines changed: 27 additions & 0 deletions
@@ -0,0 +1,27 @@
+name: "Unarchive tar"
+description: "Download and unarchive tar into directory"
+inputs:
+  url:
+    description: "URL of the tar archive"
+    required: true
+  path:
+    description: "Directory to unarchive into"
+    required: true
+  type:
+    description: "Compression type (tar option)"
+    required: false
+    default: "J"
+  strip:
+    description: "Strip components"
+    required: false
+    default: "0"
+
+runs:
+  using: "composite"
+  steps:
+    - name: Unarchive into directory
+      shell: bash
+      run: |
+        mkdir -p ${{ inputs.path }}
+        cd ${{ inputs.path }}
+        curl --no-progress-meter ${{ inputs.url }} | tar -${{ inputs.type }}x --strip-components=${{ inputs.strip }}
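
The two Linux setup actions above ("Linux - Setup SpacemiT Toolchain" and "Linux - Setup Vulkan SDK") are thin wrappers around this helper, each passing a URL, a destination path, and strip: 1. A direct call would look like the sketch below; the archive URL and destination directory are placeholder assumptions.

# Hypothetical workflow step (YAML); url and path are placeholder assumptions
steps:
  - name: Fetch a prebuilt toolchain
    uses: ./.github/actions/unarchive-tar
    with:
      url: https://example.com/toolchain.tar.xz   # assumed .tar.xz archive; matches the default type "J" (xz)
      path: ${{ runner.temp }}/toolchain          # assumed destination directory
      strip: 1                                    # drop the archive's top-level directory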
New file: composite action "Windows - Setup ROCm"

Lines changed: 15 additions & 0 deletions
@@ -0,0 +1,15 @@
+name: "Windows - Setup ROCm"
+description: "Setup ROCm for Windows"
+inputs:
+  version:
+    description: "ROCm version"
+    required: true
+
+runs:
+  using: "composite"
+  steps:
+    - name: Setup ROCm
+      uses: ./.github/actions/install-exe
+      with:
+        url: https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-${{ inputs.version }}-WinSvr2022-For-HIP.exe
+        args: -install

.github/workflows/build-amd.yml

Lines changed: 52 additions & 0 deletions
@@ -0,0 +1,52 @@
+name: CI (AMD)
+
+on:
+  workflow_dispatch: # allows manual triggering
+  push:
+    branches:
+      - master
+    paths: [
+      '.github/workflows/build-amd.yml',
+      '**/CMakeLists.txt',
+      '**/.cmake',
+      '**/*.h',
+      '**/*.hpp',
+      '**/*.c',
+      '**/*.cpp',
+      '**/*.cu',
+      '**/*.cuh',
+      '**/*.comp'
+    ]
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
+  cancel-in-progress: true
+
+jobs:
+  ggml-ci-x64-amd-vulkan:
+    runs-on: [self-hosted, Linux, X64, AMD]
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: Test
+        id: ggml-ci
+        run: |
+          vulkaninfo --summary
+          GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+
+  ggml-ci-x64-amd-rocm:
+    runs-on: [self-hosted, Linux, X64, AMD]
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: Test
+        id: ggml-ci
+        run: |
+          amd-smi static
+          GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS="gfx1101" bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
