Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
704d90c
Revert "sycl: add usage of enqueue_functions extension (#14244)" (#1…
NeoZhangJianyu Sep 12, 2025
6c88ad8
vulkan: Make device memory check more portable (#15939)
mbaudier Sep 12, 2025
304ac56
Vulkan iGPU device selection overhaul and PCI ID API support (#15947)
0cc4m Sep 12, 2025
f088b6a
server : adjust prompt similarity thold + add logs (#15913)
ggerganov Sep 12, 2025
f4e664f
context : remove redundant explicit casting to the same type (#15948)
haiyuewa Sep 12, 2025
4bf5549
Add docker protocol support for llama-server model loading (#15790)
ericcurtin Sep 12, 2025
40be511
ggml-zdnn: fix #15414, activate FP16 and BF16 acceleration and incorr…
taronaeo Sep 12, 2025
84d7b2f
metal : fix memory leaks (#15962)
ggerganov Sep 13, 2025
f161463
metal : allow ops to run concurrently (#15929)
ggerganov Sep 13, 2025
55758b0
metal : refactor kernel loading (#15964)
ggerganov Sep 13, 2025
50f4281
llama : allow using iGPUs with --device (#15951)
slaren Sep 13, 2025
b9c9c9f
vulkan: initialize vulkan-hpp to allow using extension function point…
jeffbolznv Sep 13, 2025
aa0c461
vulkan: fix failing dequant shaders (#15862)
jeffbolznv Sep 13, 2025
6380d6a
ggml-zdnn: rm user mapped buffers (#15965)
taronaeo Sep 14, 2025
d1c6f11
doc : update documentation for --tensor-split (#15980)
rgerganov Sep 14, 2025
9ecb884
releases : update ROCM, add gfx1200, gfx1201, gfx1151 (#15972)
slaren Sep 14, 2025
918b26f
rpc : fix regression when --device is used (#15981)
rgerganov Sep 14, 2025
a14bd35
metal : fix kernel requirements (#15983)
ggerganov Sep 14, 2025
a0e13dc
build: fix the build failures of Windows HIP release job (#15984)
lcy0321 Sep 14, 2025
261e6a2
Vulkan: Clean up mul_mm shader (#15987)
0cc4m Sep 14, 2025
0fa154e
rocm.Dockerfile: added gfx1200,gfx1201 architectures to support AMD …
channeladam Sep 14, 2025
9dcd200
metal : remove memory pools (#15966)
ggerganov Sep 14, 2025
6c019cb
server : only attempt to enable thinking if using jinja (#15967)
CISC Sep 14, 2025
b8e09f0
model : add grok-2 support (#15539)
CISC Sep 14, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .devops/rocm.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ ARG UBUNTU_VERSION=24.04
ARG ROCM_VERSION=6.4
ARG AMDGPU_VERSION=6.4

# Target the CUDA build image
# Target the ROCm build image
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete

### Build image
Expand All @@ -15,12 +15,12 @@ FROM ${BASE_ROCM_DEV_CONTAINER} AS build
# This is mostly tied to rocBLAS supported archs.
# gfx803, gfx900, gfx1032, gfx1101, gfx1102 are not officially supported
# gfx906 is deprecated
#check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.2.4/reference/system-requirements.html
#check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.4.1/reference/system-requirements.html

ARG ROCM_DOCKER_ARCH='gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102'
ARG ROCM_DOCKER_ARCH='gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102,gfx1200,gfx1201'
#ARG ROCM_DOCKER_ARCH=gfx1100

# Set nvcc architectured
# Set ROCm architectures
ENV AMDGPU_TARGETS=${ROCM_DOCKER_ARCH}
# Enable ROCm
# ENV CC=/opt/rocm/llvm/bin/clang
Expand Down
18 changes: 11 additions & 7 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ jobs:
-DGGML_METAL_SHADER_DEBUG=ON \
-DGGML_RPC=ON
cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
leaks -atExit -- ./build/bin/test-thread-safety -hf ggml-org/gemma-3-270m-qat-GGUF -ngl 99 -p "$(printf 'hello %.0s' {1..128})" -n 16 -c 512 -ub 32 -np 2 -t 2 -lv 1

- name: Test
id: cmake_test
Expand Down Expand Up @@ -126,7 +127,8 @@ jobs:
-DCMAKE_BUILD_RPATH="@loader_path" \
-DLLAMA_FATAL_WARNINGS=ON \
-DGGML_METAL=OFF \
-DGGML_RPC=ON
-DGGML_RPC=ON \
-DCMAKE_OSX_DEPLOYMENT_TARGET=13.3
cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)

- name: Test
Expand Down Expand Up @@ -1050,9 +1052,13 @@ jobs:
run: examples/sycl/win-build-sycl.bat

windows-latest-cmake-hip:
if: ${{ github.event.inputs.create_release != 'true' }}
runs-on: windows-2022

env:
# The ROCm version must correspond to the version used in the HIP SDK.
ROCM_VERSION: "6.4.2"
HIPSDK_INSTALLER_VERSION: "25.Q3"

steps:
- name: Clone
id: checkout
Expand All @@ -1061,24 +1067,22 @@ jobs:
- name: Clone rocWMMA repository
id: clone_rocwmma
run: |
git clone https://github.com/rocm/rocwmma --branch rocm-6.2.4 --depth 1
git clone https://github.com/rocm/rocwmma --branch rocm-${{ env.ROCM_VERSION }} --depth 1

- name: Cache ROCm Installation
id: cache-rocm
uses: actions/cache@v4
with:
path: C:\Program Files\AMD\ROCm
key: rocm-6.1-${{ runner.os }}-v1
restore-keys: |
rocm-6.1-${{ runner.os }}-
key: rocm-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ runner.os }}

- name: Install ROCm
if: steps.cache-rocm.outputs.cache-hit != 'true'
id: depends
run: |
$ErrorActionPreference = "Stop"
write-host "Downloading AMD HIP SDK Installer"
Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-${{ env.HIPSDK_INSTALLER_VERSION }}-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
write-host "Installing AMD HIP SDK"
$proc = Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -PassThru
$completed = $proc.WaitForExit(600000)
Expand Down
23 changes: 15 additions & 8 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,8 @@ jobs:
-DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
-DLLAMA_FATAL_WARNINGS=ON \
-DGGML_METAL=OFF \
-DGGML_RPC=ON
-DGGML_RPC=ON \
-DCMAKE_OSX_DEPLOYMENT_TARGET=13.3
cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)

- name: Determine tag name
Expand Down Expand Up @@ -528,11 +529,16 @@ jobs:
windows-hip:
runs-on: windows-2022

env:
# The ROCm version must correspond to the version used in the HIP SDK.
ROCM_VERSION: "6.4.2"
HIPSDK_INSTALLER_VERSION: "25.Q3"

strategy:
matrix:
include:
- name: "radeon"
gpu_targets: "gfx1100;gfx1101;gfx1102;gfx1030;gfx1031;gfx1032"
gpu_targets: "gfx1200;gfx1201;gfx1100;gfx1101;gfx1102;gfx1030;gfx1031;gfx1032"

steps:
- name: Clone
Expand All @@ -542,21 +548,19 @@ jobs:
- name: Clone rocWMMA repository
id: clone_rocwmma
run: |
git clone https://github.com/rocm/rocwmma --branch rocm-6.2.4 --depth 1
git clone https://github.com/rocm/rocwmma --branch rocm-${{ env.ROCM_VERSION }} --depth 1

- name: Cache ROCm Installation
id: cache-rocm
uses: actions/cache@v4
with:
path: C:\Program Files\AMD\ROCm
key: rocm-6.1-${{ runner.os }}-v1
restore-keys: |
rocm-6.1-${{ runner.os }}-
key: rocm-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ runner.os }}

- name: ccache
uses: ggml-org/ccache-action@v1.2.16
with:
key: windows-latest-cmake-hip-${{ matrix.name }}-x64
key: windows-latest-cmake-hip-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ matrix.name }}-x64
evict-old-files: 1d

- name: Install ROCm
Expand All @@ -565,7 +569,7 @@ jobs:
run: |
$ErrorActionPreference = "Stop"
write-host "Downloading AMD HIP SDK Installer"
Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-${{ env.HIPSDK_INSTALLER_VERSION }}-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
write-host "Installing AMD HIP SDK"
$proc = Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -PassThru
$completed = $proc.WaitForExit(600000)
Expand Down Expand Up @@ -610,9 +614,12 @@ jobs:
-DLLAMA_CURL=OFF
cmake --build build --target ggml-hip -j ${env:NUMBER_OF_PROCESSORS}
md "build\bin\rocblas\library\"
md "build\bin\hipblaslt\library"
cp "${env:HIP_PATH}\bin\hipblas.dll" "build\bin\"
cp "${env:HIP_PATH}\bin\hipblaslt.dll" "build\bin\"
cp "${env:HIP_PATH}\bin\rocblas.dll" "build\bin\"
cp "${env:HIP_PATH}\bin\rocblas\library\*" "build\bin\rocblas\library\"
cp "${env:HIP_PATH}\bin\hipblaslt\library\*" "build\bin\hipblaslt\library\"

- name: Pack artifacts
id: pack_artifacts
Expand Down
23 changes: 13 additions & 10 deletions ci/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,9 @@ function gg_run_ctest_with_model_debug {
local model; model=$(gg_get_model)
cd build-ci-debug
set -e

(LLAMACPP_TEST_MODELFILE="$model" time ctest --output-on-failure -L model) 2>&1 | tee -a $OUT/${ci}-ctest.log

set +e
cd ..
}
Expand All @@ -281,7 +283,15 @@ function gg_run_ctest_with_model_release {
local model; model=$(gg_get_model)
cd build-ci-release
set -e

(LLAMACPP_TEST_MODELFILE="$model" time ctest --output-on-failure -L model) 2>&1 | tee -a $OUT/${ci}-ctest.log

# test memory leaks
#if [[ ! -z ${GG_BUILD_METAL} ]]; then
# # TODO: this hangs for some reason ...
# (time leaks -quiet -atExit -- ./bin/test-thread-safety -m $model --parallel 2 -t 2 -p "hello") 2>&1 | tee -a $OUT/${ci}-leaks.log
#fi

set +e
cd ..
}
Expand Down Expand Up @@ -860,20 +870,15 @@ if [ -z ${GG_BUILD_LOW_PERF} ]; then
fi

ret=0
if [ -z ${GG_BUILD_SYCL} ]; then
# SYCL build breaks with debug build flags
test $ret -eq 0 && gg_run ctest_debug
fi
test $ret -eq 0 && gg_run ctest_debug
test $ret -eq 0 && gg_run ctest_release

if [ -z ${GG_BUILD_LOW_PERF} ]; then
test $ret -eq 0 && gg_run embd_bge_small
test $ret -eq 0 && gg_run rerank_tiny

if [ -z ${GG_BUILD_CLOUD} ] || [ ${GG_BUILD_EXTRA_TESTS_0} ]; then
if [ -z ${GG_BUILD_SYCL} ]; then
test $ret -eq 0 && gg_run test_scripts_debug
fi
test $ret -eq 0 && gg_run test_scripts_debug
test $ret -eq 0 && gg_run test_scripts_release
fi

Expand All @@ -884,9 +889,7 @@ if [ -z ${GG_BUILD_LOW_PERF} ]; then
test $ret -eq 0 && gg_run pythia_2_8b
#test $ret -eq 0 && gg_run open_llama_7b_v2
fi
if [ -z ${GG_BUILD_SYCL} ]; then
test $ret -eq 0 && gg_run ctest_with_model_debug
fi
test $ret -eq 0 && gg_run ctest_with_model_debug
test $ret -eq 0 && gg_run ctest_with_model_release
fi
fi
Expand Down
Loading