Skip to content

Commit d03514c

Browse files
iacopoiacopPBK
authored and committed
Major refactoring: remove GFX906-specific optimizations and update build configuration
1 parent f1f98fa commit d03514c

File tree

549 files changed

+55560
-37233
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

549 files changed

+55560
-37233
lines changed

.clang-format

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,13 @@ AllowShortIfStatementsOnASingleLine: Never
2222
AllowShortLambdasOnASingleLine: Inline
2323
AllowShortLoopsOnASingleLine: false
2424
AlwaysBreakBeforeMultilineStrings: true
25+
# Treat CUDA keywords/attributes as "attribute macros" and avoid breaking lines inside them
26+
AttributeMacros:
27+
- __host__
28+
- __device__
29+
- __global__
30+
- __forceinline__
31+
- __launch_bounds__
2532
BinPackArguments: true
2633
BinPackParameters: false # OnePerLine
2734
BitFieldColonSpacing: Both

.devops/rocm.Dockerfile

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ ARG UBUNTU_VERSION=24.04
44
ARG ROCM_VERSION=6.4
55
ARG AMDGPU_VERSION=6.4
66

7-
# Target the CUDA build image
7+
# Target the ROCm build image
88
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete
99

1010
### Build image
@@ -15,16 +15,13 @@ FROM ${BASE_ROCM_DEV_CONTAINER} AS build
1515
# This is mostly tied to rocBLAS supported archs.
1616
# gfx803, gfx900, gfx1032, gfx1101, gfx1102, not officially supported
1717
# gfx906 is deprecated
18-
#check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.2.4/reference/system-requirements.html
18+
#check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.4.1/reference/system-requirements.html
1919

20-
ARG ROCM_DOCKER_ARCH='gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102'
21-
#ARG ROCM_DOCKER_ARCH=gfx1100
20+
ARG ROCM_DOCKER_ARCH='gfx803;gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1010;gfx1030;gfx1032;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201;gfx1151'
21+
#ARG ROCM_DOCKER_ARCH='gfx1151'
2222

23-
# Set nvcc architectured
23+
# Set ROCm architectures
2424
ENV AMDGPU_TARGETS=${ROCM_DOCKER_ARCH}
25-
# Enable ROCm
26-
# ENV CC=/opt/rocm/llvm/bin/clang
27-
# ENV CXX=/opt/rocm/llvm/bin/clang++
2825

2926
RUN apt-get update \
3027
&& apt-get install -y \
@@ -39,8 +36,16 @@ WORKDIR /app
3936

4037
COPY . .
4138

39+
RUN git clone https://github.com/rocm/rocwmma --branch develop --depth 1
40+
4241
RUN HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \
43-
cmake -S . -B build -DGGML_HIP=ON -DAMDGPU_TARGETS=$ROCM_DOCKER_ARCH -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DCMAKE_BUILD_TYPE=Release -DLLAMA_BUILD_TESTS=OFF \
42+
cmake -S . -B build \
43+
-DGGML_HIP=ON \
44+
-DGGML_HIP_ROCWMMA_FATTN=ON \
45+
-DCMAKE_HIP_FLAGS="-I$(pwd)/rocwmma/library/include/" \
46+
-DAMDGPU_TARGETS="$ROCM_DOCKER_ARCH" \
47+
-DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON \
48+
-DCMAKE_BUILD_TYPE=Release -DLLAMA_BUILD_TESTS=OFF \
4449
&& cmake --build build --config Release -j$(nproc)
4550

4651
RUN mkdir -p /app/lib \

.editorconfig

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,3 +52,11 @@ insert_final_newline = unset
5252
[vendor/miniaudio/miniaudio.h]
5353
trim_trailing_whitespace = unset
5454
insert_final_newline = unset
55+
56+
[tools/server/webui/**]
57+
indent_style = unset
58+
indent_size = unset
59+
end_of_line = unset
60+
charset = unset
61+
trim_trailing_whitespace = unset
62+
insert_final_newline = unset

.github/workflows/build.yml

Lines changed: 55 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ env:
5656

5757
jobs:
5858
macOS-latest-cmake-arm64:
59-
runs-on: macos-14
59+
runs-on: macos-latest
6060

6161
steps:
6262
- name: Clone
@@ -88,6 +88,7 @@ jobs:
8888
-DGGML_METAL_SHADER_DEBUG=ON \
8989
-DGGML_RPC=ON
9090
cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
91+
leaks -atExit -- ./build/bin/test-thread-safety -hf ggml-org/gemma-3-270m-qat-GGUF -ngl 99 -p "$(printf 'hello %.0s' {1..128})" -n 16 -c 512 -ub 32 -np 2 -t 2 -lv 1
9192
9293
- name: Test
9394
id: cmake_test
@@ -126,7 +127,8 @@ jobs:
126127
-DCMAKE_BUILD_RPATH="@loader_path" \
127128
-DLLAMA_FATAL_WARNINGS=ON \
128129
-DGGML_METAL=OFF \
129-
-DGGML_RPC=ON
130+
-DGGML_RPC=ON \
131+
-DCMAKE_OSX_DEPLOYMENT_TARGET=13.3
130132
cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
131133
132134
- name: Test
@@ -136,7 +138,7 @@ jobs:
136138
ctest -L main --verbose --timeout 900
137139
138140
macOS-latest-cmake-arm64-webgpu:
139-
runs-on: macos-14
141+
runs-on: macos-latest
140142

141143
steps:
142144
- name: Clone
@@ -709,6 +711,7 @@ jobs:
709711
710712
macOS-latest-swift:
711713
runs-on: macos-latest
714+
needs: ios-xcode-build
712715

713716
strategy:
714717
matrix:
@@ -725,6 +728,12 @@ jobs:
725728
key: macOS-latest-swift
726729
evict-old-files: 1d
727730

731+
- name: Download xcframework artifact
732+
uses: actions/download-artifact@v4
733+
with:
734+
name: llama-xcframework
735+
path: build-apple/llama.xcframework/
736+
728737
- name: Dependencies
729738
id: depends
730739
continue-on-error: true
@@ -746,11 +755,6 @@ jobs:
746755
-DCMAKE_OSX_ARCHITECTURES="arm64;x86_64"
747756
cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
748757
749-
- name: xcodebuild for swift package
750-
id: xcodebuild
751-
run: |
752-
./build-xcframework.sh
753-
754758
windows-msys2:
755759
runs-on: windows-2025
756760

@@ -1050,9 +1054,13 @@ jobs:
10501054
run: examples/sycl/win-build-sycl.bat
10511055

10521056
windows-latest-cmake-hip:
1053-
if: ${{ github.event.inputs.create_release != 'true' }}
10541057
runs-on: windows-2022
10551058

1059+
env:
1060+
# The ROCm version must correspond to the version used in the HIP SDK.
1061+
ROCM_VERSION: "6.4.2"
1062+
HIPSDK_INSTALLER_VERSION: "25.Q3"
1063+
10561064
steps:
10571065
- name: Clone
10581066
id: checkout
@@ -1061,23 +1069,46 @@ jobs:
10611069
- name: Clone rocWMMA repository
10621070
id: clone_rocwmma
10631071
run: |
1064-
git clone https://github.com/rocm/rocwmma --branch rocm-6.2.4 --depth 1
1072+
git clone https://github.com/rocm/rocwmma --branch rocm-${{ env.ROCM_VERSION }} --depth 1
10651073
1066-
- name: Install
1074+
- name: Cache ROCm Installation
1075+
id: cache-rocm
1076+
uses: actions/cache@v4
1077+
with:
1078+
path: C:\Program Files\AMD\ROCm
1079+
key: rocm-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ runner.os }}
1080+
1081+
- name: Install ROCm
1082+
if: steps.cache-rocm.outputs.cache-hit != 'true'
10671083
id: depends
10681084
run: |
10691085
$ErrorActionPreference = "Stop"
10701086
write-host "Downloading AMD HIP SDK Installer"
1071-
Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
1087+
Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-${{ env.HIPSDK_INSTALLER_VERSION }}-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
10721088
write-host "Installing AMD HIP SDK"
10731089
$proc = Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -PassThru
1074-
$proc.WaitForExit(600000)
1090+
$completed = $proc.WaitForExit(600000)
1091+
if (-not $completed) {
1092+
Write-Error "ROCm installation timed out after 10 minutes. Killing the process"
1093+
$proc.Kill()
1094+
exit 1
1095+
}
1096+
if ($proc.ExitCode -ne 0) {
1097+
Write-Error "ROCm installation failed with exit code $($proc.ExitCode)"
1098+
exit 1
1099+
}
10751100
write-host "Completed AMD HIP SDK installation"
10761101
10771102
- name: Verify ROCm
10781103
id: verify
10791104
run: |
1080-
& 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
1105+
# Find and test ROCm installation
1106+
$clangPath = Get-ChildItem 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | Select-Object -First 1
1107+
if (-not $clangPath) {
1108+
Write-Error "ROCm installation not found"
1109+
exit 1
1110+
}
1111+
& $clangPath.FullName --version
10811112
10821113
- name: Install ccache
10831114
uses: ggml-org/[email protected]
@@ -1141,8 +1172,17 @@ jobs:
11411172
run: |
11421173
./build-xcframework.sh
11431174
1175+
- name: Upload xcframework artifact
1176+
uses: actions/upload-artifact@v4
1177+
with:
1178+
name: llama-xcframework
1179+
path: build-apple/llama.xcframework/
1180+
retention-days: 1
1181+
11441182
- name: Build Xcode project
1145-
run: xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' FRAMEWORK_FOLDER_PATH=./build-ios build
1183+
run: |
1184+
xcodebuild -downloadPlatform iOS
1185+
xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' FRAMEWORK_FOLDER_PATH=./build-ios build
11461186
11471187
android-build:
11481188
runs-on: ubuntu-latest

.github/workflows/release.yml

Lines changed: 38 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,8 @@ jobs:
108108
-DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
109109
-DLLAMA_FATAL_WARNINGS=ON \
110110
-DGGML_METAL=OFF \
111-
-DGGML_RPC=ON
111+
-DGGML_RPC=ON \
112+
-DCMAKE_OSX_DEPLOYMENT_TARGET=13.3
112113
cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
113114
114115
- name: Determine tag name
@@ -528,11 +529,14 @@ jobs:
528529
windows-hip:
529530
runs-on: windows-2022
530531

532+
env:
533+
HIPSDK_INSTALLER_VERSION: "25.Q3"
534+
531535
strategy:
532536
matrix:
533537
include:
534538
- name: "radeon"
535-
gpu_targets: "gfx1100;gfx1101;gfx1102;gfx1030;gfx1031;gfx1032"
539+
gpu_targets: "gfx1151;gfx1200;gfx1201;gfx1100;gfx1101;gfx1102;gfx1030;gfx1031;gfx1032"
536540

537541
steps:
538542
- name: Clone
@@ -542,29 +546,52 @@ jobs:
542546
- name: Clone rocWMMA repository
543547
id: clone_rocwmma
544548
run: |
545-
git clone https://github.com/rocm/rocwmma --branch rocm-6.2.4 --depth 1
549+
git clone https://github.com/rocm/rocwmma --branch develop --depth 1
550+
551+
- name: Cache ROCm Installation
552+
id: cache-rocm
553+
uses: actions/cache@v4
554+
with:
555+
path: C:\Program Files\AMD\ROCm
556+
key: rocm-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ runner.os }}
546557

547558
- name: ccache
548559
uses: ggml-org/[email protected]
549560
with:
550-
key: windows-latest-cmake-hip-${{ matrix.name }}-x64
561+
key: windows-latest-cmake-hip-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ matrix.name }}-x64
551562
evict-old-files: 1d
552563

553-
- name: Install
564+
- name: Install ROCm
565+
if: steps.cache-rocm.outputs.cache-hit != 'true'
554566
id: depends
555567
run: |
556568
$ErrorActionPreference = "Stop"
557569
write-host "Downloading AMD HIP SDK Installer"
558-
Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
570+
Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-${{ env.HIPSDK_INSTALLER_VERSION }}-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
559571
write-host "Installing AMD HIP SDK"
560572
$proc = Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -PassThru
561-
$proc.WaitForExit(600000)
573+
$completed = $proc.WaitForExit(600000)
574+
if (-not $completed) {
575+
Write-Error "ROCm installation timed out after 10 minutes. Killing the process"
576+
$proc.Kill()
577+
exit 1
578+
}
579+
if ($proc.ExitCode -ne 0) {
580+
Write-Error "ROCm installation failed with exit code $($proc.ExitCode)"
581+
exit 1
582+
}
562583
write-host "Completed AMD HIP SDK installation"
563584
564585
- name: Verify ROCm
565586
id: verify
566587
run: |
567-
& 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
588+
# Find and test ROCm installation
589+
$clangPath = Get-ChildItem 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | Select-Object -First 1
590+
if (-not $clangPath) {
591+
Write-Error "ROCm installation not found"
592+
exit 1
593+
}
594+
& $clangPath.FullName --version
568595
569596
- name: Build
570597
id: cmake_build
@@ -585,9 +612,12 @@ jobs:
585612
-DLLAMA_CURL=OFF
586613
cmake --build build --target ggml-hip -j ${env:NUMBER_OF_PROCESSORS}
587614
md "build\bin\rocblas\library\"
615+
md "build\bin\hipblaslt\library"
588616
cp "${env:HIP_PATH}\bin\hipblas.dll" "build\bin\"
617+
cp "${env:HIP_PATH}\bin\hipblaslt.dll" "build\bin\"
589618
cp "${env:HIP_PATH}\bin\rocblas.dll" "build\bin\"
590619
cp "${env:HIP_PATH}\bin\rocblas\library\*" "build\bin\rocblas\library\"
620+
cp "${env:HIP_PATH}\bin\hipblaslt\library\*" "build\bin\hipblaslt\library\"
591621
592622
- name: Pack artifacts
593623
id: pack_artifacts

0 commit comments

Comments
 (0)