opencl: transposed gemm/gemv moe kernel with mxfp4,f32 (#16602) #64
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Release workflow: builds llama.cpp binaries on every supported platform and,
# on pushes to master (or on manual request), publishes them as a GitHub release.
name: Release

on:
  workflow_dispatch: # allows manual triggering
    inputs:
      create_release:
        description: 'Create new release'
        required: true
        type: boolean

  push:
    branches:
      - master
    # Only build-relevant files trigger a run.
    # '**/*.cmake' matches CMake modules in any directory; the previous
    # '**/.cmake' only matched files literally named ".cmake".
    paths: ['.github/workflows/release.yml', '**/CMakeLists.txt', '**/*.cmake', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal', '**/*.comp']

# Pull-request runs (head_ref set) share a group per ref and cancel each other;
# every other run falls back to its unique run_id and is therefore never cancelled.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true

env:
  BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
  # CMake options appended by the jobs that reference ${{ env.CMAKE_ARGS }}.
  CMAKE_ARGS: "-DLLAMA_BUILD_EXAMPLES=OFF -DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_TOOLS=ON -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON"

jobs:
# Job macOS-arm64: builds the release binaries on a macos-14 runner with the
# Metal BF16 and embedded-Metal-library options enabled, copies LICENSE into
# ./build/bin, zips that directory and uploads it as "llama-bin-macos-arm64.zip".
# - The "Dependencies" step is continue-on-error, so a failing brew call does
#   not fail the job.
# - NOTE(review): -DGGML_RPC=ON is passed explicitly and is also part of the
#   workflow-level CMAKE_ARGS value, so it appears twice on the cmake command line.
# - NOTE(review): the ccache "uses:" reference looks mangled by e-mail
#   obfuscation in this copy; restore the real owner/action@version.
macOS-arm64: | |
runs-on: macos-14 | |
steps: | |
- name: Clone | |
id: checkout | |
uses: actions/checkout@v4 | |
with: | |
fetch-depth: 0 | |
- name: ccache | |
uses: ggml-org/[email protected] | |
with: | |
key: macOS-latest-cmake-arm64 | |
evict-old-files: 1d | |
- name: Dependencies | |
id: depends | |
continue-on-error: true | |
run: | | |
brew update | |
brew install curl | |
- name: Build | |
id: cmake_build | |
run: | | |
sysctl -a | |
cmake -B build \ | |
-DCMAKE_INSTALL_RPATH='@loader_path' \ | |
-DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \ | |
-DLLAMA_FATAL_WARNINGS=ON \ | |
-DGGML_METAL_USE_BF16=ON \ | |
-DGGML_METAL_EMBED_LIBRARY=ON \ | |
-DGGML_RPC=ON \ | |
${{ env.CMAKE_ARGS }} | |
cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) | |
- name: Determine tag name | |
id: tag | |
uses: ./.github/actions/get-tag-name | |
- name: Pack artifacts | |
id: pack_artifacts | |
run: | | |
cp LICENSE ./build/bin/ | |
zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip ./build/bin/* | |
- name: Upload artifacts | |
uses: actions/upload-artifact@v4 | |
with: | |
path: llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip | |
name: llama-bin-macos-arm64.zip | |
# Job macOS-x64: builds the release binaries on a macos-15-intel runner with
# Metal disabled and a 13.3 deployment target, then zips ./build/bin (plus
# LICENSE) and uploads it as "llama-bin-macos-x64.zip".
# - The "Dependencies" step is continue-on-error, so a failing brew call does
#   not fail the job.
# - NOTE(review): unlike the macOS-arm64 job, this cmake invocation does not
#   append ${{ env.CMAKE_ARGS }}; confirm that is intentional.
macOS-x64: | |
runs-on: macos-15-intel | |
steps: | |
- name: Clone | |
id: checkout | |
uses: actions/checkout@v4 | |
with: | |
fetch-depth: 0 | |
- name: ccache | |
uses: ggml-org/[email protected] | |
with: | |
key: macOS-latest-cmake-x64 | |
evict-old-files: 1d | |
- name: Dependencies | |
id: depends | |
continue-on-error: true | |
run: | | |
brew update | |
brew install curl | |
- name: Build | |
id: cmake_build | |
run: | | |
sysctl -a | |
# Metal is disabled due to intermittent failures with Github runners not having a GPU: | |
# https://github.com/ggml-org/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313 | |
cmake -B build \ | |
-DCMAKE_INSTALL_RPATH='@loader_path' \ | |
-DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \ | |
-DLLAMA_FATAL_WARNINGS=ON \ | |
-DGGML_METAL=OFF \ | |
-DGGML_RPC=ON \ | |
-DCMAKE_OSX_DEPLOYMENT_TARGET=13.3 | |
cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) | |
- name: Determine tag name | |
id: tag | |
uses: ./.github/actions/get-tag-name | |
- name: Pack artifacts | |
id: pack_artifacts | |
run: | | |
cp LICENSE ./build/bin/ | |
zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip ./build/bin/* | |
- name: Upload artifacts | |
uses: actions/upload-artifact@v4 | |
with: | |
path: llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip | |
name: llama-bin-macos-x64.zip | |
# Job ubuntu-22-cpu: CPU-only Linux build driven by a matrix that currently
# contains a single entry (x64 on ubuntu-22.04); the arm64 entry is commented out.
# The build enables GGML_BACKEND_DL and GGML_CPU_ALL_VARIANTS with
# GGML_NATIVE=OFF, sets the install RPATH to $ORIGIN, then zips ./build/bin
# (plus LICENSE) and uploads it as "llama-bin-ubuntu-<build>.zip".
ubuntu-22-cpu: | |
strategy: | |
matrix: | |
include: | |
- build: 'x64' | |
os: ubuntu-22.04 | |
# GGML_BACKEND_DL and GGML_CPU_ALL_VARIANTS are not currently supported on arm | |
# - build: 'arm64' | |
# os: ubuntu-22.04-arm | |
runs-on: ${{ matrix.os }} | |
steps: | |
- name: Clone | |
id: checkout | |
uses: actions/checkout@v4 | |
with: | |
fetch-depth: 0 | |
- name: ccache | |
uses: ggml-org/[email protected] | |
with: | |
key: ubuntu-cpu-cmake-${{ matrix.build }} | |
evict-old-files: 1d | |
- name: Dependencies | |
id: depends | |
run: | | |
sudo apt-get update | |
sudo apt-get install build-essential libcurl4-openssl-dev | |
- name: Build | |
id: cmake_build | |
run: | | |
cmake -B build \ | |
-DCMAKE_INSTALL_RPATH='$ORIGIN' \ | |
-DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \ | |
-DGGML_BACKEND_DL=ON \ | |
-DGGML_NATIVE=OFF \ | |
-DGGML_CPU_ALL_VARIANTS=ON \ | |
-DLLAMA_FATAL_WARNINGS=ON \ | |
${{ env.CMAKE_ARGS }} | |
cmake --build build --config Release -j $(nproc) | |
- name: Determine tag name | |
id: tag | |
uses: ./.github/actions/get-tag-name | |
- name: Pack artifacts | |
id: pack_artifacts | |
run: | | |
cp LICENSE ./build/bin/ | |
zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-${{ matrix.build }}.zip ./build/bin/* | |
- name: Upload artifacts | |
uses: actions/upload-artifact@v4 | |
with: | |
path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-${{ matrix.build }}.zip | |
name: llama-bin-ubuntu-${{ matrix.build }}.zip | |
# Job ubuntu-22-vulkan: Linux x64 build with the Vulkan backend enabled.
# Installs the Vulkan SDK from the LunarG apt repository, builds with
# GGML_BACKEND_DL / GGML_CPU_ALL_VARIANTS / GGML_VULKAN, then zips ./build/bin
# (plus LICENSE) and uploads it as "llama-bin-ubuntu-vulkan-x64.zip".
ubuntu-22-vulkan:
  runs-on: ubuntu-22.04

  steps:
    - name: Clone
      id: checkout
      uses: actions/checkout@v4
      with:
        fetch-depth: 0

    # NOTE(review): the action reference below looks mangled by e-mail
    # obfuscation in this copy; restore the real owner/action@version.
    - name: ccache
      uses: ggml-org/[email protected]
      with:
        key: ubuntu-22-cmake-vulkan
        evict-old-files: 1d

    - name: Dependencies
      id: depends
      run: |
        # apt-key is deprecated: install the LunarG signing key as its own
        # keyring file under trusted.gpg.d instead of the shared apt-key store.
        wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo tee /etc/apt/trusted.gpg.d/lunarg.asc > /dev/null
        sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
        sudo apt-get update -y
        sudo apt-get install -y build-essential mesa-vulkan-drivers vulkan-sdk libcurl4-openssl-dev

    - name: Build
      id: cmake_build
      run: |
        cmake -B build \
          -DCMAKE_INSTALL_RPATH='$ORIGIN' \
          -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
          -DGGML_BACKEND_DL=ON \
          -DGGML_NATIVE=OFF \
          -DGGML_CPU_ALL_VARIANTS=ON \
          -DGGML_VULKAN=ON \
          ${{ env.CMAKE_ARGS }}
        cmake --build build --config Release -j $(nproc)

    - name: Determine tag name
      id: tag
      uses: ./.github/actions/get-tag-name

    - name: Pack artifacts
      id: pack_artifacts
      run: |
        cp LICENSE ./build/bin/
        zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.zip ./build/bin/*

    - name: Upload artifacts
      uses: actions/upload-artifact@v4
      with:
        path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.zip
        name: llama-bin-ubuntu-vulkan-x64.zip
# Job windows-cpu: CPU backend build for Windows on a windows-2025 runner,
# run once per matrix arch (x64 and arm64).
# - Ninja is installed with choco; libcurl comes from the local
#   windows-setup-curl action, whose curl_path output is exposed as CURL_PATH.
# - The build runs under cmd after calling vcvarsall.bat ("x64" or the
#   "amd64_arm64" cross environment) and uses cmake/<arch>-windows-llvm.cmake
#   as the toolchain file. GGML_CPU_ALL_VARIANTS is ON only for x64.
# - Packing copies the libcurl DLL and the LLVM OpenMP runtime DLL next to the
#   binaries and zips build\bin\Release into "llama-bin-win-cpu-<arch>.zip".
# - NOTE(review): the OpenMP DLL path hard-codes MSVC redist version
#   14.44.35112; the copy will fail if the runner image ships another version.
windows-cpu: | |
runs-on: windows-2025 | |
strategy: | |
matrix: | |
include: | |
- arch: 'x64' | |
- arch: 'arm64' | |
steps: | |
- name: Clone | |
uses: actions/checkout@v4 | |
with: | |
fetch-depth: 0 | |
- name: ccache | |
uses: ggml-org/[email protected] | |
with: | |
key: windows-latest-cmake-cpu-${{ matrix.arch }} | |
variant: ccache | |
evict-old-files: 1d | |
- name: Install Ninja | |
run: | | |
choco install ninja | |
- name: libCURL | |
id: get_libcurl | |
uses: ./.github/actions/windows-setup-curl | |
with: | |
architecture: ${{ matrix.arch == 'x64' && 'win64' || 'win64a' }} | |
- name: Build | |
shell: cmd | |
env: | |
CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }} | |
run: | | |
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" ${{ matrix.arch == 'x64' && 'x64' || 'amd64_arm64' }} | |
cmake -S . -B build -G "Ninja Multi-Config" ^ | |
-D CMAKE_TOOLCHAIN_FILE=cmake/${{ matrix.arch }}-windows-llvm.cmake ^ | |
-DGGML_NATIVE=OFF ^ | |
-DGGML_BACKEND_DL=ON ^ | |
-DGGML_CPU_ALL_VARIANTS=${{ matrix.arch == 'x64' && 'ON' || 'OFF' }} ^ | |
-DGGML_OPENMP=ON ^ | |
-DCURL_LIBRARY="%CURL_PATH%/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="%CURL_PATH%/include" ^ | |
${{ env.CMAKE_ARGS }} | |
cmake --build build --config Release | |
- name: Pack artifacts | |
id: pack_artifacts | |
env: | |
CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }} | |
run: | | |
Copy-Item $env:CURL_PATH\bin\libcurl-${{ matrix.arch }}.dll .\build\bin\Release\ | |
Copy-Item "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Redist\MSVC\14.44.35112\debug_nonredist\${{ matrix.arch }}\Microsoft.VC143.OpenMP.LLVM\libomp140.${{ matrix.arch == 'x64' && 'x86_64' || 'aarch64' }}.dll" .\build\bin\Release\ | |
7z a llama-bin-win-cpu-${{ matrix.arch }}.zip .\build\bin\Release\* | |
- name: Upload artifacts | |
uses: actions/upload-artifact@v4 | |
with: | |
path: llama-bin-win-cpu-${{ matrix.arch }}.zip | |
name: llama-bin-win-cpu-${{ matrix.arch }}.zip | |
# Job windows: builds a single GPU backend DLL per matrix entry on windows-2025:
#   - vulkan / x64          -> target ggml-vulkan
#   - opencl-adreno / arm64 -> target ggml-opencl
# The build sets GGML_CPU=OFF and GGML_BACKEND_DL=ON and only builds
# ${{ matrix.target }}; the zip therefore contains just that one DLL.
# - The Vulkan SDK is downloaded and installed only for the vulkan entry, and
#   VULKAN_SDK / PATH are exported through GITHUB_ENV / GITHUB_PATH.
# - The OpenCL step runs only for the opencl-adreno/arm64 entry. It never
#   leaves the OpenCL-Headers directory, so OpenCL-ICD-Loader is cloned inside
#   it; both install into $RUNNER_TEMP/opencl-arm64-release, which the
#   matrix "defines" pass to cmake as CMAKE_PREFIX_PATH.
# - NOTE(review): OPENBLAS_VERSION is declared in env but no step in this job
#   references it.
windows: | |
runs-on: windows-2025 | |
env: | |
OPENBLAS_VERSION: 0.3.23 | |
VULKAN_VERSION: 1.4.313.2 | |
strategy: | |
matrix: | |
include: | |
- backend: 'vulkan' | |
arch: 'x64' | |
defines: '-DGGML_VULKAN=ON' | |
target: 'ggml-vulkan' | |
- backend: 'opencl-adreno' | |
arch: 'arm64' | |
defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" -DGGML_OPENCL=ON -DGGML_OPENCL_USE_ADRENO_KERNELS=ON' | |
target: 'ggml-opencl' | |
steps: | |
- name: Clone | |
id: checkout | |
uses: actions/checkout@v4 | |
- name: ccache | |
uses: ggml-org/[email protected] | |
with: | |
key: windows-latest-cmake-${{ matrix.backend }}-${{ matrix.arch }} | |
variant: ccache | |
evict-old-files: 1d | |
- name: Install Vulkan SDK | |
id: get_vulkan | |
if: ${{ matrix.backend == 'vulkan' }} | |
run: | | |
curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/vulkansdk-windows-X64-${env:VULKAN_VERSION}.exe" | |
& "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install | |
Add-Content $env:GITHUB_ENV "VULKAN_SDK=C:\VulkanSDK\${env:VULKAN_VERSION}" | |
Add-Content $env:GITHUB_PATH "C:\VulkanSDK\${env:VULKAN_VERSION}\bin" | |
- name: Install Ninja | |
id: install_ninja | |
run: | | |
choco install ninja | |
- name: Install OpenCL Headers and Libs | |
id: install_opencl | |
if: ${{ matrix.backend == 'opencl-adreno' && matrix.arch == 'arm64' }} | |
run: | | |
git clone https://github.com/KhronosGroup/OpenCL-Headers | |
cd OpenCL-Headers | |
cmake -B build ` | |
-DBUILD_TESTING=OFF ` | |
-DOPENCL_HEADERS_BUILD_TESTING=OFF ` | |
-DOPENCL_HEADERS_BUILD_CXX_TESTS=OFF ` | |
-DCMAKE_INSTALL_PREFIX="$env:RUNNER_TEMP/opencl-arm64-release" | |
cmake --build build --target install | |
git clone https://github.com/KhronosGroup/OpenCL-ICD-Loader | |
cd OpenCL-ICD-Loader | |
cmake -B build-arm64-release ` | |
-A arm64 ` | |
-DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" ` | |
-DCMAKE_INSTALL_PREFIX="$env:RUNNER_TEMP/opencl-arm64-release" | |
cmake --build build-arm64-release --target install --config release | |
- name: Build | |
id: cmake_build | |
run: | | |
cmake -S . -B build ${{ matrix.defines }} -DGGML_NATIVE=OFF -DGGML_CPU=OFF -DGGML_BACKEND_DL=ON -DLLAMA_CURL=OFF | |
cmake --build build --config Release --target ${{ matrix.target }} | |
- name: Pack artifacts | |
id: pack_artifacts | |
run: | | |
7z a llama-bin-win-${{ matrix.backend }}-${{ matrix.arch }}.zip .\build\bin\Release\${{ matrix.target }}.dll | |
- name: Upload artifacts | |
uses: actions/upload-artifact@v4 | |
with: | |
path: llama-bin-win-${{ matrix.backend }}-${{ matrix.arch }}.zip | |
name: llama-bin-win-${{ matrix.backend }}-${{ matrix.arch }}.zip | |
# Job windows-cuda: builds only the ggml-cuda backend DLL on windows-2022 for
# each CUDA version in the matrix (currently just 12.4) and uploads two zips:
#   - llama-bin-win-cuda-<ver>-x64.zip         (ggml-cuda.dll)
#   - cudart-llama-bin-win-cuda-<ver>-x64.zip  (cudart / cublas / cublasLt DLLs)
# - The build runs under cmd after vcvarsall.bat x64 and uses one job fewer
#   than NUMBER_OF_PROCESSORS (NINJA_JOBS).
# - The runtime DLLs are collected with robocopy from both the bin and lib
#   directories of CUDA_PATH.
# - NOTE(review): CUDA_PATH is presumably exported by the windows-setup-cuda
#   action; confirm against that action.
windows-cuda: | |
runs-on: windows-2022 | |
strategy: | |
matrix: | |
cuda: ['12.4'] | |
steps: | |
- name: Clone | |
id: checkout | |
uses: actions/checkout@v4 | |
- name: Install ccache | |
uses: ggml-org/[email protected] | |
with: | |
key: windows-cuda-${{ matrix.cuda }} | |
variant: ccache | |
evict-old-files: 1d | |
- name: Install Cuda Toolkit | |
uses: ./.github/actions/windows-setup-cuda | |
with: | |
cuda_version: ${{ matrix.cuda }} | |
- name: Install Ninja | |
id: install_ninja | |
run: | | |
choco install ninja | |
- name: Build | |
id: cmake_build | |
shell: cmd | |
run: | | |
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64 | |
cmake -S . -B build -G "Ninja Multi-Config" ^ | |
-DGGML_BACKEND_DL=ON ^ | |
-DGGML_NATIVE=OFF ^ | |
-DGGML_CPU=OFF ^ | |
-DGGML_CUDA=ON ^ | |
-DLLAMA_CURL=OFF | |
set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1 | |
cmake --build build --config Release -j %NINJA_JOBS% --target ggml-cuda | |
- name: Pack artifacts | |
id: pack_artifacts | |
run: | | |
7z a llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip .\build\bin\Release\ggml-cuda.dll | |
- name: Upload artifacts | |
uses: actions/upload-artifact@v4 | |
with: | |
path: llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip | |
name: llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip | |
- name: Copy and pack Cuda runtime | |
run: | | |
echo "Cuda install location: ${{ env.CUDA_PATH }}" | |
$dst='.\build\bin\cudart\' | |
robocopy "${{env.CUDA_PATH}}\bin" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll | |
robocopy "${{env.CUDA_PATH}}\lib" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll | |
7z a cudart-llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip $dst\* | |
- name: Upload Cuda runtime | |
uses: actions/upload-artifact@v4 | |
with: | |
path: cudart-llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip | |
name: cudart-llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip | |
# Job windows-sycl: builds only the ggml-sycl backend on windows-2022 with the
# Intel oneAPI toolchain and uploads "llama-bin-win-sycl-x64.zip".
# - Steps default to bash; the Build step overrides this with cmd so it can
#   call oneAPI's setvars.bat before running cmake (Ninja, cl for C, icx for C++).
# - oneAPI is installed by scripts/install-oneapi.bat from WINDOWS_BASEKIT_URL
#   with the component list in WINDOWS_DPCPP_MKL.
# - The packaging step copies a fixed list of oneAPI runtime DLLs (plus
#   sycl-ls.exe) from ONEAPI_ROOT into ./build/bin before zipping. The file
#   names carry version suffixes (e.g. mkl_sycl_blas.5.dll, sycl8.dll), so the
#   list has to be revisited whenever WINDOWS_BASEKIT_URL changes.
windows-sycl: | |
runs-on: windows-2022 | |
defaults: | |
run: | |
shell: bash | |
env: | |
WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/24751ead-ddc5-4479-b9e6-f9fe2ff8b9f2/intel-deep-learning-essentials-2025.2.1.25_offline.exe | |
WINDOWS_DPCPP_MKL: intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel:intel.oneapi.win.dnnl:intel.oneapi.win.tbb.devel | |
ONEAPI_ROOT: "C:/Program Files (x86)/Intel/oneAPI" | |
steps: | |
- name: Clone | |
id: checkout | |
uses: actions/checkout@v4 | |
- name: ccache | |
uses: ggml-org/[email protected] | |
with: | |
key: windows-latest-cmake-sycl | |
variant: ccache | |
evict-old-files: 1d | |
- name: Install | |
run: | | |
scripts/install-oneapi.bat $WINDOWS_BASEKIT_URL $WINDOWS_DPCPP_MKL | |
- name: Build | |
id: cmake_build | |
shell: cmd | |
run: | | |
call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" intel64 --force | |
cmake -G "Ninja" -B build ^ | |
-DCMAKE_C_COMPILER=cl -DCMAKE_CXX_COMPILER=icx ^ | |
-DCMAKE_BUILD_TYPE=Release ^ | |
-DGGML_BACKEND_DL=ON -DBUILD_SHARED_LIBS=ON ^ | |
-DGGML_CPU=OFF -DGGML_SYCL=ON ^ | |
-DLLAMA_CURL=OFF | |
cmake --build build --target ggml-sycl -j | |
- name: Build the release package | |
id: pack_artifacts | |
run: | | |
echo "cp oneAPI running time dll files in ${{ env.ONEAPI_ROOT }} to ./build/bin" | |
cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_sycl_blas.5.dll" ./build/bin | |
cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_core.2.dll" ./build/bin | |
cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_tbb_thread.2.dll" ./build/bin | |
cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_adapter_level_zero.dll" ./build/bin | |
cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_adapter_level_zero_v2.dll" ./build/bin | |
cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_adapter_opencl.dll" ./build/bin | |
cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_loader.dll" ./build/bin | |
cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_win_proxy_loader.dll" ./build/bin | |
cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/sycl8.dll" ./build/bin | |
cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/svml_dispmd.dll" ./build/bin | |
cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libmmd.dll" ./build/bin | |
cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libiomp5md.dll" ./build/bin | |
cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/sycl-ls.exe" ./build/bin | |
cp "${{ env.ONEAPI_ROOT }}/dnnl/latest/bin/dnnl.dll" ./build/bin | |
cp "${{ env.ONEAPI_ROOT }}/tbb/latest/bin/tbb12.dll" ./build/bin | |
cp "${{ env.ONEAPI_ROOT }}/tcm/latest/bin/tcm.dll" ./build/bin | |
cp "${{ env.ONEAPI_ROOT }}/tcm/latest/bin/libhwloc-15.dll" ./build/bin | |
cp "${{ env.ONEAPI_ROOT }}/umf/latest/bin/umf.dll" ./build/bin | |
echo "cp oneAPI running time dll files to ./build/bin done" | |
7z a llama-bin-win-sycl-x64.zip ./build/bin/* | |
- name: Upload the release package | |
uses: actions/upload-artifact@v4 | |
with: | |
path: llama-bin-win-sycl-x64.zip | |
name: llama-bin-win-sycl-x64.zip | |
# Job windows-hip: builds only the ggml-hip backend on windows-2022 for the
# AMD GPU targets listed in the matrix ("radeon") and uploads
# "llama-bin-win-hip-<name>-x64.zip".
# - The rocWMMA .deb (ROCm 7.0.1) is downloaded and unpacked with 7z into the
#   workspace; the Build step adds <workspace>/opt/rocm-7.0.1/include to
#   CMAKE_CXX_FLAGS (presumably where the unpacked headers land; confirm).
# - C:\Program Files\AMD\ROCm is cached under a key made of
#   HIPSDK_INSTALLER_VERSION and the runner OS; the HIP SDK installer is
#   downloaded and run only on a cache miss, with a 10 minute timeout.
# - "Verify ROCm" fails the job if no clang.exe is found under the ROCm
#   directory. The Build step derives HIP_PATH from that same clang.exe location.
# - After building, the hipblas / hipblaslt / rocblas DLLs and the rocblas and
#   hipblaslt library directories are copied into build\bin so they are zipped
#   together with the backend.
windows-hip: | |
runs-on: windows-2022 | |
env: | |
HIPSDK_INSTALLER_VERSION: "25.Q3" | |
strategy: | |
matrix: | |
include: | |
- name: "radeon" | |
gpu_targets: "gfx1151;gfx1200;gfx1201;gfx1100;gfx1101;gfx1102;gfx1030;gfx1031;gfx1032" | |
steps: | |
- name: Clone | |
id: checkout | |
uses: actions/checkout@v4 | |
- name: Grab rocWMMA package | |
id: grab_rocwmma | |
run: | | |
curl -o rocwmma.deb "https://repo.radeon.com/rocm/apt/7.0.1/pool/main/r/rocwmma-dev/rocwmma-dev_2.0.0.70001-42~24.04_amd64.deb" | |
7z x rocwmma.deb | |
7z x data.tar | |
- name: Cache ROCm Installation | |
id: cache-rocm | |
uses: actions/cache@v4 | |
with: | |
path: C:\Program Files\AMD\ROCm | |
key: rocm-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ runner.os }} | |
- name: ccache | |
uses: ggml-org/[email protected] | |
with: | |
key: windows-latest-cmake-hip-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ matrix.name }}-x64 | |
evict-old-files: 1d | |
- name: Install ROCm | |
if: steps.cache-rocm.outputs.cache-hit != 'true' | |
id: depends | |
run: | | |
$ErrorActionPreference = "Stop" | |
write-host "Downloading AMD HIP SDK Installer" | |
Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-${{ env.HIPSDK_INSTALLER_VERSION }}-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe" | |
write-host "Installing AMD HIP SDK" | |
$proc = Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -PassThru | |
$completed = $proc.WaitForExit(600000) | |
if (-not $completed) { | |
Write-Error "ROCm installation timed out after 10 minutes. Killing the process" | |
$proc.Kill() | |
exit 1 | |
} | |
if ($proc.ExitCode -ne 0) { | |
Write-Error "ROCm installation failed with exit code $($proc.ExitCode)" | |
exit 1 | |
} | |
write-host "Completed AMD HIP SDK installation" | |
- name: Verify ROCm | |
id: verify | |
run: | | |
# Find and test ROCm installation | |
$clangPath = Get-ChildItem 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | Select-Object -First 1 | |
if (-not $clangPath) { | |
Write-Error "ROCm installation not found" | |
exit 1 | |
} | |
& $clangPath.FullName --version | |
- name: Build | |
id: cmake_build | |
run: | | |
$env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path) | |
$env:CMAKE_PREFIX_PATH="${env:HIP_PATH}" | |
cmake -G "Unix Makefiles" -B build -S . ` | |
-DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" ` | |
-DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" ` | |
-DCMAKE_CXX_FLAGS="-I$($PWD.Path.Replace('\', '/'))/opt/rocm-7.0.1/include/ -Wno-ignored-attributes -Wno-nested-anon-types" ` | |
-DCMAKE_BUILD_TYPE=Release ` | |
-DGGML_BACKEND_DL=ON ` | |
-DGGML_NATIVE=OFF ` | |
-DGGML_CPU=OFF ` | |
-DAMDGPU_TARGETS="${{ matrix.gpu_targets }}" ` | |
-DGGML_HIP_ROCWMMA_FATTN=ON ` | |
-DGGML_HIP=ON ` | |
-DLLAMA_CURL=OFF | |
cmake --build build --target ggml-hip -j ${env:NUMBER_OF_PROCESSORS} | |
md "build\bin\rocblas\library\" | |
md "build\bin\hipblaslt\library" | |
cp "${env:HIP_PATH}\bin\hipblas.dll" "build\bin\" | |
cp "${env:HIP_PATH}\bin\hipblaslt.dll" "build\bin\" | |
cp "${env:HIP_PATH}\bin\rocblas.dll" "build\bin\" | |
cp "${env:HIP_PATH}\bin\rocblas\library\*" "build\bin\rocblas\library\" | |
cp "${env:HIP_PATH}\bin\hipblaslt\library\*" "build\bin\hipblaslt\library\" | |
- name: Pack artifacts | |
id: pack_artifacts | |
run: | | |
7z a llama-bin-win-hip-${{ matrix.name }}-x64.zip .\build\bin\* | |
- name: Upload artifacts | |
uses: actions/upload-artifact@v4 | |
with: | |
path: llama-bin-win-hip-${{ matrix.name }}-x64.zip | |
name: llama-bin-win-hip-${{ matrix.name }}-x64.zip | |
# Job ios-xcode-build: builds the Apple xcframework on a macos-15 runner.
# - Selects Xcode 16.4, then configures an iOS build with the Xcode generator
#   (deployment target 14.0; examples, tools, tests, server and curl disabled)
#   and builds it with code signing turned off.
# - Runs ./build-xcframework.sh and additionally builds the llama.swiftui
#   example project with xcodebuild.
# - Zips build-apple/llama.xcframework with --symlinks (symlinks are stored as
#   links) and uploads it.
# - NOTE(review): the uploaded artifact name contains the tag and has no ".zip"
#   suffix, unlike the artifact names used by the other jobs.
ios-xcode-build: | |
runs-on: macos-15 | |
steps: | |
- name: Checkout code | |
uses: actions/checkout@v4 | |
with: | |
fetch-depth: 0 | |
- name: Setup Xcode | |
run: | | |
sudo xcode-select -s /Applications/Xcode_16.4.app | |
- name: Build | |
id: cmake_build | |
run: | | |
sysctl -a | |
cmake -B build -G Xcode \ | |
-DGGML_METAL_USE_BF16=ON \ | |
-DGGML_METAL_EMBED_LIBRARY=ON \ | |
-DLLAMA_CURL=OFF \ | |
-DLLAMA_BUILD_EXAMPLES=OFF \ | |
-DLLAMA_BUILD_TOOLS=OFF \ | |
-DLLAMA_BUILD_TESTS=OFF \ | |
-DLLAMA_BUILD_SERVER=OFF \ | |
-DCMAKE_SYSTEM_NAME=iOS \ | |
-DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \ | |
-DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml | |
cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO | |
- name: xcodebuild for swift package | |
id: xcodebuild | |
run: | | |
./build-xcframework.sh | |
- name: Build Xcode project | |
run: xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' FRAMEWORK_FOLDER_PATH=./build-ios build | |
- name: Determine tag name | |
id: tag | |
uses: ./.github/actions/get-tag-name | |
- name: Pack artifacts | |
id: pack_artifacts | |
run: | | |
zip --symlinks -r llama-${{ steps.tag.outputs.name }}-xcframework.zip build-apple/llama.xcframework | |
- name: Upload artifacts | |
uses: actions/upload-artifact@v4 | |
with: | |
path: llama-${{ steps.tag.outputs.name }}-xcframework.zip | |
name: llama-${{ steps.tag.outputs.name }}-xcframework | |
# Job release: collects the artifacts of all build jobs and publishes them as a
# GitHub release. Runs only for pushes to master or for a manual dispatch with
# create_release set to true, and only after every job listed in "needs".
# Steps:
#   1. Download all artifacts into ./artifact (merge-multiple flattens them
#      into one directory).
#   2. "Move artifacts":
#      - for each arch (x64, arm64) unzip llama-bin-win-cpu-<arch>.zip into a
#        temp dir and add its contents to every other
#        llama-bin-win-*-<arch>.zip, so each Windows backend zip also carries
#        the CPU build;
#      - rename every llama-bin-win-*.zip to llama-<tag>-bin-win-*.zip while
#        moving it into ./release;
#      - move all remaining zips into ./release unchanged.
#   3. Create the release for the computed tag.
#   4. Upload every .zip in ./release as a release asset (inline script).
release: | |
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} | |
# Fine-grained permissions for the GITHUB_TOKEN | |
# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token | |
permissions: | |
contents: write # for creating release | |
runs-on: ubuntu-latest | |
needs: | |
- windows | |
- windows-cpu | |
- windows-cuda | |
- windows-sycl | |
- windows-hip | |
- ubuntu-22-cpu | |
- ubuntu-22-vulkan | |
- macOS-arm64 | |
- macOS-x64 | |
- ios-xcode-build | |
steps: | |
- name: Clone | |
id: checkout | |
uses: actions/checkout@v4 | |
with: | |
fetch-depth: 0 | |
- name: Determine tag name | |
id: tag | |
uses: ./.github/actions/get-tag-name | |
- name: Download artifacts | |
id: download-artifact | |
uses: actions/download-artifact@v4 | |
with: | |
path: ./artifact | |
merge-multiple: true | |
- name: Move artifacts | |
id: move_artifacts | |
run: | | |
mkdir -p release | |
echo "Adding CPU backend files to existing zips..." | |
for arch in x64 arm64; do | |
cpu_zip="artifact/llama-bin-win-cpu-${arch}.zip" | |
temp_dir=$(mktemp -d) | |
echo "Extracting CPU backend for $arch..." | |
unzip "$cpu_zip" -d "$temp_dir" | |
echo "Adding CPU files to $arch zips..." | |
for target_zip in artifact/llama-bin-win-*-${arch}.zip; do | |
if [[ "$target_zip" == "$cpu_zip" ]]; then | |
continue | |
fi | |
echo "Adding CPU backend to $(basename "$target_zip")" | |
realpath_target_zip=$(realpath "$target_zip") | |
(cd "$temp_dir" && zip -r "$realpath_target_zip" .) | |
done | |
rm -rf "$temp_dir" | |
done | |
echo "Renaming and moving zips to release..." | |
for zip_file in artifact/llama-bin-win-*.zip; do | |
base_name=$(basename "$zip_file" .zip) | |
zip_name="llama-${{ steps.tag.outputs.name }}-${base_name#llama-}.zip" | |
echo "Moving $zip_file to release/$zip_name" | |
mv "$zip_file" "release/$zip_name" | |
done | |
echo "Moving other artifacts..." | |
mv -v artifact/*.zip release | |
- name: Create release | |
id: create_release | |
uses: ggml-org/action-create-release@v1 | |
env: | |
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} | |
with: | |
tag_name: ${{ steps.tag.outputs.name }} | |
- name: Upload release | |
id: upload_release | |
uses: actions/github-script@v3 | |
with: | |
github-token: ${{secrets.GITHUB_TOKEN}} | |
script: | | |
// Uploads every .zip file found in ./release as an asset of the release
// created by the `create_release` step.
//
// `github` and `context` are not defined here; they are expected to be
// provided by the actions/github-script runner that executes this script.
// The script body runs in an async context, so `await` is available.
const path = require('path');
const fs = require('fs');

// Substituted by GitHub Actions before the script runs; always a string.
const release_id = '${{ steps.create_release.outputs.id }}';

// readdirSync / readFileSync are synchronous and return plain values, so they
// must not be awaited; only the upload call returns a promise.
for (const file of fs.readdirSync('./release')) {
  if (path.extname(file) !== '.zip') {
    continue;
  }
  console.log('uploadReleaseAsset', file);
  // Uploads are awaited one at a time, in directory-listing order.
  await github.repos.uploadReleaseAsset({
    owner: context.repo.owner,
    repo: context.repo.repo,
    release_id: release_id,
    name: file,
    data: fs.readFileSync(`./release/${file}`),
  });
}