Add circular tiling support to pad, for Vulkan, CUDA, and CPU (used for making seamless textures) #29772

Workflow file for this run

	# Workflow display name.
	name: CI

	# Triggers: manual dispatch, pushes to master, and pull requests. Push and
	# pull_request runs are limited to changes that touch the listed workflow
	# files, CMake files, or source files.
	on:
	  workflow_dispatch: # allows manual triggering
	  push:
	    branches:
	      - master
	    paths:
	      - '.github/workflows/build.yml'
	      - '.github/workflows/build-linux-cross.yml'
	      - '.github/workflows/build-cmake-pkg.yml'
	      - '**/CMakeLists.txt'
	      # `**/*.ext` matches files with that extension at any directory depth.
	      - '**/*.cmake'
	      - '**/*.h'
	      - '**/*.hpp'
	      - '**/*.c'
	      - '**/*.cpp'
	      - '**/*.cu'
	      - '**/*.cuh'
	      - '**/*.swift'
	      - '**/*.m'
	      - '**/*.metal'
	      - '**/*.comp'

	  pull_request:
	    types: [opened, synchronize, reopened]
	    # Keep this list identical to the push filter above.
	    paths:
	      - '.github/workflows/build.yml'
	      - '.github/workflows/build-linux-cross.yml'
	      - '.github/workflows/build-cmake-pkg.yml'
	      - '**/CMakeLists.txt'
	      - '**/*.cmake'
	      - '**/*.h'
	      - '**/*.hpp'
	      - '**/*.c'
	      - '**/*.cpp'
	      - '**/*.cu'
	      - '**/*.cuh'
	      - '**/*.swift'
	      - '**/*.m'
	      - '**/*.metal'
	      - '**/*.comp'

	# Cancel superseded runs. When github.head_ref is set (pull requests) the group
	# key is the ref, so a newer run for the same ref cancels the older one;
	# otherwise the per-run run_id is used and runs never share a group.
	concurrency:
	  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
	  cancel-in-progress: true

	# Workflow-level environment variables, visible to every job's steps.
	env:
	  GGML_NLOOP: 3
	  GGML_N_THREADS: 1
	  LLAMA_LOG_COLORS: 1
	  LLAMA_LOG_PREFIX: 1
	  LLAMA_LOG_TIMESTAMPS: 1

	# Job definitions: each entry of this mapping is one CI job.
	jobs:
	# macOS (macos-latest runner) CMake build with the Metal options below. Runs
	# test-thread-safety under `leaks -atExit` right after the build, then the
	# ctest tests whose label matches `main` or `curl`.
	macOS-latest-cmake-arm64:
	  runs-on: macos-latest

	  steps:
	    - name: Clone
	      id: checkout
	      uses: actions/checkout@v4

	    - name: ccache
	      uses: ggml-org/ccache-action@v1.2.16
	      with:
	        key: macOS-latest-cmake-arm64
	        evict-old-files: 1d

	    # Best effort: a failing brew step does not fail the job.
	    - name: Dependencies
	      id: depends
	      continue-on-error: true
	      run: |
	        brew update
	        brew install curl

	    - name: Build
	      id: cmake_build
	      run: |
	        sysctl -a
	        cmake -B build \
	          -DCMAKE_BUILD_RPATH="@loader_path" \
	          -DLLAMA_FATAL_WARNINGS=ON \
	          -DGGML_METAL_USE_BF16=ON \
	          -DGGML_METAL_EMBED_LIBRARY=OFF \
	          -DGGML_METAL_SHADER_DEBUG=ON \
	          -DGGML_RPC=ON
	        cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
	        leaks -atExit -- ./build/bin/test-thread-safety -hf ggml-org/gemma-3-270m-qat-GGUF -ngl 99 -p "$(printf 'hello %.0s' {1..128})" -n 16 -c 512 -ub 32 -np 2 -t 2 -lv 1

	    - name: Test
	      id: cmake_test
	      run: |
	        cd build
	        ctest -L 'main|curl' --verbose --timeout 900

	# Intel macOS (macos-15-intel runner) CMake build with Metal disabled, followed
	# by the ctest tests labelled `main`.
	macOS-latest-cmake-x64:
	  runs-on: macos-15-intel

	  steps:
	    - name: Clone
	      id: checkout
	      uses: actions/checkout@v4

	    - name: ccache
	      uses: ggml-org/ccache-action@v1.2.16
	      with:
	        key: macOS-latest-cmake-x64
	        evict-old-files: 1d

	    # Best effort: a failing brew step does not fail the job.
	    - name: Dependencies
	      id: depends
	      continue-on-error: true
	      run: |
	        brew update
	        brew install curl

	    - name: Build
	      id: cmake_build
	      run: |
	        sysctl -a
	        # Metal is disabled due to intermittent failures with Github runners not having a GPU:
	        # https://github.com/ggml-org/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313
	        cmake -B build \
	          -DCMAKE_BUILD_RPATH="@loader_path" \
	          -DLLAMA_FATAL_WARNINGS=ON \
	          -DGGML_METAL=OFF \
	          -DGGML_RPC=ON \
	          -DCMAKE_OSX_DEPLOYMENT_TARGET=13.3
	        cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)

	    - name: Test
	      id: cmake_test
	      run: |
	        cd build
	        ctest -L main --verbose --timeout 900

	# macOS WebGPU build: downloads a prebuilt Dawn release archive, builds with
	# GGML_WEBGPU=ON (Metal and BLAS off), then runs the ctest tests labelled `main`.
	macOS-latest-cmake-arm64-webgpu:
	  runs-on: macos-latest

	  steps:
	    - name: Clone
	      id: checkout
	      uses: actions/checkout@v4

	    - name: ccache
	      uses: ggml-org/ccache-action@v1.2.16
	      with:
	        key: macOS-latest-cmake-arm64-webgpu
	        evict-old-files: 1d

	    # Best effort: a failing brew step does not fail the job.
	    - name: Dependencies
	      id: depends
	      continue-on-error: true
	      run: |
	        brew update
	        brew install curl

	    # Fetches the pinned Dawn release asset and unpacks it into ./dawn.
	    - name: Dawn Dependency
	      id: dawn-depends
	      run: |
	        DAWN_VERSION="v1.0.0"
	        DAWN_OWNER="reeselevine"
	        DAWN_REPO="dawn"
	        DAWN_ASSET_NAME="Dawn-a1a6b45cced25a3b7f4fb491e0ae70796cc7f22b-macos-latest-Release.tar.gz"
	        echo "Fetching release asset from https://github.com/${DAWN_OWNER}/${DAWN_REPO}/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}"
	        curl -L -o artifact.tar.gz \
	          "https://github.com/${DAWN_OWNER}/${DAWN_REPO}/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}"
	        mkdir dawn
	        tar -xvf artifact.tar.gz -C dawn --strip-components=1

	    - name: Build
	      id: cmake_build
	      run: |
	        export CMAKE_PREFIX_PATH=dawn
	        cmake -B build -DGGML_WEBGPU=ON -DGGML_METAL=OFF -DGGML_BLAS=OFF
	        cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)

	    - name: Test
	      id: cmake_test
	      run: |
	        cd build
	        ctest -L main --verbose --timeout 900

	# CPU-only Ubuntu build across four runner images (x64, arm64, s390x, ppc64le).
	# Builds, runs the ctest tests labelled `main` or `curl`, then runs a llama2c
	# smoke test (a separate big-endian variant is used for s390x).
	ubuntu-cpu-cmake:
	  strategy:
	    matrix:
	      include:
	        - build: 'x64'
	          os: ubuntu-22.04
	        - build: 'arm64'
	          os: ubuntu-22.04-arm
	        - build: 's390x'
	          os: ubuntu-24.04-s390x
	        - build: 'ppc64le'
	          os: ubuntu-24.04-ppc64le

	  runs-on: ${{ matrix.os }}

	  steps:
	    - name: Clone
	      id: checkout
	      uses: actions/checkout@v4

	    - name: ccache
	      uses: ggml-org/ccache-action@v1.2.16
	      with:
	        key: ubuntu-cpu-cmake-${{ matrix.build }}
	        evict-old-files: 1d

	    - name: Build Dependencies
	      id: build_depends
	      run: |
	        sudo apt-get update
	        sudo apt-get install -y --no-install-recommends \
	          python3 python3-pip python3-dev \
	          libjpeg-dev build-essential libcurl4-openssl-dev \
	          git-lfs

	    - name: Python Dependencies
	      id: python_depends
	      run: |
	        python3 -m pip install --upgrade pip
	        pip3 install ./gguf-py

	    # s390x only: converts every models/*.gguf file to big-endian; `echo YES`
	    # feeds the conversion script's stdin.
	    - name: Swap Endianness
	      id: endianness
	      if: ${{ matrix.build == 's390x' }}
	      run: |
	        for f in models/*.gguf; do
	          echo YES | python3 gguf-py/gguf/scripts/gguf_convert_endian.py "$f" big
	        done

	    - name: Build
	      id: cmake_build
	      run: |
	        cmake -B build \
	          -DLLAMA_FATAL_WARNINGS=ON \
	          -DGGML_RPC=ON
	        cmake --build build --config Release -j $(nproc)

	    - name: Test
	      id: cmake_test
	      run: |
	        cd build
	        ctest -L 'main|curl' --verbose --timeout 900

	    # All targets except s390x: download the tokenizer and model, convert to
	    # GGUF, and run a short generation with llama-cli.
	    - name: Test llama2c conversion
	      id: llama2c_test
	      if: ${{ matrix.build != 's390x' }}
	      run: |
	        cd build
	        echo "Fetch tokenizer"
	        wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/tok512.bin
	        echo "Fetch llama2c model"
	        wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/stories260K.bin
	        ./bin/llama-convert-llama2c-to-ggml --copy-vocab-from-model ./tok512.bin --llama2c-model stories260K.bin --llama2c-output-model stories260K.gguf
	        ./bin/llama-cli -m stories260K.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256

	    # s390x: uses a pre-converted big-endian GGUF instead of converting locally.
	    - name: Test llama2c (s390x)
	      id: llama2c_test_s390x
	      if: ${{ matrix.build == 's390x' }}
	      run: |
	        cd build
	        echo "Fetch llama2c big-endian model"
	        wget https://huggingface.co/ggml-org/models/resolve/main/tinyllamas/stories260K-be.gguf
	        ./bin/llama-cli -m stories260K-be.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256

	# Debug builds with one sanitizer each (ADDRESS, THREAD, UNDEFINED), followed by
	# the ctest tests labelled `main`. Job failures do not fail the workflow
	# (continue-on-error).
	ubuntu-latest-cmake-sanitizer:
	  runs-on: ubuntu-latest

	  continue-on-error: true

	  strategy:
	    matrix:
	      sanitizer: [ADDRESS, THREAD, UNDEFINED]
	      build_type: [Debug]

	  steps:
	    - name: Clone
	      id: checkout
	      uses: actions/checkout@v4

	    - name: ccache
	      uses: ggml-org/ccache-action@v1.2.16
	      with:
	        key: ubuntu-latest-cmake-sanitizer-${{ matrix.sanitizer }}
	        evict-old-files: 1d

	    - name: Dependencies
	      id: depends
	      run: |
	        sudo apt-get update
	        sudo apt-get install build-essential libcurl4-openssl-dev

	    # Exactly one of the two build steps below runs per matrix entry.
	    - name: Build
	      id: cmake_build
	      if: ${{ matrix.sanitizer != 'THREAD' }}
	      run: |
	        cmake -B build \
	          -DLLAMA_FATAL_WARNINGS=ON \
	          -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
	          -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
	        cmake --build build --config ${{ matrix.build_type }} -j $(nproc)

	    # The THREAD sanitizer build additionally turns OpenMP off.
	    - name: Build (no OpenMP)
	      id: cmake_build_no_openmp
	      if: ${{ matrix.sanitizer == 'THREAD' }}
	      run: |
	        cmake -B build \
	          -DLLAMA_FATAL_WARNINGS=ON \
	          -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
	          -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
	          -DGGML_OPENMP=OFF
	        cmake --build build --config ${{ matrix.build_type }} -j $(nproc)

	    - name: Test
	      id: cmake_test
	      run: |
	        cd build
	        ctest -L main --verbose --timeout 900

	# Ubuntu build with LLAMA_LLGUIDANCE=ON, followed by the ctest tests labelled
	# `main`.
	ubuntu-latest-llguidance:
	  runs-on: ubuntu-latest

	  steps:
	    - name: Clone
	      id: checkout
	      uses: actions/checkout@v4

	    - name: Dependencies
	      id: depends
	      run: |
	        sudo apt-get update
	        sudo apt-get install build-essential libcurl4-openssl-dev

	    # Configures from inside ./build (in-directory `cmake ..` form).
	    - name: Build
	      id: cmake_build
	      run: |
	        mkdir build
	        cd build
	        cmake .. \
	          -DLLAMA_FATAL_WARNINGS=ON \
	          -DLLAMA_LLGUIDANCE=ON
	        cmake --build . --config Release -j $(nproc)

	    - name: Test
	      id: cmake_test
	      run: |
	        cd build
	        ctest -L main --verbose --timeout 900

	# Ubuntu build with GGML_RPC=ON, followed by the ctest tests labelled `main`.
	# Job failures do not fail the workflow (continue-on-error).
	ubuntu-latest-cmake-rpc:
	  runs-on: ubuntu-latest

	  continue-on-error: true

	  steps:
	    - name: Clone
	      id: checkout
	      uses: actions/checkout@v4

	    # - name: ccache
	    #   uses: ggml-org/ccache-action@v1.2.16
	    #   with:
	    #     key: ubuntu-latest-cmake-rpc
	    #     evict-old-files: 1d

	    - name: Dependencies
	      id: depends
	      run: |
	        sudo apt-get update
	        sudo apt-get install build-essential libcurl4-openssl-dev

	    - name: Build
	      id: cmake_build
	      run: |
	        cmake -B build \
	          -DGGML_RPC=ON
	        cmake --build build --config Release -j $(nproc)

	    # NOTE(review): unlike the other Test steps in this workflow, no --timeout
	    # is passed to ctest here.
	    - name: Test
	      id: cmake_test
	      run: |
	        cd build
	        ctest -L main --verbose

	# Ubuntu 24.04 Vulkan build against the distribution's packages (glslc,
	# libvulkan-dev). Build only; no tests are run.
	ubuntu-24-cmake-vulkan-deb:
	  runs-on: ubuntu-24.04

	  steps:
	    - name: Clone
	      id: checkout
	      uses: actions/checkout@v4

	    - name: ccache
	      uses: ggml-org/ccache-action@v1.2.16
	      with:
	        key: ubuntu-24-cmake-vulkan-deb
	        evict-old-files: 1d

	    - name: Dependencies
	      id: depends
	      run: |
	        sudo apt-get install -y glslc libvulkan-dev libcurl4-openssl-dev

	    - name: Configure
	      id: cmake_configure
	      run: |
	        cmake -B build \
	          -DCMAKE_BUILD_TYPE=RelWithDebInfo \
	          -DGGML_BACKEND_DL=ON \
	          -DGGML_CPU_ALL_VARIANTS=ON \
	          -DGGML_VULKAN=ON

	    - name: Build
	      id: cmake_build
	      run: |
	        cmake --build build -j $(nproc)

	# Ubuntu 24.04 Vulkan build against the latest LunarG Vulkan SDK (cached by SDK
	# version), followed by the ctest tests labelled `main`.
	ubuntu-24-cmake-vulkan:
	  runs-on: ubuntu-24.04

	  steps:
	    - name: Clone
	      id: checkout
	      uses: actions/checkout@v4

	    - name: ccache
	      uses: ggml-org/ccache-action@v1.2.16
	      with:
	        key: ubuntu-24-cmake-vulkan
	        evict-old-files: 1d

	    - name: Dependencies
	      id: depends
	      run: |
	        sudo add-apt-repository -y ppa:kisak/kisak-mesa
	        sudo apt-get update -y
	        sudo apt-get install -y build-essential mesa-vulkan-drivers libxcb-xinput0 libxcb-xinerama0 libxcb-cursor-dev libcurl4-openssl-dev

	    # Stores the version string in VULKAN_SDK_VERSION for the following steps.
	    - name: Get latest Vulkan SDK version
	      id: vulkan_sdk_version
	      run: |
	        echo "VULKAN_SDK_VERSION=$(curl https://vulkan.lunarg.com/sdk/latest/linux.txt)" >> "$GITHUB_ENV"

	    - name: Use Vulkan SDK Cache
	      uses: actions/cache@v4
	      id: cache-sdk
	      with:
	        path: ./vulkan_sdk
	        key: vulkan-sdk-${{ env.VULKAN_SDK_VERSION }}-${{ runner.os }}

	    # Only installs the SDK when the cache step above did not restore it.
	    - name: Setup Vulkan SDK
	      if: steps.cache-sdk.outputs.cache-hit != 'true'
	      uses: ./.github/actions/linux-setup-vulkan
	      with:
	        path: ./vulkan_sdk
	        version: ${{ env.VULKAN_SDK_VERSION }}

	    - name: Build
	      id: cmake_build
	      run: |
	        source ./vulkan_sdk/setup-env.sh
	        cmake -B build \
	          -DGGML_VULKAN=ON
	        cmake --build build --config Release -j $(nproc)

	    - name: Test
	      id: cmake_test
	      run: |
	        cd build
	        export GGML_VK_VISIBLE_DEVICES=0
	        export GGML_VK_DISABLE_F16=1
	        # This is using llvmpipe and runs slower than other backends
	        ctest -L main --verbose --timeout 4200

	# Ubuntu 24.04 WebGPU build: installs the latest Vulkan SDK (cached by version)
	# and a prebuilt Dawn release archive, builds with GGML_WEBGPU=ON, then runs
	# the ctest tests labelled `main`.
	ubuntu-24-cmake-webgpu:
	  runs-on: ubuntu-24.04

	  steps:
	    - name: Clone
	      id: checkout
	      uses: actions/checkout@v4

	    - name: ccache
	      uses: ggml-org/ccache-action@v1.2.16
	      with:
	        key: ubuntu-24-cmake-webgpu
	        evict-old-files: 1d

	    - name: Dependencies
	      id: depends
	      run: |
	        sudo add-apt-repository -y ppa:kisak/kisak-mesa
	        sudo apt-get update -y
	        sudo apt-get install -y build-essential mesa-vulkan-drivers libxcb-xinput0 libxcb-xinerama0 libxcb-cursor-dev libcurl4-openssl-dev

	    # Stores the version string in VULKAN_SDK_VERSION for the following steps.
	    - name: Get latest Vulkan SDK version
	      id: vulkan_sdk_version
	      run: |
	        echo "VULKAN_SDK_VERSION=$(curl https://vulkan.lunarg.com/sdk/latest/linux.txt)" >> "$GITHUB_ENV"

	    - name: Use Vulkan SDK Cache
	      uses: actions/cache@v4
	      id: cache-sdk
	      with:
	        path: ./vulkan_sdk
	        key: vulkan-sdk-${{ env.VULKAN_SDK_VERSION }}-${{ runner.os }}

	    # Only installs the SDK when the cache step above did not restore it.
	    - name: Setup Vulkan SDK
	      if: steps.cache-sdk.outputs.cache-hit != 'true'
	      uses: ./.github/actions/linux-setup-vulkan
	      with:
	        path: ./vulkan_sdk
	        version: ${{ env.VULKAN_SDK_VERSION }}

	    # Installs X11/Mesa development packages, then fetches the pinned Dawn
	    # release asset and unpacks it into ./dawn.
	    - name: Dawn Dependency
	      id: dawn-depends
	      run: |
	        sudo apt-get install -y libxrandr-dev libxinerama-dev libxcursor-dev mesa-common-dev libx11-xcb-dev libxi-dev
	        DAWN_VERSION="v1.0.0"
	        DAWN_OWNER="reeselevine"
	        DAWN_REPO="dawn"
	        DAWN_ASSET_NAME="Dawn-a1a6b45cced25a3b7f4fb491e0ae70796cc7f22b-ubuntu-latest-Release.tar.gz"
	        echo "Fetching release asset from https://github.com/${DAWN_OWNER}/${DAWN_REPO}/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}"
	        curl -L -o artifact.tar.gz \
	          "https://github.com/${DAWN_OWNER}/${DAWN_REPO}/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}"
	        mkdir dawn
	        tar -xvf artifact.tar.gz -C dawn --strip-components=1

	    - name: Build
	      id: cmake_build
	      run: |
	        export Dawn_DIR=dawn/lib64/cmake/Dawn
	        cmake -B build -DGGML_WEBGPU=ON
	        cmake --build build --config Release -j $(nproc)

	    - name: Test
	      id: cmake_test
	      run: |
	        cd build
	        # This is using llvmpipe and runs slower than other backends
	        ctest -L main --verbose --timeout 3600

	# HIP build inside the rocm/dev-ubuntu-22.04:6.1.2 container. Build only; no
	# tests are run.
	ubuntu-22-cmake-hip:
	  runs-on: ubuntu-22.04
	  container: rocm/dev-ubuntu-22.04:6.1.2

	  steps:
	    - name: Clone
	      id: checkout
	      uses: actions/checkout@v4

	    - name: Dependencies
	      id: depends
	      run: |
	        sudo apt-get update
	        sudo apt-get install -y build-essential git cmake rocblas-dev hipblas-dev libcurl4-openssl-dev rocwmma-dev

	    - name: ccache
	      uses: ggml-org/ccache-action@v1.2.16
	      with:
	        key: ubuntu-22-cmake-hip
	        evict-old-files: 1d

	    # The HIP compiler path is derived from `hipconfig -l`.
	    - name: Build with native CMake HIP support
	      id: cmake_build
	      run: |
	        cmake -B build -S . \
	          -DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" \
	          -DGGML_HIP_ROCWMMA_FATTN=ON \
	          -DGGML_HIP=ON
	        cmake --build build --config Release -j $(nproc)

	# MUSA build inside the mthreads/musa development container. Build only; no
	# tests are run.
	ubuntu-22-cmake-musa:
	  runs-on: ubuntu-22.04
	  container: mthreads/musa:rc4.3.0-devel-ubuntu22.04-amd64

	  steps:
	    - name: Clone
	      id: checkout
	      uses: actions/checkout@v4

	    # apt-get is invoked without sudo in this container.
	    - name: Dependencies
	      id: depends
	      run: |
	        apt-get update
	        apt-get install -y build-essential git cmake libcurl4-openssl-dev

	    - name: ccache
	      uses: ggml-org/ccache-action@v1.2.16
	      with:
	        key: ubuntu-22-cmake-musa
	        evict-old-files: 1d

	    - name: Build with native CMake MUSA support
	      id: cmake_build
	      run: |
	        cmake -B build -S . \
	          -DGGML_MUSA=ON
	        cmake --build build --config Release -j $(nproc)

	# SYCL build with the Intel oneAPI compilers (icx/icpx) installed from Intel's
	# apt repository. Build only; job failures do not fail the workflow
	# (continue-on-error).
	ubuntu-22-cmake-sycl:
	  runs-on: ubuntu-22.04

	  continue-on-error: true

	  steps:
	    # NOTE(review): the repository is checked out here and again in the
	    # "Clone" step below; one of the two looks redundant.
	    - uses: actions/checkout@v4

	    - name: add oneAPI to apt
	      shell: bash
	      run: |
	        cd /tmp
	        wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
	        sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
	        rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
	        sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"

	    - name: install oneAPI dpcpp compiler
	      shell: bash
	      run: |
	        sudo apt update
	        sudo apt install intel-oneapi-compiler-dpcpp-cpp libcurl4-openssl-dev

	    - name: install oneAPI MKL library
	      shell: bash
	      run: |
	        sudo apt install intel-oneapi-mkl-devel

	    - name: Clone
	      id: checkout
	      uses: actions/checkout@v4

	    - name: ccache
	      uses: ggml-org/ccache-action@v1.2.16
	      with:
	        key: ubuntu-22-cmake-sycl
	        evict-old-files: 1d

	    # setvars.sh puts the oneAPI toolchain on the environment for this step.
	    - name: Build
	      id: cmake_build
	      run: |
	        source /opt/intel/oneapi/setvars.sh
	        cmake -B build \
	          -DGGML_SYCL=ON \
	          -DCMAKE_C_COMPILER=icx \
	          -DCMAKE_CXX_COMPILER=icpx
	        cmake --build build --config Release -j $(nproc)

	# Same as the SYCL build with icx/icpx, with GGML_SYCL_F16=ON added. Build
	# only; job failures do not fail the workflow (continue-on-error).
	ubuntu-22-cmake-sycl-fp16:
	  runs-on: ubuntu-22.04

	  continue-on-error: true

	  steps:
	    # NOTE(review): the repository is checked out here and again in the
	    # "Clone" step below; one of the two looks redundant.
	    - uses: actions/checkout@v4

	    - name: add oneAPI to apt
	      shell: bash
	      run: |
	        cd /tmp
	        wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
	        sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
	        rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
	        sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"

	    - name: install oneAPI dpcpp compiler
	      shell: bash
	      run: |
	        sudo apt update
	        sudo apt install intel-oneapi-compiler-dpcpp-cpp libcurl4-openssl-dev

	    - name: install oneAPI MKL library
	      shell: bash
	      run: |
	        sudo apt install intel-oneapi-mkl-devel

	    - name: Clone
	      id: checkout
	      uses: actions/checkout@v4

	    - name: ccache
	      uses: ggml-org/ccache-action@v1.2.16
	      with:
	        key: ubuntu-22-cmake-sycl-fp16
	        evict-old-files: 1d

	    # setvars.sh puts the oneAPI toolchain on the environment for this step.
	    - name: Build
	      id: cmake_build
	      run: |
	        source /opt/intel/oneapi/setvars.sh
	        cmake -B build \
	          -DGGML_SYCL=ON \
	          -DCMAKE_C_COMPILER=icx \
	          -DCMAKE_CXX_COMPILER=icpx \
	          -DGGML_SYCL_F16=ON
	        cmake --build build --config Release -j $(nproc)

	# Delegates to the reusable workflow in build-linux-cross.yml.
	build-linux-cross:
	  uses: ./.github/workflows/build-linux-cross.yml

	# Delegates to the reusable workflow in build-cmake-pkg.yml.
	build-cmake-pkg:
	  uses: ./.github/workflows/build-cmake-pkg.yml

	# iOS library build via the Xcode generator (common code, examples, tools,
	# tests, and server are all switched off). Code signing is disabled for the
	# build.
	macOS-latest-cmake-ios:
	  runs-on: macos-latest

	  steps:
	    - name: Clone
	      id: checkout
	      uses: actions/checkout@v4

	    - name: ccache
	      uses: ggml-org/ccache-action@v1.2.16
	      with:
	        key: macOS-latest-cmake-ios
	        evict-old-files: 1d

	    # Best effort: a failing brew step does not fail the job.
	    - name: Dependencies
	      id: depends
	      continue-on-error: true
	      run: |
	        brew update

	    - name: Build
	      id: cmake_build
	      run: |
	        sysctl -a
	        cmake -B build -G Xcode \
	          -DGGML_METAL_USE_BF16=ON \
	          -DGGML_METAL_EMBED_LIBRARY=ON \
	          -DLLAMA_BUILD_COMMON=OFF \
	          -DLLAMA_BUILD_EXAMPLES=OFF \
	          -DLLAMA_BUILD_TOOLS=OFF \
	          -DLLAMA_BUILD_TESTS=OFF \
	          -DLLAMA_BUILD_SERVER=OFF \
	          -DCMAKE_SYSTEM_NAME=iOS \
	          -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
	          -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
	        cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO

	# tvOS library build via the Xcode generator (common code, examples, tools,
	# tests, and server are all switched off). Code signing is disabled for the
	# build.
	macOS-latest-cmake-tvos:
	  runs-on: macos-latest

	  steps:
	    - name: Clone
	      id: checkout
	      uses: actions/checkout@v4

	    - name: ccache
	      uses: ggml-org/ccache-action@v1.2.16
	      with:
	        key: macOS-latest-cmake-tvos
	        evict-old-files: 1d

	    # Best effort: a failing brew step does not fail the job.
	    - name: Dependencies
	      id: depends
	      continue-on-error: true
	      run: |
	        brew update

	    - name: Build
	      id: cmake_build
	      run: |
	        sysctl -a
	        cmake -B build -G Xcode \
	          -DGGML_METAL_USE_BF16=ON \
	          -DGGML_METAL_EMBED_LIBRARY=ON \
	          -DLLAMA_BUILD_COMMON=OFF \
	          -DLLAMA_BUILD_EXAMPLES=OFF \
	          -DLLAMA_BUILD_TOOLS=OFF \
	          -DLLAMA_BUILD_TESTS=OFF \
	          -DLLAMA_BUILD_SERVER=OFF \
	          -DCMAKE_SYSTEM_NAME=tvOS \
	          -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
	          -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
	        cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO

	# visionOS library build via the Xcode generator (common code, examples, tools,
	# tests, and server are all switched off). Code signing is disabled for the
	# build. This job has no ccache step.
	macOS-latest-cmake-visionos:
	  runs-on: macos-latest

	  steps:
	    - name: Clone
	      id: checkout
	      uses: actions/checkout@v4

	    # Best effort: a failing brew step does not fail the job.
	    - name: Dependencies
	      id: depends
	      continue-on-error: true
	      run: |
	        brew update

	    - name: Build
	      id: cmake_build
	      run: |
	        sysctl -a
	        cmake -B build -G Xcode \
	          -DGGML_METAL_USE_BF16=ON \
	          -DGGML_METAL_EMBED_LIBRARY=ON \
	          -DLLAMA_BUILD_COMMON=OFF \
	          -DLLAMA_BUILD_EXAMPLES=OFF \
	          -DLLAMA_BUILD_TOOLS=OFF \
	          -DLLAMA_BUILD_TESTS=OFF \
	          -DLLAMA_BUILD_SERVER=OFF \
	          -DCMAKE_SYSTEM_NAME=visionOS \
	          -DCMAKE_OSX_DEPLOYMENT_TARGET=1.0 \
	          -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
	        cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO

	# Runs after ios-xcode-build: downloads the llama-xcframework artifact and
	# builds a universal (arm64 + x86_64) library with the Xcode generator.
	macOS-latest-swift:
	  runs-on: macos-latest
	  needs: ios-xcode-build

	  # NOTE(review): matrix.destination is not referenced by any step of this
	  # job, so the three matrix entries run identical steps.
	  strategy:
	    matrix:
	      destination: ['generic/platform=macOS', 'generic/platform=iOS', 'generic/platform=tvOS']

	  steps:
	    - name: Clone
	      id: checkout
	      uses: actions/checkout@v4

	    - name: ccache
	      uses: ggml-org/ccache-action@v1.2.16
	      with:
	        key: macOS-latest-swift
	        evict-old-files: 1d

	    - name: Download xcframework artifact
	      uses: actions/download-artifact@v4
	      with:
	        name: llama-xcframework
	        path: build-apple/llama.xcframework/

	    # Best effort: a failing brew step does not fail the job.
	    - name: Dependencies
	      id: depends
	      continue-on-error: true
	      run: |
	        brew update

	    - name: Build llama.cpp with CMake
	      id: cmake_build
	      run: |
	        sysctl -a
	        cmake -B build -G Xcode \
	          -DGGML_METAL_USE_BF16=ON \
	          -DGGML_METAL_EMBED_LIBRARY=ON \
	          -DLLAMA_CURL=OFF \
	          -DLLAMA_BUILD_EXAMPLES=OFF \
	          -DLLAMA_BUILD_TOOLS=OFF \
	          -DLLAMA_BUILD_TESTS=OFF \
	          -DLLAMA_BUILD_SERVER=OFF \
	          -DCMAKE_OSX_ARCHITECTURES="arm64;x86_64"
	        cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)

	# Windows builds under MSYS2 for the UCRT64 and CLANG64 environments. Each
	# entry builds twice: a default configuration, then (after wiping ./build) one
	# with OpenBLAS enabled.
	windows-msys2:
	  runs-on: windows-2025

	  strategy:
	    # Let the other matrix entry finish even if one fails.
	    fail-fast: false
	    matrix:
	      include:
	        - { sys: UCRT64, env: ucrt-x86_64, build: Release }
	        - { sys: CLANG64, env: clang-x86_64, build: Release }

	  steps:
	    - name: Clone
	      uses: actions/checkout@v4

	    - name: ccache
	      uses: ggml-org/ccache-action@v1.2.16
	      with:
	        key: windows-msys2
	        variant: ccache
	        evict-old-files: 1d

	    # `>-` folds the package list into one space-separated line.
	    - name: Setup ${{ matrix.sys }}
	      uses: msys2/setup-msys2@v2
	      with:
	        update: true
	        msystem: ${{matrix.sys}}
	        install: >-
	          base-devel
	          git
	          mingw-w64-${{matrix.env}}-toolchain
	          mingw-w64-${{matrix.env}}-cmake
	          mingw-w64-${{matrix.env}}-openblas

	    - name: Build using CMake
	      shell: msys2 {0}
	      run: |
	        cmake -B build
	        cmake --build build --config ${{ matrix.build }} -j $(nproc)

	    - name: Clean after building using CMake
	      shell: msys2 {0}
	      run: |
	        rm -rf build

	    - name: Build using CMake w/ OpenBLAS
	      shell: msys2 {0}
	      run: |
	        cmake -B build -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
	        cmake --build build --config ${{ matrix.build }} -j $(nproc)

	# Windows CMake builds for five configurations (static CPU, OpenBLAS, Vulkan,
	# LLVM arm64, LLVM arm64 + OpenCL/Adreno). Each matrix entry supplies its own
	# CMake arguments in `defines`; tests run only for the x64 entries.
	windows-latest-cmake:
	  runs-on: windows-2025

	  env:
	    OPENBLAS_VERSION: 0.3.23
	    # SDE_VERSION is only referenced by the commented-out SDE test step below.
	    SDE_VERSION: 9.33.0-2024-01-07
	    VULKAN_VERSION: 1.4.313.2

	  strategy:
	    matrix:
	      include:
	        - build: 'cpu-x64 (static)'
	          arch: 'x64'
	          defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=OFF'
	        - build: 'openblas-x64'
	          arch: 'x64'
	          defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_OPENMP=OFF -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
	        - build: 'vulkan-x64'
	          arch: 'x64'
	          defines: '-DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_VULKAN=ON'
	        - build: 'llvm-arm64'
	          arch: 'arm64'
	          defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON'
	        - build: 'llvm-arm64-opencl-adreno'
	          arch: 'arm64'
	          defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" -DGGML_OPENCL=ON -DGGML_OPENCL_USE_ADRENO_KERNELS=ON'

	  steps:
	    - name: Clone
	      id: checkout
	      uses: actions/checkout@v4

	    - name: ccache
	      uses: ggml-org/ccache-action@v1.2.16
	      with:
	        key: windows-latest-cmake-${{ matrix.build }}
	        variant: ccache
	        evict-old-files: 1d

	    # openblas-x64 only: downloads the OpenBLAS release and its license, then
	    # uses MSVC's lib.exe to generate openblas.lib from libopenblas.def.
	    - name: Download OpenBLAS
	      id: get_openblas
	      if: ${{ matrix.build == 'openblas-x64' }}
	      run: |
	        curl.exe -o $env:RUNNER_TEMP/openblas.zip -L "https://github.com/xianyi/OpenBLAS/releases/download/v${env:OPENBLAS_VERSION}/OpenBLAS-${env:OPENBLAS_VERSION}-x64.zip"
	        curl.exe -o $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt -L "https://github.com/xianyi/OpenBLAS/raw/v${env:OPENBLAS_VERSION}/LICENSE"
	        mkdir $env:RUNNER_TEMP/openblas
	        tar.exe -xvf $env:RUNNER_TEMP/openblas.zip -C $env:RUNNER_TEMP/openblas
	        $vcdir = $(vswhere -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath)
	        $msvc = $(join-path $vcdir $('VC\Tools\MSVC\'+$(gc -raw $(join-path $vcdir 'VC\Auxiliary\Build\Microsoft.VCToolsVersion.default.txt')).Trim()))
	        $lib = $(join-path $msvc 'bin\Hostx64\x64\lib.exe')
	        & $lib /machine:x64 "/def:${env:RUNNER_TEMP}/openblas/lib/libopenblas.def" "/out:${env:RUNNER_TEMP}/openblas/lib/openblas.lib" /name:openblas.dll

	    # vulkan-x64 only: installs the pinned SDK and exports VULKAN_SDK and its
	    # bin directory to later steps.
	    - name: Install Vulkan SDK
	      id: get_vulkan
	      if: ${{ matrix.build == 'vulkan-x64' }}
	      run: |
	        curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/vulkansdk-windows-X64-${env:VULKAN_VERSION}.exe"
	        & "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install
	        Add-Content $env:GITHUB_ENV "VULKAN_SDK=C:\VulkanSDK\${env:VULKAN_VERSION}"
	        Add-Content $env:GITHUB_PATH "C:\VulkanSDK\${env:VULKAN_VERSION}\bin"

	    - name: Install Ninja
	      id: install_ninja
	      run: |
	        choco install ninja

	    # llvm-arm64-opencl-adreno only: builds and installs the OpenCL headers and
	    # ICD loader into $RUNNER_TEMP/opencl-arm64-release. The ICD loader is
	    # cloned inside the OpenCL-Headers checkout, since the step never leaves
	    # that directory.
	    - name: Install OpenCL Headers and Libs
	      id: install_opencl
	      if: ${{ matrix.build == 'llvm-arm64-opencl-adreno' }}
	      run: |
	        git clone https://github.com/KhronosGroup/OpenCL-Headers
	        cd OpenCL-Headers
	        cmake -B build `
	          -DBUILD_TESTING=OFF `
	          -DOPENCL_HEADERS_BUILD_TESTING=OFF `
	          -DOPENCL_HEADERS_BUILD_CXX_TESTS=OFF `
	          -DCMAKE_INSTALL_PREFIX="$env:RUNNER_TEMP/opencl-arm64-release"
	        cmake --build build --target install
	        git clone https://github.com/KhronosGroup/OpenCL-ICD-Loader
	        cd OpenCL-ICD-Loader
	        cmake -B build-arm64-release `
	          -A arm64 `
	          -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" `
	          -DCMAKE_INSTALL_PREFIX="$env:RUNNER_TEMP/opencl-arm64-release"
	        cmake --build build-arm64-release --target install --config release

	    # Passes 'win64' for x64 entries and 'win64a' for everything else.
	    - name: libCURL
	      id: get_libcurl
	      uses: ./.github/actions/windows-setup-curl
	      with:
	        architecture: ${{ matrix.arch == 'x64' && 'win64' || 'win64a' }}

	    - name: Build
	      id: cmake_build
	      env:
	        CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }}
	      run: |
	        cmake -S . -B build ${{ matrix.defines }} `
	          -DCURL_LIBRARY="$env:CURL_PATH/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:CURL_PATH/include"
	        cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS}
	        cp $env:CURL_PATH/bin/libcurl-*.dll build/bin/Release

	    - name: Add libopenblas.dll
	      id: add_libopenblas_dll
	      if: ${{ matrix.build == 'openblas-x64' }}
	      run: |
	        cp $env:RUNNER_TEMP/openblas/bin/libopenblas.dll ./build/bin/Release/openblas.dll
	        cp $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt ./build/bin/Release/OpenBLAS-${env:OPENBLAS_VERSION}.txt

	    - name: Test
	      id: cmake_test
	      if: ${{ matrix.arch == 'x64' }}
	      run: |
	        cd build
	        ctest -L main -C Release --verbose --timeout 900

	    # TODO: disabled for now, consider adding tests for all CPU variants instead
	    # - name: Test (Intel SDE)
	    #   id: cmake_test_sde
	    #   if: ${{ matrix.build == 'avx512-x64' && env.HAS_AVX512F == '0' }} # use Intel SDE for AVX-512 emulation
	    #   run: |
	    #     curl.exe -o $env:RUNNER_TEMP/sde.tar.xz -L "https://downloadmirror.intel.com/813591/sde-external-${env:SDE_VERSION}-win.tar.xz"
	    #     # for some weird reason windows tar doesn't like sde tar.xz
	    #     7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar.xz
	    #     7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar
	    #     $sde = $(join-path $env:RUNNER_TEMP sde-external-${env:SDE_VERSION}-win/sde.exe)
	    #     cd build
	    #     $env:LLAMA_SKIP_TESTS_SLOW_ON_EMULATOR = 1
	    #     & $sde -future -- ctest -L main -C Release --verbose --timeout 900

	# CUDA build inside the nvidia/cuda:12.6.2-devel-ubuntu24.04 container, using
	# Ninja. Build only; no tests are run.
	ubuntu-latest-cmake-cuda:
	  runs-on: ubuntu-latest
	  container: nvidia/cuda:12.6.2-devel-ubuntu24.04

	  steps:
	    - name: Clone
	      id: checkout
	      uses: actions/checkout@v4

	    # DEBIAN_FRONTEND=noninteractive keeps apt from prompting.
	    - name: Install dependencies
	      env:
	        DEBIAN_FRONTEND: noninteractive
	      run: |
	        apt update
	        apt install -y cmake build-essential ninja-build libgomp1 git libcurl4-openssl-dev

	    - name: ccache
	      uses: ggml-org/ccache-action@v1.2.16
	      with:
	        key: ubuntu-latest-cmake-cuda
	        evict-old-files: 1d

	    - name: Build with CMake
	      run: |
	        cmake -S . -B build -G Ninja \
	          -DCMAKE_BUILD_TYPE=Release \
	          -DCMAKE_CUDA_ARCHITECTURES=89-real \
	          -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined \
	          -DLLAMA_FATAL_WARNINGS=ON \
	          -DGGML_NATIVE=OFF \
	          -DGGML_CUDA=ON
	        cmake --build build

	# Windows CUDA build (CUDA version from the matrix) with Ninja Multi-Config
	# under the MSVC x64 environment. Build only; no tests are run.
	windows-2022-cmake-cuda:
	  runs-on: windows-2022

	  strategy:
	    matrix:
	      cuda: ['12.4']

	  steps:
	    - name: Clone
	      id: checkout
	      uses: actions/checkout@v4

	    - name: Install ccache
	      uses: ggml-org/ccache-action@v1.2.16
	      with:
	        key: windows-cuda-${{ matrix.cuda }}
	        variant: ccache
	        evict-old-files: 1d

	    - name: Install Cuda Toolkit
	      uses: ./.github/actions/windows-setup-cuda
	      with:
	        cuda_version: ${{ matrix.cuda }}

	    - name: Install Ninja
	      id: install_ninja
	      run: |
	        choco install ninja

	    - name: libCURL
	      id: get_libcurl
	      uses: ./.github/actions/windows-setup-curl

	    # Runs in cmd (hence the `^` line continuations). The `ggml` target is
	    # built first with one job fewer than the processor count, then the
	    # remaining targets are built.
	    - name: Build
	      id: cmake_build
	      shell: cmd
	      env:
	        CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }}
	      run: |
	        call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64
	        cmake -S . -B build -G "Ninja Multi-Config" ^
	          -DLLAMA_BUILD_SERVER=ON ^
	          -DGGML_NATIVE=OFF ^
	          -DGGML_BACKEND_DL=ON ^
	          -DGGML_CPU_ALL_VARIANTS=ON ^
	          -DGGML_CUDA=ON ^
	          -DGGML_RPC=ON ^
	          -DCURL_LIBRARY="%CURL_PATH%/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="%CURL_PATH%/include"
	        set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1
	        cmake --build build --config Release -j %NINJA_JOBS% -t ggml
	        cmake --build build --config Release

	# Windows SYCL build: installs the Intel oneAPI components through
	# scripts/install-oneapi.bat, then builds with examples/sycl/win-build-sycl.bat.
	# All run steps default to bash.
	windows-latest-cmake-sycl:
	  runs-on: windows-2022

	  defaults:
	    run:
	      shell: bash

	  env:
	    WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/24751ead-ddc5-4479-b9e6-f9fe2ff8b9f2/intel-deep-learning-essentials-2025.2.1.25_offline.exe
	    WINDOWS_DPCPP_MKL: intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel:intel.oneapi.win.dnnl:intel.oneapi.win.tbb.devel
	    ONEAPI_ROOT: "C:/Program Files (x86)/Intel/oneAPI"

	  steps:
	    - name: Clone
	      id: checkout
	      uses: actions/checkout@v4

	    - name: ccache
	      uses: ggml-org/ccache-action@v1.2.16
	      with:
	        key: windows-latest-cmake-sycl
	        variant: ccache
	        evict-old-files: 1d

	    - name: Install
	      run: |
	        scripts/install-oneapi.bat $WINDOWS_BASEKIT_URL $WINDOWS_DPCPP_MKL

	    # TODO: add libcurl support ; we will also need to modify win-build-sycl.bat to accept user-specified args

	    - name: Build
	      id: cmake_build
	      run: examples/sycl/win-build-sycl.bat

	# Windows HIP build: unpacks the rocWMMA headers from a .deb package, installs
	# (or restores from cache) the ROCm/HIP SDK, and builds with the SDK's clang.
	# Build only; no tests are run.
	windows-latest-cmake-hip:
	  runs-on: windows-2022

	  env:
	    # The ROCm version must correspond to the version used in the HIP SDK.
	    ROCM_VERSION: "6.4.2"
	    # Make sure this is in sync with build-cache.yml
	    HIPSDK_INSTALLER_VERSION: "25.Q3"

	  steps:
	    - name: Clone
	      id: checkout
	      uses: actions/checkout@v4

	    # Extracts the package contents into the workspace; the Build step adds
	    # ./opt/rocm-<ROCM_VERSION>/include to the compiler's include path.
	    - name: Grab rocWMMA package
	      id: grab_rocwmma
	      run: |
	        curl -o rocwmma.deb "https://repo.radeon.com/rocm/apt/${{ env.ROCM_VERSION }}/pool/main/r/rocwmma-dev/rocwmma-dev_1.7.0.60402-120~24.04_amd64.deb"
	        7z x rocwmma.deb
	        7z x data.tar

	    - name: Use ROCm Installation Cache
	      uses: actions/cache@v4
	      id: cache-rocm
	      with:
	        path: C:\Program Files\AMD\ROCm
	        key: rocm-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ runner.os }}

	    # Only installs ROCm when the cache step above did not restore it.
	    - name: Setup ROCm
	      if: steps.cache-rocm.outputs.cache-hit != 'true'
	      uses: ./.github/actions/windows-setup-rocm
	      with:
	        version: ${{ env.HIPSDK_INSTALLER_VERSION }}

	    - name: Verify ROCm
	      id: verify
	      run: |
	        # Find and test ROCm installation
	        $clangPath = Get-ChildItem 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | Select-Object -First 1
	        if (-not $clangPath) {
	          Write-Error "ROCm installation not found"
	          exit 1
	        }
	        & $clangPath.FullName --version

	    - name: Install ccache
	      uses: ggml-org/ccache-action@v1.2.16
	      with:
	        key: ${{ github.job }}
	        evict-old-files: 1d

	    - name: libCURL
	      id: get_libcurl
	      uses: ./.github/actions/windows-setup-curl

	    # HIP_PATH is the directory two levels above the located clang.exe.
	    - name: Build
	      id: cmake_build
	      env:
	        CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }}
	      run: |
	        $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
	        $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
	        cmake -G "Unix Makefiles" -B build -S . `
	          -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" `
	          -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" `
	          -DCMAKE_CXX_FLAGS="-I$($PWD.Path.Replace('\', '/'))/opt/rocm-${{ env.ROCM_VERSION }}/include/" `
	          -DCMAKE_BUILD_TYPE=Release `
	          -DROCM_DIR="${env:HIP_PATH}" `
	          -DGGML_HIP=ON `
	          -DGGML_HIP_ROCWMMA_FATTN=ON `
	          -DGGML_RPC=ON `
	          -DCURL_LIBRARY="$env:CURL_PATH/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:CURL_PATH/include"
	        cmake --build build -j ${env:NUMBER_OF_PROCESSORS}

	# Builds the iOS library with the Xcode generator, runs build-xcframework.sh,
	# uploads the result as the `llama-xcframework` artifact (which the
	# macOS-latest-swift job downloads), and builds the llama.swiftui example
	# project.
	ios-xcode-build:
	  runs-on: macos-latest

	  steps:
	    - name: Checkout code
	      uses: actions/checkout@v4

	    - name: Setup Xcode
	      uses: maxim-lobanov/setup-xcode@v1
	      with:
	        xcode-version: latest-stable

	    - name: Build
	      id: cmake_build
	      run: |
	        sysctl -a
	        cmake -B build -G Xcode \
	          -DGGML_METAL_USE_BF16=ON \
	          -DGGML_METAL_EMBED_LIBRARY=ON \
	          -DLLAMA_CURL=OFF \
	          -DLLAMA_BUILD_EXAMPLES=OFF \
	          -DLLAMA_BUILD_TOOLS=OFF \
	          -DLLAMA_BUILD_TESTS=OFF \
	          -DLLAMA_BUILD_SERVER=OFF \
	          -DCMAKE_SYSTEM_NAME=iOS \
	          -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
	          -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
	        cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO

	    - name: xcodebuild for swift package
	      id: xcodebuild
	      run: |
	        ./build-xcframework.sh

	    # Kept for one day only.
	    - name: Upload xcframework artifact
	      uses: actions/upload-artifact@v4
	      with:
	        name: llama-xcframework
	        path: build-apple/llama.xcframework/
	        retention-days: 1

	    - name: Build Xcode project
	      run: |
	        xcodebuild -downloadPlatform iOS
	        xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' FRAMEWORK_FOLDER_PATH=./build-ios build

	# Android job: sets up a JDK and the Android SDK, then runs the Gradle build
	# of examples/llama.android.
	android-build:
	  runs-on: ubuntu-latest

	  steps:
	    - name: Clone
	      uses: actions/checkout@v4

	    # Disabled due to size (400MB) and always 0 cache hits
	    # - name: ccache
	    #   uses: ggml-org/[email protected]
	    #   with:
	    #     key: android-build
	    #     evict-old-files: 1d

	    - name: Set up JDK
	      # v4 replaces v3, which targets the deprecated Node 16 action runtime;
	      # the `java-version` / `distribution` inputs are unchanged.
	      uses: actions/setup-java@v4
	      with:
	        # Quoted so the version stays a string whatever value is used later.
	        java-version: '17'
	        distribution: zulu

	    - name: Setup Android SDK
	      uses: android-actions/setup-android@v3
	      with:
	        log-accepted-android-sdk-licenses: false

	    - name: Build
	      run: |
	        cd examples/llama.android
	        ./gradlew build --no-daemon

	# Android NDK cross-build with two matrix entries: a plain arm64 CPU build
	# and a Snapdragon preset build. The OpenCL, Hexagon SDK and preset steps
	# run only for the 'arm64-snapdragon' entry.
	android-ndk-build:
	  runs-on: ubuntu-latest

	  env:
	    # Quoted: version-like values must stay strings.
	    OPENCL_VERSION: '2025.07.22'

	  strategy:
	    matrix:
	      include:
	        - build: 'arm64-cpu'
	          defines: '-D ANDROID_ABI=arm64-v8a -D ANDROID_PLATFORM=android-31 -D CMAKE_TOOLCHAIN_FILE=${ANDROID_NDK_ROOT}/build/cmake/android.toolchain.cmake -D GGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv8.5-a+fp16+i8mm -G Ninja -D LLAMA_CURL=OFF -D GGML_OPENMP=OFF'
	        - build: 'arm64-snapdragon'
	          defines: '--preset arm64-android-snapdragon-release'

	  steps:
	    - name: Clone
	      id: checkout
	      uses: actions/checkout@v4

	    - name: Install OpenCL Headers and Libs
	      id: install_opencl
	      if: ${{ matrix.build == 'arm64-snapdragon' }}
	      # Downloads the Khronos headers, C++ bindings and ICD loader at
	      # OPENCL_VERSION, copies the headers into the NDK sysroot, builds the
	      # loader with the NDK toolchain and installs libOpenCL.so there too.
	      # `curl -f` makes an HTTP error fail this step instead of saving an
	      # error page that only breaks later in `tar`.
	      run: |
	        mkdir opencl
	        curl -fL -o opencl/clhpp.tar.gz https://github.com/KhronosGroup/OpenCL-CLHPP/archive/refs/tags/v${OPENCL_VERSION}.tar.gz
	        curl -fL -o opencl/headers.tar.gz https://github.com/KhronosGroup/OpenCL-Headers/archive/refs/tags/v${OPENCL_VERSION}.tar.gz
	        curl -fL -o opencl/icd-loader.tar.gz https://github.com/KhronosGroup/OpenCL-ICD-Loader/archive/refs/tags/v${OPENCL_VERSION}.tar.gz
	        tar -xaf opencl/headers.tar.gz -C opencl
	        tar -xaf opencl/clhpp.tar.gz -C opencl
	        tar -xaf opencl/icd-loader.tar.gz -C opencl
	        sudo cp -r opencl/OpenCL-Headers-${OPENCL_VERSION}/CL ${ANDROID_NDK_ROOT}/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include
	        sudo cp -r opencl/OpenCL-CLHPP-${OPENCL_VERSION}/include/CL/* ${ANDROID_NDK_ROOT}/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include/CL
	        cd opencl/OpenCL-ICD-Loader-${OPENCL_VERSION}
	        cmake -B build -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK_ROOT}/build/cmake/android.toolchain.cmake -DOPENCL_ICD_LOADER_HEADERS_DIR=${ANDROID_NDK_ROOT}/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include -DANDROID_ABI=arm64-v8a -DANDROID_PLATFORM=31 -DANDROID_STL=c++_shared
	        cmake --build build
	        sudo cp build/libOpenCL.so ${ANDROID_NDK_ROOT}/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/lib/aarch64-linux-android
	        rm -rf opencl

	    - name: Install Hexagon SDK
	      id: install_hexsdk
	      if: ${{ matrix.build == 'arm64-snapdragon' }}
	      env:
	        HEXSDK_VER: '6.4.0.2'
	        HEXTLS_VER: '19.0.04'
	      # Unpacks the SDK under /opt/hexagon and exports its locations and
	      # DEFAULT_* settings to later steps through $GITHUB_ENV. The local
	      # archive name carries the .tar.xz suffix of the file being downloaded.
	      run: |
	        curl -fL -o hex-sdk.tar.xz https://github.com/snapdragon-toolchain/hexagon-sdk/releases/download/v$HEXSDK_VER/hexagon-sdk-v$HEXSDK_VER-amd64-lnx.tar.xz
	        mkdir hex-sdk
	        tar -xaf hex-sdk.tar.xz -C hex-sdk
	        ls -l hex-sdk
	        sudo mv hex-sdk /opt/hexagon
	        echo "HEXAGON_SDK_ROOT=/opt/hexagon/$HEXSDK_VER" >> "$GITHUB_ENV"
	        echo "HEXAGON_TOOLS_ROOT=/opt/hexagon/$HEXSDK_VER/tools/HEXAGON_Tools/$HEXTLS_VER" >> "$GITHUB_ENV"
	        echo "DEFAULT_HLOS_ARCH=64" >> "$GITHUB_ENV"
	        echo "DEFAULT_TOOLS_VARIANT=toolv19" >> "$GITHUB_ENV"
	        echo "DEFAULT_NO_QURT_INC=0" >> "$GITHUB_ENV"
	        echo "DEFAULT_DSP_ARCH=v73" >> "$GITHUB_ENV"

	    - name: Update CMake presets
	      id: update_presets
	      if: ${{ matrix.build == 'arm64-snapdragon' }}
	      run: |
	        cp docs/backend/hexagon/CMakeUserPresets.json .

	    - name: Build
	      id: ndk_build
	      # matrix.defines supplies either the explicit -D flags or the preset.
	      run: |
	        cmake ${{ matrix.defines }} -B build
	        cmake --build build
	        cmake --install build --prefix pkg-adb/llama.cpp

	    - name: Test
	      id: cmake_test
	      run: |
	        echo "FIXME: test on devices"

	# CANN build inside the ascendai/cann container, for x86 and aarch64.
	# On pull requests the job runs only when the PR has the 'Ascend NPU' label;
	# for every other event it always runs.
	openEuler-latest-cmake-cann:
	  if: ${{ github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'Ascend NPU') }}
	  defaults:
	    run:
	      shell: bash -el {0}
	  strategy:
	    matrix:
	      arch: [x86, aarch64]
	      cann:
	        - '8.1.RC1.alpha001-910b-openeuler22.03-py3.10'
	      device:
	        - 'ascend910b3'
	      build:
	        - 'Release'
	  # aarch64 matrix entries use the ARM runner, everything else the x64 one.
	  runs-on: ${{ matrix.arch == 'aarch64' && 'ubuntu-24.04-arm' || 'ubuntu-24.04' }}
	  container: ascendai/cann:${{ matrix.cann }}
	  steps:
	    - name: Checkout
	      uses: actions/checkout@v4

	    - name: Dependencies
	      run: |
	        yum update -y
	        yum install -y git gcc gcc-c++ make cmake libcurl-devel

	    - name: Build
	      # Prepends the toolkit's lib64 and per-architecture devlib directories
	      # to LD_LIBRARY_PATH before configuring with GGML_CANN enabled.
	      run: |
	        export LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/$(uname -m)-linux/devlib/:${LD_LIBRARY_PATH}

	        cmake -S . -B build \
	          -DCMAKE_BUILD_TYPE=${{ matrix.build }} \
	          -DGGML_CANN=on \
	          -DSOC_TYPE=${{ matrix.device }}
	        cmake --build build -j $(nproc)

	# TODO: simplify the following workflows using a matrix
	# TODO: run lighter CI on PRs and the full CI only on master (if needed)
	# Runs ci/run.sh on a hosted x64 runner with GG_BUILD_LOW_PERF=1.
	ggml-ci-x64-cpu-low-perf:
	  runs-on: ubuntu-22.04

	  steps:
	    - name: Clone
	      id: checkout
	      uses: actions/checkout@v4

	    - name: ccache
	      # NOTE(review): this action ref looks mangled by e-mail obfuscation
	      # during extraction; restore the real owner/repo@version before use.
	      uses: ggml-org/[email protected]
	      with:
	        key: ggml-ci-x64-cpu-low-perf
	        evict-old-files: 1d

	    - name: Dependencies
	      id: depends
	      # -y keeps apt-get non-interactive, matching the kleidiai job.
	      run: |
	        sudo apt-get update
	        sudo apt-get install -y build-essential libcurl4-openssl-dev

	    - name: Test
	      id: ggml-ci
	      run: |
	        LLAMA_ARG_THREADS=$(nproc) GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt

	# Runs ci/run.sh on a hosted arm64 runner with GG_BUILD_LOW_PERF=1.
	ggml-ci-arm64-cpu-low-perf:
	  runs-on: ubuntu-22.04-arm

	  steps:
	    - name: Clone
	      id: checkout
	      uses: actions/checkout@v4

	    - name: ccache
	      # NOTE(review): this action ref looks mangled by e-mail obfuscation
	      # during extraction; restore the real owner/repo@version before use.
	      uses: ggml-org/[email protected]
	      with:
	        key: ggml-ci-arm64-cpu-low-perf
	        evict-old-files: 1d

	    - name: Dependencies
	      id: depends
	      # -y keeps apt-get non-interactive, matching the kleidiai job.
	      run: |
	        sudo apt-get update
	        sudo apt-get install -y build-essential libcurl4-openssl-dev

	    - name: Test
	      id: ggml-ci
	      run: |
	        LLAMA_ARG_THREADS=$(nproc) GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt

	# Runs ci/run.sh on a hosted x64 runner without the low-perf switch.
	ggml-ci-x64-cpu-high-perf:
	  runs-on: ubuntu-22.04

	  steps:
	    - name: Clone
	      id: checkout
	      uses: actions/checkout@v4

	    - name: ccache
	      # NOTE(review): this action ref looks mangled by e-mail obfuscation
	      # during extraction; restore the real owner/repo@version before use.
	      uses: ggml-org/[email protected]
	      with:
	        key: ggml-ci-x64-cpu-high-perf
	        evict-old-files: 1d

	    - name: Dependencies
	      id: depends
	      # -y keeps apt-get non-interactive, matching the kleidiai job.
	      run: |
	        sudo apt-get update
	        sudo apt-get install -y build-essential libcurl4-openssl-dev

	    - name: Test
	      id: ggml-ci
	      run: |
	        LLAMA_ARG_THREADS=$(nproc) bash ./ci/run.sh ./tmp/results ./tmp/mnt

	# Runs ci/run.sh on a hosted arm64 runner with GG_BUILD_NO_SVE=1,
	# GG_BUILD_NO_BF16=1 and GG_BUILD_EXTRA_TESTS_0=1.
	ggml-ci-arm64-cpu-high-perf:
	  runs-on: ubuntu-22.04-arm

	  steps:
	    - name: Clone
	      id: checkout
	      uses: actions/checkout@v4

	    - name: ccache
	      # NOTE(review): this action ref looks mangled by e-mail obfuscation
	      # during extraction; restore the real owner/repo@version before use.
	      uses: ggml-org/[email protected]
	      with:
	        key: ggml-ci-arm64-cpu-high-perf
	        evict-old-files: 1d

	    - name: Dependencies
	      id: depends
	      # -y keeps apt-get non-interactive, matching the kleidiai job.
	      run: |
	        sudo apt-get update
	        sudo apt-get install -y build-essential libcurl4-openssl-dev

	    - name: Test
	      id: ggml-ci
	      run: |
	        LLAMA_ARG_THREADS=$(nproc) GG_BUILD_NO_SVE=1 GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt

	# Runs ci/run.sh on a hosted arm64 runner with GG_BUILD_NO_BF16=1 and
	# GG_BUILD_EXTRA_TESTS_0=1; GG_BUILD_NO_SVE is not set in this variant.
	ggml-ci-arm64-cpu-high-perf-sve:
	  runs-on: ubuntu-22.04-arm

	  steps:
	    - name: Clone
	      id: checkout
	      uses: actions/checkout@v4

	    - name: ccache
	      # NOTE(review): this action ref looks mangled by e-mail obfuscation
	      # during extraction; restore the real owner/repo@version before use.
	      uses: ggml-org/[email protected]
	      with:
	        key: ggml-ci-arm64-cpu-high-perf-sve
	        evict-old-files: 1d

	    - name: Dependencies
	      id: depends
	      # -y keeps apt-get non-interactive, matching the kleidiai job.
	      run: |
	        sudo apt-get update
	        sudo apt-get install -y build-essential libcurl4-openssl-dev

	    - name: Test
	      id: ggml-ci
	      run: |
	        LLAMA_ARG_THREADS=$(nproc) GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt

	# Runs ci/run.sh with GG_BUILD_CUDA=1 on a self-hosted NVIDIA x64 runner.
	ggml-ci-x64-nvidia-cuda:
	  runs-on: [self-hosted, Linux, X64, NVIDIA]

	  steps:
	    - name: Clone
	      id: checkout
	      uses: actions/checkout@v4

	    - name: Test
	      id: ggml-ci
	      # nvidia-smi output is printed to the job log before the CI script runs.
	      run: |
	        nvidia-smi
	        GG_BUILD_CUDA=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp

	# Runs ci/run.sh with GG_BUILD_VULKAN=1 and GGML_VK_DISABLE_COOPMAT2=1 on a
	# self-hosted NVIDIA x64 runner.
	ggml-ci-x64-nvidia-vulkan-cm:
	  runs-on: [self-hosted, Linux, X64, NVIDIA]

	  steps:
	    - name: Clone
	      id: checkout
	      uses: actions/checkout@v4

	    - name: Test
	      id: ggml-ci
	      # The Vulkan device summary is printed to the job log first.
	      run: |
	        vulkaninfo --summary
	        GG_BUILD_VULKAN=1 GGML_VK_DISABLE_COOPMAT2=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp

	# Runs ci/run.sh with GG_BUILD_VULKAN=1 on a self-hosted runner that also
	# carries the COOPMAT2 label.
	ggml-ci-x64-nvidia-vulkan-cm2:
	  runs-on: [self-hosted, Linux, X64, NVIDIA, COOPMAT2]

	  steps:
	    - name: Clone
	      id: checkout
	      uses: actions/checkout@v4

	    - name: Test
	      id: ggml-ci
	      # The Vulkan device summary is printed to the job log first.
	      run: |
	        vulkaninfo --summary
	        GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp

	# Runs ci/run.sh with no extra GG_BUILD_* switches on a self-hosted x64
	# runner labelled CPU and AMX.
	ggml-ci-x64-cpu-amx:
	  runs-on: [self-hosted, Linux, X64, CPU, AMX]

	  steps:
	    - name: Clone
	      id: checkout
	      uses: actions/checkout@v4

	    - name: Test
	      id: ggml-ci
	      run: |
	        bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp

	# Runs ci/run.sh with GG_BUILD_METAL=1 on a self-hosted macOS ARM64 runner.
	ggml-ci-mac-metal:
	  runs-on: [self-hosted, macOS, ARM64]

	  steps:
	    - name: Clone
	      id: checkout
	      uses: actions/checkout@v4

	    - name: Test
	      id: ggml-ci
	      # Both the results and mount directories are under the home directory.
	      run: |
	        GG_BUILD_METAL=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

	# Runs ci/run.sh with GG_BUILD_VULKAN=1 on a self-hosted macOS ARM64 runner.
	ggml-ci-mac-vulkan:
	  runs-on: [self-hosted, macOS, ARM64]

	  steps:
	    - name: Clone
	      id: checkout
	      uses: actions/checkout@v4

	    - name: Test
	      id: ggml-ci
	      # The Vulkan device summary is printed to the job log first.
	      run: |
	        vulkaninfo --summary
	        GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

	# Runs ci/run.sh on a hosted arm64 runner with GG_BUILD_KLEIDIAI=1 and
	# GG_BUILD_EXTRA_TESTS_0=1.
	ggml-ci-arm64-cpu-kleidiai:
	  runs-on: ubuntu-22.04-arm

	  steps:
	    - name: Clone
	      id: checkout
	      uses: actions/checkout@v4

	    - name: ccache
	      # NOTE(review): this action ref looks mangled by e-mail obfuscation
	      # during extraction; restore the real owner/repo@version before use.
	      uses: ggml-org/[email protected]
	      with:
	        key: ggml-ci-arm64-cpu-kleidiai
	        evict-old-files: 1d

	    - name: Dependencies
	      id: depends
	      run: |
	        sudo apt-get update
	        sudo apt-get install -y build-essential libcurl4-openssl-dev

	    - name: Test
	      id: ggml-ci
	      run: |
	        GG_BUILD_KLEIDIAI=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Add circular tiling support to pad, for Vulkan, CUDA, and CPU (used for making seamless textures) #29772

Workflow file

Add circular tiling support to pad, for Vulkan, CUDA, and CPU (used for making seamless textures) #29772

Uh oh!

Jobs

Run details

Workflow file for this run