Skip to content

Release

Release #3

Workflow file for this run

# Workflow header: name, triggers, concurrency policy and shared environment.
name: Release

on:
  workflow_dispatch: # allows manual triggering
    inputs:
      create_release:
        description: 'Create new release'
        required: true
        type: boolean
  push:
    branches:
      - master
    # Only pushes touching this workflow, the build system or native sources
    # trigger a run.
    paths:
      - '.github/workflows/release.yml'
      - '**/CMakeLists.txt'
      # Was '**/.cmake', which only matches files literally named ".cmake".
      - '**/*.cmake'
      - '**/*.h'
      - '**/*.hpp'
      - '**/*.c'
      - '**/*.cpp'
      - '**/*.cu'
      - '**/*.cuh'
      - '**/*.m'
      - '**/*.comp'

# The group is keyed by github.ref only when github.head_ref is set; otherwise
# it is keyed by github.run_id, which gives every such run its own group.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true

env:
  # github.head_ref when set, otherwise the name of the ref that was pushed.
  BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
  # Shared CMake flags; jobs read them as ${{ env.CMAKE_ARGS }}.
  CMAKE_ARGS: "-DLLAMA_BUILD_EXAMPLES=OFF -DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_TOOLS=ON -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON"
jobs:
  # Disabled Linux/Vulkan build, kept for reference.
  # ubuntu-22-vulkan:
  #   if: false
  #   runs-on: ubuntu-22.04
  #   steps:
  #     - name: Clone
  #       id: checkout
  #       uses: actions/checkout@v4
  #       with:
  #         fetch-depth: 0
  #     - name: ccache
  #       uses: hendrikmuhs/ccache-action@v1.2.18
  #       with:
  #         key: ubuntu-22-cmake-vulkan
  #         evict-old-files: 1d
  #     - name: Dependencies
  #       id: depends
  #       run: |
  #         wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add -
  #         sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
  #         sudo apt-get update -y
  #         sudo apt-get install -y build-essential mesa-vulkan-drivers vulkan-sdk libcurl4-openssl-dev
  #     - name: Build
  #       id: cmake_build
  #       run: |
  #         cmake -B build \
  #           -DGGML_VULKAN=ON \
  #           -DGGML_MAX_CONTEXTS=2048 \
  #           ${{ env.CMAKE_ARGS }}
  #         cmake --build build --config Release -j $(nproc)
  #     - name: Determine tag name
  #       id: tag
  #       uses: ./.github/actions/get-tag-name
  #     - name: Pack artifacts
  #       id: pack_artifacts
  #       run: |
  #         cp LICENSE ./build/bin/
  #         zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.zip ./build/bin/*
  #     - name: Upload artifacts
  #       uses: actions/upload-artifact@v4
  #       with:
  #         path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.zip
  #         name: llama-bin-ubuntu-vulkan-x64.zip

  # Builds the CUDA binaries on Windows and uploads them as one zip artifact
  # per CUDA version in the matrix.
  windows-cuda:
    runs-on: windows-2022
    strategy:
      matrix:
        cuda: ['12.6']
        instr: ['avx2'] # 'avx512', 'avx512bf16'
    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4

      - name: Install ccache
        uses: hendrikmuhs/ccache-action@v1.2.18
        with:
          key: windows-cuda-${{ matrix.cuda }}-${{ matrix.instr }}
          variant: ccache
          evict-old-files: 1d

      - name: Install CUDA Toolkit
        uses: ./.github/actions/windows-setup-cuda
        with:
          cuda_version: ${{ matrix.cuda }}

      - name: Install Ninja
        id: install_ninja
        run: |
          choco install ninja

      # GGML_AVX2 is passed once, driven by matrix.instr. The previous
      # hard-coded -DGGML_AVX2=ON was overridden by this later flag anyway.
      # The build uses one job fewer than the number of logical processors.
      - name: Build
        id: cmake_build
        shell: cmd
        run: |
          call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64
          cmake -S . -B build -G "Ninja Multi-Config" -DLLAMA_BUILD_SERVER=ON ^
            -DGGML_CUDA_FA_ALL_QUANTS=ON -DGGML_CUDA=ON ^
            -DGGML_OPENMP=ON ^
            -DGGML_AVX2=${{ (matrix.instr == 'avx2' || matrix.instr == 'avx512' || matrix.instr == 'avx512bf16') && 'ON' || 'OFF' }} ^
            -DGGML_NATIVE=OFF -DLLAMA_CURL=OFF ^
            -DGGML_SCHED_MAX_COPIES=1 ^
            -DGGML_CUDA_IQK_FORCE_BF16=1 ^
            -DGGML_MAX_CONTEXTS=4096
          set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1
          cmake --build build --config Release -j %NINJA_JOBS%

      - name: Pack artifacts
        id: pack_artifacts
        run: |
          7z a llama-bin-win-cuda-${{ matrix.cuda }}.zip .\build\bin\Release\*

      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          path: llama-bin-win-cuda-${{ matrix.cuda }}.zip
          name: llama-bin-win-cuda-${{ matrix.cuda }}.zip

  # Collects the build artifacts, renames them with the release tag and
  # publishes them as assets of a new GitHub release.
  release:
    if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
    # Fine-grained permissions
    # https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
    permissions:
      contents: write # for creating release
    runs-on: ubuntu-latest
    needs:
      # - windows
      - windows-cuda
      # - windows-hip
      # - ubuntu-22-cpu
      # - ubuntu-22-vulkan
    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Determine tag name
        id: tag
        uses: ./.github/actions/get-tag-name

      - name: Download artifacts
        id: download-artifact
        uses: actions/download-artifact@v4
        with:
          path: ./artifact
          merge-multiple: true

      - name: Move artifacts
        id: move_artifacts
        # artifact/cudart-llama-bin-win-*.zip
        run: |
          mkdir -p release

          echo "Adding cuda backend files to existing zips..."
          for arch in x64; do
            cpu_zip="artifact/llama-bin-win-${arch}.zip"
            # The CPU build is optional: none of the jobs in `needs` uploads
            # this zip, and an unconditional unzip would abort the whole step.
            if [[ ! -f "$cpu_zip" ]]; then
              echo "No CPU backend zip found for $arch ($cpu_zip), skipping merge"
              continue
            fi
            temp_dir=$(mktemp -d)
            echo "Extracting CPU backend for $arch..."
            unzip "$cpu_zip" -d "$temp_dir"
            echo "Adding CPU files to $arch zips..."
            for target_zip in artifact/llama-bin-win-*-${arch}.zip; do
              # An unmatched glob stays literal, so skip non-existent paths too.
              if [[ ! -f "$target_zip" || "$target_zip" == "$cpu_zip" ]]; then
                continue
              fi
              echo "Adding CPU backend to $(basename "$target_zip")"
              realpath_target_zip=$(realpath "$target_zip")
              (cd "$temp_dir" && zip -r "$realpath_target_zip" .)
            done
            rm -rf "$temp_dir"
          done

          echo "Renaming and moving zips to release..."
          for zip_file in artifact/llama-bin-win-*.zip; do
            base_name=$(basename "$zip_file" .zip)
            zip_name="ik_llama-${{ steps.tag.outputs.name }}-${base_name#llama-}.zip"
            echo "Moving $zip_file to release/$zip_name"
            mv "$zip_file" "release/$zip_name"
          done

          echo "Moving any remaining artifacts..."
          mv -v artifact/*.zip release/ || true

      - name: Create release
        id: create_release
        uses: Thireus/action-create-release@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          tag_name: ${{ steps.tag.outputs.name }}

      # Uploads every .zip in ./release as an asset of the release created by
      # the previous step.
      - name: Upload release
        id: upload_release
        uses: actions/github-script@v3
        with:
          github-token: ${{secrets.GITHUB_TOKEN}}
          script: |
            const path = require('path');
            const fs = require('fs');
            const release_id = '${{ steps.create_release.outputs.id }}';
            for (const file of fs.readdirSync('./release')) {
              if (path.extname(file) === '.zip') {
                console.log('uploadReleaseAsset', file);
                await github.repos.uploadReleaseAsset({
                  owner: context.repo.owner,
                  repo: context.repo.repo,
                  release_id: release_id,
                  name: file,
                  data: fs.readFileSync(`./release/${file}`)
                });
              }
            }