Skip to content

refactor: introduce modular architecture for llama-model.cpp #29

refactor: introduce modular architecture for llama-model.cpp

refactor: introduce modular architecture for llama-model.cpp #29

Workflow file for this run

# TEMPORARILY DISABLED: Investigating Docker build failures
# This workflow uses actions that are not certified by GitHub.
# They are provided by a third-party and are governed by
# separate terms of service, privacy policy, and support
# documentation.
# GitHub recommends pinning actions to a commit SHA.
# To get a newer version, you will need to update the SHA.
# You can also reference a tag or branch, but the action may change without warning.
# name: Publish Docker image
# on:
# workflow_dispatch: # allows manual triggering
# schedule:
# # Rebuild daily rather than on every push because it is expensive
# - cron: '12 4 * * *'
# concurrency:
# group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
# cancel-in-progress: true
# # Fine-grained permissions
# # https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
# permissions:
# packages: write
# jobs:
# push_to_registry:
# name: Push Docker image to Docker Hub
# runs-on: ${{ matrix.config.runs_on }}
# env:
# COMMIT_SHA: ${{ github.sha }}
# strategy:
# fail-fast: false
# matrix:
# config:
# # Multi-stage build
# # Note: the arm64 images are failing, which prevents the amd64 images from being built
# # https://github.com/ggml-org/llama.cpp/issues/11888
# #- { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, free_disk_space: false }
# - { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false, runs_on: "ubuntu-22.04" }
# - { tag: "cuda", dockerfile: ".devops/cuda.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false, runs_on: "ubuntu-22.04" }
# - { tag: "musa", dockerfile: ".devops/musa.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true, runs_on: "ubuntu-22.04" }
# - { tag: "intel", dockerfile: ".devops/intel.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true, runs_on: "ubuntu-22.04" }
# - { tag: "vulkan", dockerfile: ".devops/vulkan.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false, runs_on: "ubuntu-22.04" }
# - { tag: "s390x", dockerfile: ".devops/s390x.Dockerfile", platforms: "linux/s390x", full: true, light: true, server: true, free_disk_space: false, runs_on: "ubuntu-22.04-s390x" }
# # Note: the rocm images are failing due to a compiler error and are disabled until this is fixed to allow the workflow to complete
# #- {tag: "rocm", dockerfile: ".devops/rocm.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, free_disk_space: true }
# steps:
# - name: Check out the repo
# uses: actions/checkout@v4
# with:
# fetch-depth: 0 # preserve git history, so we can determine the build number
# - name: Set up QEMU
# if: ${{ matrix.config.tag != 's390x' }}
# uses: docker/setup-qemu-action@v3
# with:
# image: tonistiigi/binfmt:qemu-v7.0.0-28
# - name: Set up Docker Buildx
# uses: docker/setup-buildx-action@v3
# - name: Log in to Docker Hub
# uses: docker/login-action@v2
# with:
# registry: ghcr.io
# username: ${{ github.repository_owner }}
# password: ${{ secrets.GITHUB_TOKEN }}
# - name: Determine source tag name
# id: srctag
# uses: ./.github/actions/get-tag-name
# env:
# BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
# - name: Determine image tag name
# id: tag
# shell: bash
# run: |
# REPO_OWNER="${GITHUB_REPOSITORY_OWNER@L}" # to lower case
# REPO_NAME="${{ github.event.repository.name }}"
# # list all tags possible
# if [[ "${{ matrix.config.tag }}" == "cpu" ]]; then
# TYPE=""
# else
# TYPE="-${{ matrix.config.tag }}"
# fi
# PREFIX="ghcr.io/${REPO_OWNER}/${REPO_NAME}:"
# CACHETAGS="${PREFIX}buildcache${TYPE}"
# FULLTAGS="${PREFIX}full${TYPE},${PREFIX}full${TYPE}-${{ steps.srctag.outputs.name }}"
# LIGHTTAGS="${PREFIX}light${TYPE},${PREFIX}light${TYPE}-${{ steps.srctag.outputs.name }}"
# SERVERTAGS="${PREFIX}server${TYPE},${PREFIX}server${TYPE}-${{ steps.srctag.outputs.name }}"
# echo "cache_output_tags=$CACHETAGS" >> $GITHUB_OUTPUT
# echo "full_output_tags=$FULLTAGS" >> $GITHUB_OUTPUT
# echo "light_output_tags=$LIGHTTAGS" >> $GITHUB_OUTPUT
# echo "server_output_tags=$SERVERTAGS" >> $GITHUB_OUTPUT
# echo "cache_output_tags=$CACHETAGS" # print out for debugging
# echo "full_output_tags=$FULLTAGS" # print out for debugging
# echo "light_output_tags=$LIGHTTAGS" # print out for debugging
# echo "server_output_tags=$SERVERTAGS" # print out for debugging
# env:
# GITHUB_REPOSITORY_OWNER: '${{ github.repository_owner }}'
# - name: Free Disk Space (Ubuntu)
# if: ${{ matrix.config.free_disk_space == true }}
# uses: ggml-org/free-disk-space@v1.3.1
# with:
# # setting this to "true" frees about 6 GB,
# # but it might remove tools that are actually needed
# tool-cache: false
# # all of these default to true, but feel free to set to
# # "false" if necessary for your workflow
# android: true
# dotnet: true
# haskell: true
# large-packages: true
# docker-images: true
# swap-storage: true
# - name: Build and push Full Docker image (tagged + versioned)
# if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.full == true }}
# uses: docker/build-push-action@v6
# with:
# context: .
# push: true
# platforms: ${{ matrix.config.platforms }}
# # tag list is generated from step above
# tags: ${{ steps.tag.outputs.full_output_tags }}
# file: ${{ matrix.config.dockerfile }}
# target: full
# provenance: false
# # using github experimental cache
# #cache-from: type=gha
# #cache-to: type=gha,mode=max
# # return to this if the experimental github cache is having issues
# #cache-to: type=local,dest=/tmp/.buildx-cache
# #cache-from: type=local,src=/tmp/.buildx-cache
# # using registry cache (no storage limit)
# cache-from: type=registry,ref=${{ steps.tag.outputs.cache_output_tags }}
# cache-to: type=registry,ref=${{ steps.tag.outputs.cache_output_tags }},mode=max
# - name: Build and push Light Docker image (tagged + versioned)
# if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.light == true }}
# uses: docker/build-push-action@v6
# with:
# context: .
# push: true
# platforms: ${{ matrix.config.platforms }}
# # tag list is generated from step above
# tags: ${{ steps.tag.outputs.light_output_tags }}
# file: ${{ matrix.config.dockerfile }}
# target: light
# provenance: false
# # using github experimental cache
# #cache-from: type=gha
# #cache-to: type=gha,mode=max
# # return to this if the experimental github cache is having issues
# #cache-to: type=local,dest=/tmp/.buildx-cache
# #cache-from: type=local,src=/tmp/.buildx-cache
# # using registry cache (no storage limit)
# cache-from: type=registry,ref=${{ steps.tag.outputs.cache_output_tags }}
# cache-to: type=registry,ref=${{ steps.tag.outputs.cache_output_tags }},mode=max
# - name: Build and push Server Docker image (tagged + versioned)
# if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.server == true }}
# uses: docker/build-push-action@v6
# with:
# context: .
# push: true
# platforms: ${{ matrix.config.platforms }}
# # tag list is generated from step above
# tags: ${{ steps.tag.outputs.server_output_tags }}
# file: ${{ matrix.config.dockerfile }}
# target: server
# provenance: false
# # using github experimental cache
# #cache-from: type=gha
# #cache-to: type=gha,mode=max
# # return to this if the experimental github cache is having issues
# #cache-to: type=local,dest=/tmp/.buildx-cache
# #cache-from: type=local,src=/tmp/.buildx-cache
# # using registry cache (no storage limit)
# cache-from: type=registry,ref=${{ steps.tag.outputs.cache_output_tags }}
# cache-to: type=registry,ref=${{ steps.tag.outputs.cache_output_tags }},mode=max
# create_tag:
# name: Create and push git tag
# runs-on: ubuntu-22.04
# permissions:
# contents: write
# steps:
# - name: Clone
# id: checkout
# uses: actions/checkout@v4
# with:
# fetch-depth: 0
# - name: Determine source tag name
# id: srctag
# uses: ./.github/actions/get-tag-name
# env:
# BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
# - name: Create and push git tag
# env:
# GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# run: |
# git tag ${{ steps.srctag.outputs.name }} || exit 0
# git push origin ${{ steps.srctag.outputs.name }} || exit 0