CUDA: fix MMV kernel being used for FP16 src1 (#10357) #499
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
# This workflow uses actions that are not certified by GitHub.
# They are provided by a third party and are governed by
# separate terms of service, privacy policy, and support
# documentation.
# GitHub recommends pinning actions to a commit SHA.
# To get a newer version, you will need to update the SHA.
# You can also reference a tag or branch, but the action may change without warning.
name: Publish Docker image

# Triggers: pushes to master that touch the workflow itself, a Dockerfile under
# .devops/, or any build/source file matched below; plus manual dispatch.
on:
  # pull_request:
  push:
    branches:
      - master
    paths:
      - '.github/workflows/docker.yml'
      - '.devops/*.Dockerfile'
      - '**/CMakeLists.txt'
      - '**/Makefile'
      - '**/*.h'
      - '**/*.hpp'
      - '**/*.c'
      - '**/*.cpp'
      - '**/*.cu'
      - '**/*.cuh'
      - '**/*.swift'
      - '**/*.m'
      - '**/*.metal'
  workflow_dispatch: # allows manual triggering, useful for debugging
# Runs that have a head ref (pull requests) are grouped per workflow + ref, so a
# newer run cancels the one still in progress. When `github.head_ref` is empty
# the group falls back to the run id, which gives that run a group of its own.
concurrency:
  group: '${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}'
  cancel-in-progress: true
# Fine-grained permissions for the automatically provided GITHUB_TOKEN.
# Any scope that is not listed here is set to "none", so everything the
# workflow needs has to be spelled out explicitly.
# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
permissions:
  contents: read # lets actions/checkout fetch the repository
  packages: write # lets the job push images to ghcr.io
jobs:
  # Builds every image variant listed in the matrix and, on push events,
  # publishes it to the GitHub Container Registry (ghcr.io) under two tags:
  # "<variant>" and "<variant>-<postfix>".
  push_to_registry:
    # NOTE(review): the display name still mentions Docker Hub although the
    # images go to ghcr.io. It is left unchanged because status checks may
    # reference it by name - confirm before renaming.
    name: Push Docker image to Docker Hub
    # if: github.event.pull_request.draft == false

    runs-on: ubuntu-latest
    env:
      COMMIT_SHA: ${{ github.sha }}
    strategy:
      # Image variants are independent of each other: do not cancel the
      # remaining builds when a single variant fails.
      fail-fast: false
      matrix:
        config:
          - { tag: "light", dockerfile: ".devops/llama-cli.Dockerfile", platforms: "linux/amd64,linux/arm64" }
          - { tag: "server", dockerfile: ".devops/llama-server.Dockerfile", platforms: "linux/amd64,linux/arm64" }
          - { tag: "full", dockerfile: ".devops/full.Dockerfile", platforms: "linux/amd64,linux/arm64" }
          - { tag: "light-cuda", dockerfile: ".devops/llama-cli-cuda.Dockerfile", platforms: "linux/amd64" }
          - { tag: "server-cuda", dockerfile: ".devops/llama-server-cuda.Dockerfile", platforms: "linux/amd64" }
          - { tag: "full-cuda", dockerfile: ".devops/full-cuda.Dockerfile", platforms: "linux/amd64" }
          - { tag: "light-musa", dockerfile: ".devops/llama-cli-musa.Dockerfile", platforms: "linux/amd64" }
          - { tag: "server-musa", dockerfile: ".devops/llama-server-musa.Dockerfile", platforms: "linux/amd64" }
          - { tag: "full-musa", dockerfile: ".devops/full-musa.Dockerfile", platforms: "linux/amd64" }
          # Note: the rocm images are failing due to a compiler error and are disabled until this is fixed to allow the workflow to complete
          # - { tag: "light-rocm", dockerfile: ".devops/llama-cli-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
          # - { tag: "server-rocm", dockerfile: ".devops/llama-server-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
          # - { tag: "full-rocm", dockerfile: ".devops/full-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
          - { tag: "light-intel", dockerfile: ".devops/llama-cli-intel.Dockerfile", platforms: "linux/amd64" }
          - { tag: "server-intel", dockerfile: ".devops/llama-server-intel.Dockerfile", platforms: "linux/amd64" }

    steps:
      - name: Check out the repo
        uses: actions/checkout@v4
        with:
          fetch-depth: 0 # preserve git history, so we can determine the build number

      # QEMU + Buildx are what make the multi-platform entries of the matrix
      # (linux/amd64,linux/arm64) buildable on a single runner.
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Log in to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.repository_owner }}
          password: ${{ secrets.GITHUB_TOKEN }}

      # Computes the comma-separated tag list consumed by the build step and
      # exposes it as the step output `output_tags`.
      - name: Determine tag name
        id: tag
        shell: bash
        env:
          GITHUB_BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
          GITHUB_REPOSITORY_OWNER: '${{ github.repository_owner }}'
        run: |
          BUILD_NUMBER="$(git rev-list --count HEAD)"
          SHORT_HASH="$(git rev-parse --short=7 HEAD)"
          REPO_OWNER="${GITHUB_REPOSITORY_OWNER@L}" # to lower case
          REPO_NAME="${{ github.event.repository.name }}"

          # determine tag name postfix (build number, commit hash)
          # The branch name is read from the environment instead of being
          # expanded by ${{ }} into the script text, so a crafted branch name
          # cannot inject shell code.
          if [[ "${GITHUB_BRANCH_NAME}" == "master" ]]; then
            TAG_POSTFIX="b${BUILD_NUMBER}"
          else
            # '/' is not allowed inside an image tag
            SAFE_NAME="$(echo "${GITHUB_BRANCH_NAME}" | tr '/' '-')"
            TAG_POSTFIX="${SAFE_NAME}-${SHORT_HASH}"
          fi

          # list all tags possible
          TAGS=""
          TAGS="${TAGS}ghcr.io/${REPO_OWNER}/${REPO_NAME}:${{ matrix.config.tag }},"
          TAGS="${TAGS}ghcr.io/${REPO_OWNER}/${REPO_NAME}:${{ matrix.config.tag }}-${TAG_POSTFIX}"

          echo "output_tags=$TAGS" >> "$GITHUB_OUTPUT"
          echo "output_tags=$TAGS" # print out for debugging

      # Third-party action: pinned to the commit referenced below rather than
      # to the moving `main` branch.
      # https://github.com/jlumbroso/free-disk-space/tree/54081f138730dfa15788a46383842cd2f914a1be#example
      - name: Free Disk Space (Ubuntu)
        uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be
        with:
          # this might remove tools that are actually needed,
          # if set to "true" but frees about 6 GB
          tool-cache: false

          # all of these default to true, but feel free to set to
          # "false" if necessary for your workflow
          android: true
          dotnet: true
          haskell: true
          large-packages: true
          docker-images: true
          swap-storage: true

      # Only push events build and publish; a manual workflow_dispatch run
      # stops after the preparation steps above.
      - name: Build and push Docker image (tagged + versioned)
        if: github.event_name == 'push'
        uses: docker/build-push-action@v6
        with:
          context: .
          push: true
          platforms: ${{ matrix.config.platforms }}
          # tag list is generated from step above
          tags: ${{ steps.tag.outputs.output_tags }}
          file: ${{ matrix.config.dockerfile }}