diff --git a/.github/workflows/menlo-build.yml b/.github/workflows/menlo-build.yml
new file mode 100644
index 0000000000000..07f1c75c10d2b
--- /dev/null
+++ b/.github/workflows/menlo-build.yml
@@ -0,0 +1,638 @@
+name: CI
+
+on:
+  push:
+    tags: ["b[0-9]+"]
+    paths:
+      [
+        ".github/scripts/**",
+        ".github/workflows/build.yml",
+        ".github/workflows/menlo-build.yml",
+        "**/CMakeLists.txt",
+        "**/Makefile",
+        "menlo/Makefile",
+        "**/*.h",
+        "**/*.hpp",
+        "**/*.c",
+        "**/*.cpp",
+        "**/*.cu",
+        "**/*.cc",
+        "**/*.cxx",
+        "llama.cpp",
+        "!docs/**",
+        "!.gitignore",
+        "!README.md",
+      ]
+  workflow_dispatch:
+
+env:
+  VULKAN_VERSION: 1.3.261.1
+
+jobs:
+  create-draft-release:
+    runs-on: ubuntu-latest
+    if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
+    outputs:
+      upload_url: ${{ steps.create_release.outputs.upload_url }}
+      version: ${{ steps.get_version.outputs.version }}
+    permissions:
+      contents: write
+    steps:
+      - name: Extract tag name
+        id: get_version
+        run: echo "VERSION=${GITHUB_REF#refs/tags/}" >> $GITHUB_ENV && echo "version=${GITHUB_REF#refs/tags/}" >> $GITHUB_OUTPUT
+        env:
+          GITHUB_REF: ${{ github.ref }}
+      - name: Create Draft Release
+        id: create_release
+        uses: softprops/action-gh-release@v2
+        with:
+          tag_name: ${{ github.ref_name }}
+          token: ${{ secrets.GITHUB_TOKEN }}
+          name: "${{ env.VERSION }}"
+          draft: true
+          generate_release_notes: true
+          prerelease: false
+
+  build-and-test:
+    runs-on: ${{ matrix.runs-on }}
+    needs: [create-draft-release]
+    timeout-minutes: 270
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - os: "linux"
+            name: "noavx-x64"
+            runs-on: "ubuntu-20-04"
+            cmake-flags: "-DLLAMA_CURL=OFF -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_F16C=OFF -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF "
+            run-e2e: false
+            vulkan: false
+            ccache: true
+            ccache-dir: "/home/runner/.ccache"
+          - os: "linux"
+            name: "avx-x64"
+            runs-on: "ubuntu-20-04"
+            cmake-flags: "-DLLAMA_CURL=OFF -DGGML_AVX2=OFF -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF "
+            run-e2e: false
+            vulkan: false
+            ccache: true
+            ccache-dir: "/home/runner/.ccache"
+          - os: "linux"
+            name: "avx512-x64"
+            runs-on: "ubuntu-20-04"
+            cmake-flags: "-DLLAMA_CURL=OFF -DGGML_AVX512=ON -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF "
+            run-e2e: false
+            vulkan: false
+            ccache: true
+            ccache-dir: "/home/runner/.ccache"
+          - os: "linux"
+            name: "noavx-cuda-cu11.7-x64"
+            runs-on: "ubuntu-20-04-cuda-11-7"
+            cmake-flags: "-DLLAMA_CURL=OFF -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_F16C=OFF -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DGGML_CUDA=ON "
+            run-e2e: false
+            vulkan: false
+            ccache: true
+            ccache-dir: "/home/runner/.ccache"
+          - os: "linux"
+            name: "avx2-cuda-cu11.7-x64"
+            runs-on: "ubuntu-20-04-cuda-11-7"
+            cmake-flags: "-DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DGGML_CUDA=ON "
+            run-e2e: false
+            vulkan: false
+            ccache: true
+            ccache-dir: "/home/runner/.ccache"
+          - os: "linux"
+            name: "avx-cuda-cu11.7-x64"
+            runs-on: "ubuntu-20-04-cuda-11-7"
+            cmake-flags: "-DLLAMA_CURL=OFF -DGGML_AVX2=OFF -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DGGML_CUDA=ON "
+            run-e2e: false
+            vulkan: false
+            ccache: true
+            ccache-dir: "/home/runner/.ccache"
+          - os: "linux"
+            name: "avx512-cuda-cu11.7-x64"
+            runs-on: "ubuntu-20-04-cuda-11-7"
+            cmake-flags: "-DLLAMA_CURL=OFF -DGGML_AVX512=ON -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DGGML_CUDA=ON "
+            run-e2e: false
+            vulkan: false
+            ccache: true
+            ccache-dir: "/home/runner/.ccache"
+          - os: "linux"
+            name: "noavx-cuda-cu12.0-x64"
+            runs-on:
"ubuntu-20-04-cuda-12-0" + cmake-flags: "-DLLAMA_CURL=OFF -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_F16C=OFF -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DGGML_CUDA=ON " + run-e2e: false + vulkan: false + ccache: true + ccache-dir: "/home/runner/.ccache" + - os: "linux" + name: "avx2-cuda-cu12.0-x64" + runs-on: "ubuntu-20-04-cuda-12-0" + cmake-flags: "-DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DGGML_CUDA=ON " + run-e2e: false + vulkan: false + ccache: true + ccache-dir: "/home/runner/.ccache" + - os: "linux" + name: "avx-cuda-cu12.0-x64" + runs-on: "ubuntu-20-04-cuda-12-0" + cmake-flags: "-DLLAMA_CURL=OFF -DGGML_AVX2=OFF -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DGGML_CUDA=ON " + run-e2e: false + vulkan: false + ccache: true + ccache-dir: "/home/runner/.ccache" + - os: "linux" + name: "avx512-cuda-cu12.0-x64" + runs-on: "ubuntu-20-04-cuda-12-0" + cmake-flags: "-DLLAMA_CURL=OFF -DGGML_AVX512=ON -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DGGML_CUDA=ON " + run-e2e: false + vulkan: false + ccache: true + ccache-dir: "/home/runner/.ccache" + - os: "linux" + name: "vulkan-x64" + runs-on: "ubuntu-22-04" + cmake-flags: "-DBUILD_SHARED_LIBS=OFF -DGGML_VULKAN=ON -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF " + run-e2e: false + vulkan: true + ccache: true + ccache-dir: "/home/runner/.ccache" + - os: "macos" + name: "x64" + runs-on: "macos-selfhosted-12" + cmake-flags: "-DCMAKE_BUILD_RPATH=\"@loader_path\" -DGGML_METAL=OFF -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=ON" + run-e2e: false + vulkan: false + ccache: false + ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' + - os: "macos" + name: "arm64" + runs-on: "macos-selfhosted-12-arm64" + cmake-flags: "-DCMAKE_BUILD_RPATH=\"@loader_path\" -DGGML_METAL_EMBED_LIBRARY=ON -DBUILD_SHARED_LIBS=ON" + run-e2e: false + vulkan: false + ccache: false + ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' + - os: "win" + name: "noavx-cuda-cu12.0-x64" + runs-on: "windows-cuda-12-0" + cmake-flags: "-DLLAMA_CURL=OFF -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_F16C=OFF -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DGGML_CUDA=ON -DCMAKE_BUILD_TYPE='Release'" + run-e2e: false + vulkan: false + ccache: true + ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' + - os: "win" + name: "avx2-cuda-cu12.0-x64" + runs-on: "windows-cuda-12-0" + cmake-flags: "-DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DGGML_CUDA=ON -DCMAKE_BUILD_TYPE='Release'" + run-e2e: false + vulkan: false + ccache: true + ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' + - os: "win" + name: "avx-cuda-cu12.0-x64" + runs-on: "windows-cuda-12-0" + cmake-flags: "-DLLAMA_CURL=OFF -DGGML_AVX2=OFF -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DGGML_CUDA=ON -DCMAKE_BUILD_TYPE='Release'" + run-e2e: false + vulkan: false + ccache: true + ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' + - os: "win" + name: "avx512-cuda-cu12.0-x64" + runs-on: "windows-cuda-12-0" + cmake-flags: "-DLLAMA_CURL=OFF -DGGML_AVX512=ON -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DGGML_CUDA=ON -DCMAKE_BUILD_TYPE='Release'" + run-e2e: false + vulkan: false + ccache: true + ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' + - os: "win" + name: "noavx-cuda-cu11.7-x64" + runs-on: "windows-cuda-11-7" + cmake-flags: "-DLLAMA_CURL=OFF -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_F16C=OFF -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DGGML_CUDA=ON -DCMAKE_BUILD_TYPE='Release'" + run-e2e: false + vulkan: false + 
ccache: true
+            ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache'
+          - os: "win"
+            name: "avx2-cuda-cu11.7-x64"
+            runs-on: "windows-cuda-11-7"
+            cmake-flags: "-DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DGGML_CUDA=ON -DCMAKE_BUILD_TYPE='Release'"
+            run-e2e: false
+            vulkan: false
+            ccache: true
+            ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache'
+          - os: "win"
+            name: "avx-cuda-cu11.7-x64"
+            runs-on: "windows-cuda-11-7"
+            cmake-flags: "-DLLAMA_CURL=OFF -DGGML_AVX2=OFF -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DGGML_CUDA=ON -DCMAKE_BUILD_TYPE='Release'"
+            run-e2e: false
+            vulkan: false
+            ccache: true
+            ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache'
+          - os: "win"
+            name: "avx512-cuda-cu11.7-x64"
+            runs-on: "windows-cuda-11-7"
+            cmake-flags: "-DLLAMA_CURL=OFF -DGGML_AVX512=ON -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DGGML_CUDA=ON -DCMAKE_BUILD_TYPE='Release'"
+            run-e2e: false
+            vulkan: false
+            ccache: true
+            ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache'
+          - os: "win"
+            name: "avx2-x64"
+            runs-on: "windows-cuda-11-7"
+            cmake-flags: "-DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER=cl -DCMAKE_C_COMPILER=cl"
+            run-e2e: true
+            vulkan: false
+            ccache: false
+            ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache'
+          - os: "win"
+            name: "noavx-x64"
+            runs-on: "windows-cuda-11-7"
+            cmake-flags: "-DLLAMA_CURL=OFF -DBUILD_SHARED_LIBS=OFF -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_F16C=OFF -DGGML_NATIVE=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER=cl -DCMAKE_C_COMPILER=cl "
+            run-e2e: false
+            vulkan: false
+            ccache: false
+            ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache'
+          - os: "win"
+            name: "avx-x64"
+            runs-on: "windows-cuda-12-0"
+            cmake-flags: "-DLLAMA_CURL=OFF -DGGML_AVX2=OFF -DGGML_NATIVE=OFF -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER=cl -DCMAKE_C_COMPILER=cl "
+            run-e2e: true
+            vulkan: false
+            ccache: false
+            ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache'
+          - os: "win"
+            name: "avx512-x64"
+            runs-on: "windows-cuda-12-0"
+            cmake-flags: "-DLLAMA_CURL=OFF -DGGML_AVX512=ON -DGGML_NATIVE=OFF -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER=cl -DCMAKE_C_COMPILER=cl "
+            run-e2e: false
+            vulkan: false
+            ccache: false
+            ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache'
+          - os: "win"
+            name: "vulkan-x64"
+            runs-on: "windows-cuda-11-7"
+            cmake-flags: "-DBUILD_SHARED_LIBS=OFF -DGGML_VULKAN=ON -DLLAMA_CURL=OFF -DGGML_NATIVE=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER=cl -DCMAKE_C_COMPILER=cl "
+            vulkan: true
+            run-e2e: false
+            ccache: false
+            ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache'
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v3
+        with:
+          submodules: recursive
+
+      - name: Replace our Makefile
+        run: |
+          cat menlo/Makefile | tee Makefile
+
+      - name: use python for linux
+        continue-on-error: true
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.10'
+
+      - name: Install tools on Windows
+        if: runner.os == 'Windows'
+        run: |
+          choco install ccache awscli make ninja -y
+
+      - name: Install tools on Linux
+        if: runner.os == 'Linux'
+        run: |
+          sudo apt-get install -y ninja-build
+          python3 -m pip install awscli
+
+          if [ "${{ matrix.os }}${{ matrix.name }}" == "linuxarm64" ]; then
+            sudo apt-get install -y
ccache + exit 0 + fi + cd /tmp + wget https://github.com/ccache/ccache/releases/download/v4.10.2/ccache-4.10.2-linux-x86_64.tar.xz + tar -xvf ccache-4.10.2-linux-x86_64.tar.xz + sudo cp ccache-4.10.2-linux-x86_64/ccache /usr/bin/ccache + ccache -V + rm -rf /tmp/ccache-4.10.2-linux-x86_64.tar.xz /tmp/ccache-4.10.2-linux-x86_64 + + - name: Download ccache from s3 + if: runner.os == 'Windows' + continue-on-error: true + run: | + Import-Module "$env:ChocolateyInstall\helpers\chocolateyProfile.psm1" + refreshenv + aws s3 sync s3://${{ secrets.MINIO_BUCKET_NAME }}/ccache-data-${{ matrix.os }}-${{ matrix.name }} ${{ matrix.ccache-dir }} --endpoint ${{ secrets.MINIO_ENDPOINT }} --cli-read-timeout 0 + env: + AWS_ACCESS_KEY_ID: "${{ secrets.MINIO_ACCESS_KEY_ID }}" + AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}" + AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}" + + - name: Download ccache from s3 + if: runner.os == 'Linux' + continue-on-error: true + run: | + aws s3 sync s3://${{ secrets.MINIO_BUCKET_NAME }}/ccache-data-${{ matrix.os }}-${{ matrix.name }} ${{ matrix.ccache-dir }} --endpoint ${{ secrets.MINIO_ENDPOINT }} --cli-read-timeout 0 + env: + AWS_ACCESS_KEY_ID: "${{ secrets.MINIO_ACCESS_KEY_ID }}" + AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}" + AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}" + + - name: Install coreutils macos + if: runner.os == 'macOS' + run: | + brew install coreutils + + - name: Prepare Vulkan SDK Linux + if: ${{ matrix.vulkan && (matrix.os == 'linux') }} + run: | + wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add - + sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list + sudo apt-get update -y + sudo apt-get install -y build-essential vulkan-sdk + + - name: Prepare Vulkan SDK Windows + if: ${{ matrix.vulkan && (matrix.os == 'win') }} + continue-on-error: true + run: | + curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/VulkanSDK-${env:VULKAN_VERSION}-Installer.exe" + & "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install + Add-Content $env:GITHUB_ENV "VULKAN_SDK=C:\VulkanSDK\${env:VULKAN_VERSION}" + Add-Content $env:GITHUB_PATH "C:\VulkanSDK\${env:VULKAN_VERSION}\bin" + + - name: Get Cer for code signing + if: runner.os == 'macOS' + run: base64 -d <<< "$CODE_SIGN_P12_BASE64" > /tmp/codesign.p12 + shell: bash + env: + CODE_SIGN_P12_BASE64: ${{ secrets.CODE_SIGN_P12_BASE64 }} + + - uses: apple-actions/import-codesign-certs@v2 + continue-on-error: true + if: runner.os == 'macOS' + with: + p12-file-base64: ${{ secrets.CODE_SIGN_P12_BASE64 }} + p12-password: ${{ secrets.CODE_SIGN_P12_PASSWORD }} + + - uses: actions/setup-dotnet@v3 + if: runner.os == 'Windows' + with: + dotnet-version: "8.0.x" + + - name: Add msbuild to PATH + if: runner.os == 'Windows' + uses: ilammy/msvc-dev-cmd@v1.13.0 + + - name: Build + id: build-and-test + run: | + make build-lib CMAKE_EXTRA_FLAGS="${{ matrix.cmake-flags }}" + + - uses: 1arp/create-a-file-action@0.4.5 + with: + path: 'llama' + isAbsolutePath: false + file: 'version.txt' + content: | + name: ${{ matrix.os }}-${{ matrix.name }} + version: ${{needs.create-draft-release.outputs.version}} + + - name: Code Signing macOS + if: runner.os == 'macOS' + run: | + make codesign CODE_SIGN=true DEVELOPER_ID="${{ secrets.DEVELOPER_ID }}" + + - name: Install Quill for notarization + if: 
runner.os == 'macOS'
+        run: |
+          curl -sSfL https://raw.githubusercontent.com/anchore/quill/main/install.sh | sh -s -- -b /usr/local/bin
+          quill --version
+
+      - name: Prepare notary key
+        if: runner.os == 'macOS'
+        run: |
+          base64 -d <<< "$NOTARIZE_P8_BASE64" > /tmp/notary-key.p8
+          chmod 600 /tmp/notary-key.p8
+        env:
+          NOTARIZE_P8_BASE64: ${{ secrets.NOTARIZE_P8_BASE64 }}
+
+      - name: Notarize macOS binaries
+        if: runner.os == 'macOS'
+        run: |
+          make notarize NOTARIZE=true QUILL_NOTARY_KEY_ID="${{ secrets.NOTARY_KEY_ID }}" QUILL_NOTARY_ISSUER="${{ secrets.NOTARY_ISSUER }}" QUILL_NOTARY_KEY="/tmp/notary-key.p8"
+
+      - name: Cleanup notary key
+        if: runner.os == 'macOS'
+        run: |
+          rm -f /tmp/notary-key.p8
+          echo "Notary key cleaned up"
+
+      - name: Code Signing Windows
+        if: runner.os == 'Windows'
+        shell: cmd
+        run: |
+          set PATH=%PATH%;%USERPROFILE%\.dotnet\tools
+          make codesign CODE_SIGN=true AZURE_KEY_VAULT_URI="${{ secrets.AZURE_KEY_VAULT_URI }}" AZURE_CLIENT_ID="${{ secrets.AZURE_CLIENT_ID }}" AZURE_TENANT_ID="${{ secrets.AZURE_TENANT_ID }}" AZURE_CLIENT_SECRET="${{ secrets.AZURE_CLIENT_SECRET }}" AZURE_CERT_NAME="${{ secrets.AZURE_CERT_NAME }}"
+
+      - name: Package
+        run: |
+          cat llama/version.txt
+          make package
+
+      - name: Upload Artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: llama-${{ matrix.os }}-${{ matrix.name }}
+          path: ./llama
+
+      - name: Calculate SHA512 Checksum (macOS)
+        if: runner.os == 'macOS'
+        run: |
+          sha512sum ./llama.tar.gz | awk '{ print $1 }' > sha512.txt
+          size=$(stat -f%z ./llama.tar.gz) # use -f%z on macOS
+          echo "checksum=$(cat sha512.txt)" >> $GITHUB_ENV
+          echo "size=$size" >> $GITHUB_ENV
+
+      - name: Calculate SHA512 Checksum (Windows)
+        if: runner.os == 'Windows'
+        shell: pwsh
+        run: |
+          CertUtil -hashfile ./llama.tar.gz SHA512 | Select-String -Pattern "^[0-9a-fA-F]+$" | Out-File sha512.txt
+          $size = (Get-Item ./llama.tar.gz).length
+          echo "checksum=$(Get-Content sha512.txt)" >> $env:GITHUB_ENV
+          echo "size=$size" >> $env:GITHUB_ENV
+
+      - name: Calculate SHA512 Checksum (Linux)
+        if: runner.os == 'Linux'
+        run: |
+          sha512sum ./llama.tar.gz | awk '{ print $1 }' > sha512.txt
+          size=$(stat -c%s ./llama.tar.gz)
+          echo "checksum=$(cat sha512.txt)" >> $GITHUB_ENV
+          echo "size=$size" >> $GITHUB_ENV
+
+      ## Write for matrix outputs workaround
+      - uses: cloudposse/github-action-matrix-outputs-write@v1
+        id: out
+        with:
+          matrix-step-name: ${{ github.job }}
+          matrix-key: ${{ matrix.os }}-${{ matrix.name }}
+          outputs: |-
+            sha512: ${{ env.checksum }}
+            size: ${{ env.size }}
+
+      - uses: actions/upload-release-asset@v1.0.1
+        if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        with:
+          upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
+          asset_path: ./llama.tar.gz
+          asset_name: llama-${{ needs.create-draft-release.outputs.version }}-bin-${{ matrix.os }}-${{ matrix.name }}.tar.gz
+          asset_content_type: application/gzip
+
+      - name: Upload ccache to s3
+        continue-on-error: true
+        if: always() && runner.os == 'Windows'
+        run: |
+          Import-Module "$env:ChocolateyInstall\helpers\chocolateyProfile.psm1"
+          refreshenv
+          aws s3 sync ${{ matrix.ccache-dir }} s3://${{ secrets.MINIO_BUCKET_NAME }}/ccache-data-${{ matrix.os }}-${{ matrix.name }} --endpoint ${{ secrets.MINIO_ENDPOINT }}
+        env:
+          AWS_ACCESS_KEY_ID: "${{ secrets.MINIO_ACCESS_KEY_ID }}"
+          AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}"
+          AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}"
+
+      - name: Upload ccache to
s3 + continue-on-error: true + if: always() && runner.os == 'Linux' + run: | + aws s3 sync ${{ matrix.ccache-dir }} s3://${{ secrets.MINIO_BUCKET_NAME }}/ccache-data-${{ matrix.os }}-${{ matrix.name }} --endpoint ${{ secrets.MINIO_ENDPOINT }} + env: + AWS_ACCESS_KEY_ID: "${{ secrets.MINIO_ACCESS_KEY_ID }}" + AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}" + AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}" + + - name: Remove Keychain + continue-on-error: true + if: always() && runner.os == 'macOS' + run: | + security delete-keychain signing_temp.keychain + + + ## Read matrix outputs + read: + runs-on: ubuntu-latest + needs: [build-and-test] + steps: + - uses: cloudposse/github-action-matrix-outputs-read@v1 + id: read + with: + matrix-step-name: build-and-test + outputs: + result: "${{ steps.read.outputs.result }}" + + create-checksum-file: + runs-on: ubuntu-20-04 + permissions: + contents: write + needs: [read, create-draft-release] + steps: + - name: Download cuda dependencies from s3 and create checksum + run: | + wget https://minio.menlo.ai:9000/cicd/dist/cuda-dependencies/12.0/linux/cuda.tar.gz -O /tmp/cudart-llama-bin-linux-cu12.0-x64.tar.gz + wget https://minio.menlo.ai:9000/cicd/dist/cuda-dependencies/11.7/linux/cuda.tar.gz -O /tmp/cudart-llama-bin-linux-cu11.7-x64.tar.gz + wget https://minio.menlo.ai:9000/cicd/dist/cuda-dependencies/12.0/windows/cuda.tar.gz -O /tmp/cudart-llama-bin-win-cu12.0-x64.tar.gz + wget https://minio.menlo.ai:9000/cicd/dist/cuda-dependencies/11.7/windows/cuda.tar.gz -O /tmp/cudart-llama-bin-win-cu11.7-x64.tar.gz + + version=${{ needs.create-draft-release.outputs.version }} + outputs=${{ toJson(needs.read.outputs.result) }} + + echo $outputs + + echo "version: $version" > checksum.yml + echo "files:" >> checksum.yml + + echo "$outputs" | jq -r --arg version "$version" ' + .sha512 as $sha512 | + .size as $size | + (.sha512 | keys[]) as $key | + "- url: llama-\($version)-\($key).tar.gz\n sha512: >-\n \($sha512[$key])\n size: \($size[$key])" + ' >> checksum.yml + + echo "- url: cudart-llama-bin-linux-cu12.0-x64.tar.gz" >> checksum.yml + echo " sha512: >-" >> checksum.yml + echo " $(sha512sum /tmp/cudart-llama-bin-linux-cu12.0-x64.tar.gz | awk '{ print $1 }')" >> checksum.yml + echo " size: $(stat -c%s /tmp/cudart-llama-bin-linux-cu12.0-x64.tar.gz)" >> checksum.yml + + echo "- url: cudart-llama-bin-linux-cu11.7-x64.tar.gz" >> checksum.yml + echo " sha512: >-" >> checksum.yml + echo " $(sha512sum /tmp/cudart-llama-bin-linux-cu11.7-x64.tar.gz | awk '{ print $1 }')" >> checksum.yml + echo " size: $(stat -c%s /tmp/cudart-llama-bin-linux-cu11.7-x64.tar.gz)" >> checksum.yml + + echo "- url: cudart-llama-bin-win-cu11.7-x64.tar.gz" >> checksum.yml + echo " sha512: >-" >> checksum.yml + echo " $(sha512sum /tmp/cudart-llama-bin-win-cu11.7-x64.tar.gz | awk '{ print $1 }')" >> checksum.yml + echo " size: $(stat -c%s /tmp/cudart-llama-bin-win-cu11.7-x64.tar.gz)" >> checksum.yml + + echo "- url: cudart-llama-bin-win-cu12.0-x64.tar.gz" >> checksum.yml + echo " sha512: >-" >> checksum.yml + echo " $(sha512sum /tmp/cudart-llama-bin-win-cu12.0-x64.tar.gz | awk '{ print $1 }')" >> checksum.yml + echo " size: $(stat -c%s /tmp/cudart-llama-bin-win-cu12.0-x64.tar.gz)" >> checksum.yml + cat checksum.yml + + - name: Upload checksum.yml to GitHub Release + uses: actions/upload-release-asset@v1 + if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + upload_url: ${{ 
needs.create-draft-release.outputs.upload_url }} + asset_path: ./checksum.yml + asset_name: checksum.yml + asset_content_type: text/yaml + + - name: upload cudart-llama-bin-linux-cu12.0-x64.tar.gz to Github Release + uses: actions/upload-release-asset@v1 + if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + upload_url: ${{ needs.create-draft-release.outputs.upload_url }} + asset_path: /tmp/cudart-llama-bin-linux-cu12.0-x64.tar.gz + asset_name: cudart-llama-bin-linux-cu12.0-x64.tar.gz + asset_content_type: application/gzip + + - name: upload cudart-llama-bin-linux-cu11.7-x64.tar.gz to Github Release + uses: actions/upload-release-asset@v1 + if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + upload_url: ${{ needs.create-draft-release.outputs.upload_url }} + asset_path: /tmp/cudart-llama-bin-linux-cu11.7-x64.tar.gz + asset_name: cudart-llama-bin-linux-cu11.7-x64.tar.gz + asset_content_type: application/gzip + + - name: upload cudart-llama-bin-win-cu12.0-x64.tar.gz to Github Release + uses: actions/upload-release-asset@v1 + if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + upload_url: ${{ needs.create-draft-release.outputs.upload_url }} + asset_path: /tmp/cudart-llama-bin-win-cu12.0-x64.tar.gz + asset_name: cudart-llama-bin-win-cu12.0-x64.tar.gz + asset_content_type: application/gzip + + - name: upload cudart-llama-bin-win-cu11.7-x64.tar.gz to Github Release + uses: actions/upload-release-asset@v1 + if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + upload_url: ${{ needs.create-draft-release.outputs.upload_url }} + asset_path: /tmp/cudart-llama-bin-win-cu11.7-x64.tar.gz + asset_name: cudart-llama-bin-win-cu11.7-x64.tar.gz + asset_content_type: application/gzip \ No newline at end of file diff --git a/.github/workflows/upstream-sync.yml b/.github/workflows/upstream-sync.yml new file mode 100644 index 0000000000000..a77439c1ffc44 --- /dev/null +++ b/.github/workflows/upstream-sync.yml @@ -0,0 +1,174 @@ +name: Upstream Sync + +on: + schedule: + - cron: '0 0 * * *' # Daily at midnight UTC + workflow_dispatch: # Manual trigger + +jobs: + sync-and-update: + runs-on: ubuntu-latest + permissions: + contents: write + pull-requests: write + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 # Fetch all history for all branches and tags + token: ${{ secrets.PAT_SERVICE_ACCOUNT }} + ref: master + + - name: Setup + run: | + git config --global user.name 'github-actions[bot]' + git config --global user.email 'github-actions[bot]@users.noreply.github.com' + git remote add upstream https://github.com/ggml-org/llama.cpp.git || true + + - name: Sync with latest release + id: sync + run: | + # Get latest release tag + LATEST_TAG=$(curl -s https://api.github.com/repos/ggml-org/llama.cpp/releases/latest | jq -r '.tag_name') + if [[ -z "$LATEST_TAG" || "$LATEST_TAG" == "null" ]]; then + echo "No valid release found. Exiting." 
+ exit 1 + fi + echo "LATEST_TAG=$LATEST_TAG" >> $GITHUB_ENV + + # Fetch complete history from upstream + git fetch upstream --unshallow || git fetch upstream + + # Update master branch with latest release + git fetch upstream $LATEST_TAG + git checkout -B master FETCH_HEAD + + # Push the updated master branch + git push origin master --force + + # Count all commits for tagging + COMMIT_COUNT=$(git rev-list --count HEAD) + echo "COMMIT_COUNT=$COMMIT_COUNT" >> $GITHUB_ENV + echo "Total commits in master branch: $COMMIT_COUNT" + + - name: Create PR to dev + id: create_pr + env: + GITHUB_TOKEN: ${{ secrets.PAT_SERVICE_ACCOUNT }} + run: | + git fetch origin dev + git diff --quiet master origin/dev || HAS_DIFF=$? + + if [ -z "$HAS_DIFF" ]; then + echo "No differences found between master and dev. Skipping PR creation." + echo "SKIP_PR=true" >> $GITHUB_ENV + exit 0 + else + echo "Found differences between master and dev. Creating PR..." + fi + + BRANCH_NAME="update-dev-from-master-$(date +'%Y-%m-%d-%H-%M')" + git checkout -b $BRANCH_NAME + + # Make sure branch contains master content before creating the PR + git merge --no-edit master + + git push origin $BRANCH_NAME --force + + PR_TITLE="Sync master with upstream release $LATEST_TAG" + PR_BODY="Updates dev branch with latest release ($LATEST_TAG) from ggml-org/llama.cpp" + + gh pr create \ + --repo menloresearch/llama.cpp \ + --title "$PR_TITLE" \ + --body "$PR_BODY" \ + --head "$BRANCH_NAME" \ + --base dev || PR_FAILED=$? + + echo "BRANCH_NAME=$BRANCH_NAME" >> $GITHUB_ENV + + if [ ! -z "$PR_FAILED" ]; then + echo "Failed to create PR. Error code: $PR_FAILED" + echo "SKIP_PR=true" >> $GITHUB_ENV + exit 0 + fi + + PR_NUMBER=$(gh pr list --repo menloresearch/llama.cpp --head "$BRANCH_NAME" --json number --jq '.[0].number') + + if [[ -z "$PR_NUMBER" ]]; then + echo "Failed to get PR number" + echo "SKIP_PR=true" >> $GITHUB_ENV + exit 0 + fi + + echo "PR_NUMBER=$PR_NUMBER" >> $GITHUB_ENV + + - name: Wait for CI checks + id: wait_for_ci + if: ${{ env.SKIP_PR != 'true' }} + env: + GITHUB_TOKEN: ${{ secrets.PAT_SERVICE_ACCOUNT }} + run: | + echo "Waiting for CI checks to complete..." + while true; do + ci_completed=$(gh pr checks $PR_NUMBER --repo menloresearch/llama.cpp --json completedAt --jq '.[].completedAt') + + # If there are no CI checks, proceed with merge + if [[ -z "$ci_completed" ]]; then + echo "No CI checks detected. Proceeding with merge." + break + fi + + # Check if any checks are still running + if echo "$ci_completed" | grep -q "0001-01-01T00:00:00Z"; then + echo "CI is still running, waiting..." + sleep 60 + else + echo "CI has completed, checking states..." + ci_states=$(gh pr checks $PR_NUMBER --repo menloresearch/llama.cpp --json state --jq '.[].state') + if echo "$ci_states" | grep -vqE "SUCCESS|SKIPPED"; then + echo "CI failed, exiting..." + echo "SKIP_MERGE=true" >> $GITHUB_ENV + exit 0 + else + echo "CI passed, proceeding with merge..." + break + fi + fi + done + + - name: Merge PR and create tag + if: ${{ env.SKIP_PR != 'true' && env.SKIP_MERGE != 'true' }} + env: + GITHUB_TOKEN: ${{ secrets.PAT_SERVICE_ACCOUNT }} + run: | + echo "Attempting to merge PR #$PR_NUMBER..." + gh pr merge $PR_NUMBER --repo menloresearch/llama.cpp --merge --admin || MERGE_FAILED=$? + + if [ ! -z "$MERGE_FAILED" ]; then + echo "Failed to merge PR. Error code: $MERGE_FAILED" + echo "Manual intervention required." + exit 0 + fi + + echo "PR merged successfully!" 
+ + git fetch origin dev + git checkout -B dev origin/dev + + # Create tag using master branch commit count + TAG_NAME="b$COMMIT_COUNT" + + # Delete the tag if it already exists (both locally and remotely) + if git tag -l "$TAG_NAME" | grep -q "$TAG_NAME"; then + echo "Tag $TAG_NAME already exists. Deleting it first..." + git tag -d "$TAG_NAME" || true + git push --delete origin "$TAG_NAME" || true + echo "Existing tag deleted." + fi + + # Create tag on the current commit (dev branch HEAD after merge) + git tag "$TAG_NAME" + git push origin "$TAG_NAME" + echo "Tag $TAG_NAME created successfully (total commits from master: $COMMIT_COUNT)" \ No newline at end of file diff --git a/Makefile b/Makefile index bcbc770205956..8e0678ebe8c2f 100644 --- a/Makefile +++ b/Makefile @@ -1,9 +1,77 @@ -define newline +# Makefile for Cortex llamacpp engine - Build, Lint, Test, and Clean +CMAKE_EXTRA_FLAGS ?= "" +RUN_TESTS ?= false +LLM_MODEL_URL ?= "https://delta.jan.ai/tinyllama-1.1b-chat-v0.3.Q2_K.gguf" +EMBEDDING_MODEL_URL ?= "https://catalog.jan.ai/dist/models/embeds/nomic-embed-text-v1.5.f16.gguf" +CODE_SIGN ?= false +AZURE_KEY_VAULT_URI ?= xxxx +AZURE_CLIENT_ID ?= xxxx +AZURE_TENANT_ID ?= xxxx +AZURE_CLIENT_SECRET ?= xxxx +AZURE_CERT_NAME ?= xxxx +DEVELOPER_ID ?= xxxx +NOTARIZE ?= false +QUILL_NOTARY_KEY_ID ?= "" +QUILL_NOTARY_ISSUER ?= "" +QUILL_NOTARY_KEY ?= "/tmp/notary-key.p8" -endef +# Default target, does nothing +all: + @echo "Specify a target to run" -$(error Build system changed:$(newline)\ -The Makefile build has been replaced by CMake.$(newline)$(newline)\ -For build instructions see:$(newline)\ -https://github.com/ggml-org/llama.cpp/blob/master/docs/build.md$(newline)${newline}) +# Build the Cortex engine +build-lib: +ifeq ($(OS),Windows_NT) + @powershell -Command "cmake -B build $(CMAKE_EXTRA_FLAGS) -DLLAMA_BUILD_TESTS=OFF;" + @powershell -Command "cmake --build build --config Release -j4 --target llama-server;" +else ifeq ($(shell uname -s),Linux) + @cmake -B build $(CMAKE_EXTRA_FLAGS) -DLLAMA_BUILD_TESTS=OFF + @cmake --build build --config Release -j4 --target llama-server; +else + @cmake -B build $(CMAKE_EXTRA_FLAGS) -DLLAMA_BUILD_TESTS=OFF + @cmake --build build --config Release -j4 --target llama-server; +endif + +codesign: +ifeq ($(CODE_SIGN),false) + @echo "Skipping Code Sign" + @exit 0 +endif + +ifeq ($(OS),Windows_NT) + @powershell -Command "dotnet tool install --global AzureSignTool;" + @powershell -Command 'azuresigntool.exe sign -kvu "$(AZURE_KEY_VAULT_URI)" -kvi "$(AZURE_CLIENT_ID)" -kvt "$(AZURE_TENANT_ID)" -kvs "$(AZURE_CLIENT_SECRET)" -kvc "$(AZURE_CERT_NAME)" -tr http://timestamp.globalsign.com/tsa/r6advanced1 -v ".\build\bin\llama-server.exe";' +else ifeq ($(shell uname -s),Linux) + @echo "Skipping Code Sign for linux" + @exit 0 +else + find "build/bin" -type f -exec codesign --force -s "$(DEVELOPER_ID)" --options=runtime {} \; +endif + +notarize: +ifeq ($(NOTARIZE),false) + @echo "Skipping Notarization" + @exit 0 +endif + +ifeq ($(OS),Windows_NT) + @echo "Skipping Notarization for Windows" + @exit 0 +else ifeq ($(shell uname -s),Linux) + @echo "Skipping Notarization for Linux" + @exit 0 +else + @echo "Starting notarization for macOS binaries..." 
+ @find build/bin -type f -perm +111 -exec env QUILL_NOTARY_KEY_ID="$(QUILL_NOTARY_KEY_ID)" QUILL_NOTARY_ISSUER="$(QUILL_NOTARY_ISSUER)" QUILL_NOTARY_KEY="$(QUILL_NOTARY_KEY)" quill notarize {} \; + @echo "All macOS binaries notarized successfully" +endif + +package: +ifeq ($(OS),Windows_NT) + @powershell -Command "7z a -ttar temp.tar build\bin\*; 7z a -tgzip llama.tar.gz temp.tar;" +else ifeq ($(shell uname -s),Linux) + @tar -czvf llama.tar.gz build/bin; +else + @tar -czvf llama.tar.gz build/bin; +endif \ No newline at end of file diff --git a/menlo/Makefile b/menlo/Makefile new file mode 100644 index 0000000000000..8e0678ebe8c2f --- /dev/null +++ b/menlo/Makefile @@ -0,0 +1,77 @@ +# Makefile for Cortex llamacpp engine - Build, Lint, Test, and Clean + +CMAKE_EXTRA_FLAGS ?= "" +RUN_TESTS ?= false +LLM_MODEL_URL ?= "https://delta.jan.ai/tinyllama-1.1b-chat-v0.3.Q2_K.gguf" +EMBEDDING_MODEL_URL ?= "https://catalog.jan.ai/dist/models/embeds/nomic-embed-text-v1.5.f16.gguf" +CODE_SIGN ?= false +AZURE_KEY_VAULT_URI ?= xxxx +AZURE_CLIENT_ID ?= xxxx +AZURE_TENANT_ID ?= xxxx +AZURE_CLIENT_SECRET ?= xxxx +AZURE_CERT_NAME ?= xxxx +DEVELOPER_ID ?= xxxx +NOTARIZE ?= false +QUILL_NOTARY_KEY_ID ?= "" +QUILL_NOTARY_ISSUER ?= "" +QUILL_NOTARY_KEY ?= "/tmp/notary-key.p8" + +# Default target, does nothing +all: + @echo "Specify a target to run" + +# Build the Cortex engine +build-lib: +ifeq ($(OS),Windows_NT) + @powershell -Command "cmake -B build $(CMAKE_EXTRA_FLAGS) -DLLAMA_BUILD_TESTS=OFF;" + @powershell -Command "cmake --build build --config Release -j4 --target llama-server;" +else ifeq ($(shell uname -s),Linux) + @cmake -B build $(CMAKE_EXTRA_FLAGS) -DLLAMA_BUILD_TESTS=OFF + @cmake --build build --config Release -j4 --target llama-server; +else + @cmake -B build $(CMAKE_EXTRA_FLAGS) -DLLAMA_BUILD_TESTS=OFF + @cmake --build build --config Release -j4 --target llama-server; +endif + +codesign: +ifeq ($(CODE_SIGN),false) + @echo "Skipping Code Sign" + @exit 0 +endif + +ifeq ($(OS),Windows_NT) + @powershell -Command "dotnet tool install --global AzureSignTool;" + @powershell -Command 'azuresigntool.exe sign -kvu "$(AZURE_KEY_VAULT_URI)" -kvi "$(AZURE_CLIENT_ID)" -kvt "$(AZURE_TENANT_ID)" -kvs "$(AZURE_CLIENT_SECRET)" -kvc "$(AZURE_CERT_NAME)" -tr http://timestamp.globalsign.com/tsa/r6advanced1 -v ".\build\bin\llama-server.exe";' +else ifeq ($(shell uname -s),Linux) + @echo "Skipping Code Sign for linux" + @exit 0 +else + find "build/bin" -type f -exec codesign --force -s "$(DEVELOPER_ID)" --options=runtime {} \; +endif + +notarize: +ifeq ($(NOTARIZE),false) + @echo "Skipping Notarization" + @exit 0 +endif + +ifeq ($(OS),Windows_NT) + @echo "Skipping Notarization for Windows" + @exit 0 +else ifeq ($(shell uname -s),Linux) + @echo "Skipping Notarization for Linux" + @exit 0 +else + @echo "Starting notarization for macOS binaries..." + @find build/bin -type f -perm +111 -exec env QUILL_NOTARY_KEY_ID="$(QUILL_NOTARY_KEY_ID)" QUILL_NOTARY_ISSUER="$(QUILL_NOTARY_ISSUER)" QUILL_NOTARY_KEY="$(QUILL_NOTARY_KEY)" quill notarize {} \; + @echo "All macOS binaries notarized successfully" +endif + +package: +ifeq ($(OS),Windows_NT) + @powershell -Command "7z a -ttar temp.tar build\bin\*; 7z a -tgzip llama.tar.gz temp.tar;" +else ifeq ($(shell uname -s),Linux) + @tar -czvf llama.tar.gz build/bin; +else + @tar -czvf llama.tar.gz build/bin; +endif \ No newline at end of file