diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 031cdb09b1..12e1a25d9b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -6,6 +6,10 @@ on: pull_request: branches: [ master ] +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} + cancel-in-progress: true + jobs: build: strategy: @@ -57,3 +61,211 @@ jobs: mkdir test-cmake cmake -S examples/test-cmake -B test-cmake -DCMAKE_PREFIX_PATH=${{ github.workspace }}/installed ${{ contains(matrix.os, 'windows') && '-A x64' || '-G Ninja -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++' }} cmake --build test-cmake ${{ contains(matrix.os, 'windows') && '--config Release' || '' }} + +# TODO: simplify the following workflows using a matrix + ggml-ci-x64-cpu-low-perf: + runs-on: ubuntu-22.04 + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: ggml-ci-x64-cpu-low-perf + evict-old-files: 1d + + - name: Dependencies + id: depends + run: | + sudo apt-get update + sudo apt-get install build-essential libcurl4-openssl-dev + + - name: Test + id: ggml-ci + run: | + LLAMA_ARG_THREADS=$(nproc) GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt + + ggml-ci-arm64-cpu-low-perf: + runs-on: ubuntu-22.04-arm + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: ggml-ci-arm64-cpu-low-perf + evict-old-files: 1d + + - name: Dependencies + id: depends + run: | + sudo apt-get update + sudo apt-get install build-essential libcurl4-openssl-dev + + - name: Test + id: ggml-ci + run: | + LLAMA_ARG_THREADS=$(nproc) GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt + + ggml-ci-x64-cpu-high-perf: + runs-on: ubuntu-22.04 + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: 
ggml-ci-x64-cpu-high-perf + evict-old-files: 1d + + - name: Dependencies + id: depends + run: | + sudo apt-get update + sudo apt-get install build-essential libcurl4-openssl-dev + + - name: Test + id: ggml-ci + run: | + LLAMA_ARG_THREADS=$(nproc) bash ./ci/run.sh ./tmp/results ./tmp/mnt + + ggml-ci-arm64-cpu-high-perf: + runs-on: ubuntu-22.04-arm + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: ggml-ci-arm64-cpu-high-perf + evict-old-files: 1d + + - name: Dependencies + id: depends + run: | + sudo apt-get update + sudo apt-get install build-essential libcurl4-openssl-dev + + - name: Test + id: ggml-ci + run: | + LLAMA_ARG_THREADS=$(nproc) GG_BUILD_NO_SVE=1 GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt + + ggml-ci-arm64-cpu-high-perf-sve: + runs-on: ubuntu-22.04-arm + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: ggml-ci-arm64-cpu-high-perf-sve + evict-old-files: 1d + + - name: Dependencies + id: depends + run: | + sudo apt-get update + sudo apt-get install build-essential libcurl4-openssl-dev + + - name: Test + id: ggml-ci + run: | + LLAMA_ARG_THREADS=$(nproc) GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt + + ggml-ci-x64-nvidia-cuda: + runs-on: [self-hosted, Linux, X64, NVIDIA] + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + + - name: Test + id: ggml-ci + run: | + nvidia-smi + GG_BUILD_CUDA=1 bash ./ci/run.sh ~/results/ggml /mnt/ggml + + ggml-ci-x64-nvidia-vulkan-cm: + runs-on: [self-hosted, Linux, X64, NVIDIA] + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + + - name: Test + id: ggml-ci + run: | + vulkaninfo --summary + GG_BUILD_VULKAN=1 GGML_VK_DISABLE_COOPMAT2=1 bash ./ci/run.sh ~/results/ggml /mnt/ggml + + ggml-ci-x64-nvidia-vulkan-cm2: + runs-on: 
[self-hosted, Linux, X64, NVIDIA, COOPMAT2] + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + + - name: Test + id: ggml-ci + run: | + vulkaninfo --summary + GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/ggml /mnt/ggml + + ggml-ci-x64-cpu-amx: + runs-on: [self-hosted, Linux, X64, CPU, AMX] + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + + - name: Test + id: ggml-ci + run: | + bash ./ci/run.sh ~/results/ggml /mnt/ggml + + ggml-ci-mac-metal: + runs-on: [self-hosted, macOS, ARM64] + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + + - name: Test + id: ggml-ci + run: | + GG_BUILD_METAL=1 bash ./ci/run.sh ~/results/ggml ~/mnt/ggml + + ggml-ci-mac-vulkan: + runs-on: [self-hosted, macOS, ARM64] + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + + - name: Test + id: ggml-ci + run: | + vulkaninfo --summary + GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/ggml ~/mnt/ggml diff --git a/ci/run.sh b/ci/run.sh index 66f15cd7eb..e8390fa42e 100644 --- a/ci/run.sh +++ b/ci/run.sh @@ -36,15 +36,35 @@ SRC=`pwd` CMAKE_EXTRA="" CTEST_EXTRA="" +if [ ! -z ${GG_BUILD_METAL} ]; then + CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_METAL=ON" +fi + if [ ! -z ${GG_BUILD_CUDA} ]; then CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_CUDA=ON" + + if command -v nvidia-smi >/dev/null 2>&1; then + CUDA_ARCH=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader,nounits 2>/dev/null | head -1 | tr -d '.') + if [[ -n "$CUDA_ARCH" && "$CUDA_ARCH" =~ ^[0-9]+$ ]]; then + CMAKE_EXTRA="${CMAKE_EXTRA} -DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCH}" + else + echo "Warning: Using fallback CUDA architectures" + CMAKE_EXTRA="${CMAKE_EXTRA} -DCMAKE_CUDA_ARCHITECTURES=61;70;75;80;86;89" + fi + else + echo "Error: nvidia-smi not found, cannot build with CUDA" + exit 1 + fi fi -if [ ! 
-z ${GG_BUILD_METAL} ]; then - # TODO: this should use -DGGML_METAL_SHADER_DEBUG=ON instead, but currently it fails because - # the binaries cannot locate default.metallib eventhough it is in bin/. cannot figure out - # why this is happening, so temporary workaround is to use -DGGML_METAL_EMBED_LIBRARY=ON - CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_METAL=ON -DGGML_METAL_EMBED_LIBRARY=ON" +if [ ! -z ${GG_BUILD_ROCM} ]; then + CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_HIP=ON" + if [ -z ${GG_BUILD_AMDGPU_TARGETS} ]; then + echo "Missing GG_BUILD_AMDGPU_TARGETS, please set it to your GPU architecture (e.g. gfx90a, gfx1100, etc.)" + exit 1 + fi + + CMAKE_EXTRA="${CMAKE_EXTRA} -DAMDGPU_TARGETS=${GG_BUILD_AMDGPU_TARGETS}" fi if [ ! -z ${GG_BUILD_SYCL} ]; then @@ -53,13 +73,40 @@ if [ ! -z ${GG_BUILD_SYCL} ]; then echo "source /opt/intel/oneapi/setvars.sh" exit 1 fi + # Use only main GPU export ONEAPI_DEVICE_SELECTOR="level_zero:0" + # Enable sysman for correct memory reporting export ZES_ENABLE_SYSMAN=1 - # No plan to implement backward pass for now / disable test-opt - CTEST_EXTRA="-E test-opt" + # to circumvent precision issues on CPY operations + export SYCL_PROGRAM_COMPILE_OPTIONS="-cl-fp32-correctly-rounded-divide-sqrt" CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_SYCL=1 -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON" fi +if [ ! -z ${GG_BUILD_VULKAN} ]; then + CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_VULKAN=1" + + # if on Mac, disable METAL and BLAS + if [[ "$OSTYPE" == "darwin"* ]]; then + CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_METAL=OFF -DGGML_BLAS=OFF" + fi + +fi + +if [ ! -z ${GG_BUILD_WEBGPU} ]; then + CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_WEBGPU=1" +fi + +if [ ! -z ${GG_BUILD_MUSA} ]; then + # Use qy1 by default (MTT S80) + MUSA_ARCH=${MUSA_ARCH:-21} + CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_MUSA=ON -DMUSA_ARCHITECTURES=${MUSA_ARCH}" +fi + +if [ ! 
-z ${GG_BUILD_NO_SVE} ]; then + # arm 9 and newer enables sve by default, adjust these flags depending on the cpu used + CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv8.5-a+fp16+i8mm" +fi + ## helpers # download a file if it does not exist or if it is outdated @@ -112,13 +159,9 @@ function gg_run_ctest_debug { set -e (time cmake -DCMAKE_BUILD_TYPE=Debug ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log - (time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log - - if [ ! -z ${GG_BUILD_METAL} ]; then - export GGML_METAL_PATH_RESOURCES="$(pwd)/bin" - fi + (time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log - (time ctest ${CTEST_EXTRA} --output-on-failure -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log + (time ctest ${CTEST_EXTRA} --output-on-failure -E "test-opt|test-backend-ops" ) 2>&1 | tee -a $OUT/${ci}-ctest.log set +e } @@ -144,11 +187,7 @@ function gg_run_ctest_release { set -e (time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log - (time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log - - if [ ! -z ${GG_BUILD_METAL} ]; then - export GGML_METAL_PATH_RESOURCES="$(pwd)/bin" - fi + (time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log if [ -z $GG_BUILD_LOW_PERF ]; then (time ctest ${CTEST_EXTRA} --output-on-failure ) 2>&1 | tee -a $OUT/${ci}-ctest.log @@ -316,7 +355,7 @@ function gg_sum_yolo { ## main -if [ -z ${GG_BUILD_LOW_PERF} ]; then +if true ; then # Create symlink: ./ggml/models-mnt -> $MNT/models/models-mnt rm -rf ${SRC}/models-mnt mnt_models=${MNT}/models @@ -335,27 +374,17 @@ fi ret=0 -if [ -z ${GG_BUILD_SYCL}]; then - test $ret -eq 0 && gg_run ctest_debug -fi +test $ret -eq 0 && gg_run ctest_debug test $ret -eq 0 && gg_run ctest_release -if [ ! 
-z ${GG_BUILD_METAL} ]; then - export GGML_METAL_PATH_RESOURCES="${SRC}/build-ci-release/bin" -fi - -if [ -z ${GG_BUILD_NO_DOWNLOAD} ]; then - test $ret -eq 0 && gg_run gpt_2 - #test $ret -eq 0 && gg_run mnist - test $ret -eq 0 && gg_run sam - test $ret -eq 0 && gg_run yolo -fi +test $ret -eq 0 && gg_run gpt_2 +#test $ret -eq 0 && gg_run mnist +test $ret -eq 0 && gg_run sam +test $ret -eq 0 && gg_run yolo if [ -z $GG_BUILD_LOW_PERF ]; then - if [ -z ${GG_BUILD_VRAM_GB} ] || [ ${GG_BUILD_VRAM_GB} -ge 16 ]; then - # run tests that require GPU with at least 16GB of VRAM - date - fi + # run tests that are skipped on low-perf runners (GG_BUILD_LOW_PERF unset) + date fi exit $ret