diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index dd4eff2c7fb..3643aaf279c 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -25,6 +25,7 @@ on:
             '**/*.metal',
             '**/*.comp',
             '**/*.java']
+
   pull_request:
     types: [opened, synchronize, reopened]
   workflow_dispatch:
@@ -1369,3 +1370,211 @@ jobs:
         shell: bash
         run: |
           ctest -R ^test-vad$ --test-dir build --output-on-failure -VV
+
+  # TODO: simplify the following workflows using a matrix
+  ggml-ci-x64-cpu-low-perf:
+    runs-on: ubuntu-22.04
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: ccache
+        uses: ggml-org/ccache-action@v1.2.16
+        with:
+          key: ggml-ci-x64-cpu-low-perf
+          evict-old-files: 1d
+
+      - name: Dependencies
+        id: depends
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y build-essential libcurl4-openssl-dev
+
+      - name: Test
+        id: ggml-ci
+        run: |
+          LLAMA_ARG_THREADS=$(nproc) GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
+
+  ggml-ci-arm64-cpu-low-perf:
+    runs-on: ubuntu-22.04-arm
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: ccache
+        uses: ggml-org/ccache-action@v1.2.16
+        with:
+          key: ggml-ci-arm64-cpu-low-perf
+          evict-old-files: 1d
+
+      - name: Dependencies
+        id: depends
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y build-essential libcurl4-openssl-dev
+
+      - name: Test
+        id: ggml-ci
+        run: |
+          LLAMA_ARG_THREADS=$(nproc) GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
+
+  ggml-ci-x64-cpu-high-perf:
+    runs-on: ubuntu-22.04
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: ccache
+        uses: ggml-org/ccache-action@v1.2.16
+        with:
+          key: ggml-ci-x64-cpu-high-perf
+          evict-old-files: 1d
+
+      - name: Dependencies
+        id: depends
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y build-essential libcurl4-openssl-dev
+
+      - name: Test
+        id: ggml-ci
+        run: |
+          LLAMA_ARG_THREADS=$(nproc) bash ./ci/run.sh ./tmp/results ./tmp/mnt
+
+  ggml-ci-arm64-cpu-high-perf:
+    runs-on: ubuntu-22.04-arm
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: ccache
+        uses: ggml-org/ccache-action@v1.2.16
+        with:
+          key: ggml-ci-arm64-cpu-high-perf
+          evict-old-files: 1d
+
+      - name: Dependencies
+        id: depends
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y build-essential libcurl4-openssl-dev
+
+      - name: Test
+        id: ggml-ci
+        run: |
+          LLAMA_ARG_THREADS=$(nproc) GG_BUILD_NO_SVE=1 GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
+
+  ggml-ci-arm64-cpu-high-perf-sve:
+    runs-on: ubuntu-22.04-arm
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: ccache
+        uses: ggml-org/ccache-action@v1.2.16
+        with:
+          key: ggml-ci-arm64-cpu-high-perf-sve
+          evict-old-files: 1d
+
+      - name: Dependencies
+        id: depends
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y build-essential libcurl4-openssl-dev
+
+      - name: Test
+        id: ggml-ci
+        run: |
+          LLAMA_ARG_THREADS=$(nproc) GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
+
+  ggml-ci-x64-nvidia-cuda:
+    runs-on: [self-hosted, Linux, X64, NVIDIA]
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: Test
+        id: ggml-ci
+        run: |
+          nvidia-smi
+          GG_BUILD_CUDA=1 bash ./ci/run.sh ~/results/whisper.cpp /mnt/whisper.cpp
+
+  ggml-ci-x64-nvidia-vulkan-cm:
+    runs-on: [self-hosted, Linux, X64, NVIDIA]
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: Test
+        id: ggml-ci
+        run: |
+          vulkaninfo --summary
+          GG_BUILD_VULKAN=1 GGML_VK_DISABLE_COOPMAT2=1 bash ./ci/run.sh ~/results/whisper.cpp /mnt/whisper.cpp
+
+  ggml-ci-x64-nvidia-vulkan-cm2:
+    runs-on: [self-hosted, Linux, X64, NVIDIA, COOPMAT2]
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: Test
+        id: ggml-ci
+        run: |
+          vulkaninfo --summary
+          GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/whisper.cpp /mnt/whisper.cpp
+
+  ggml-ci-x64-cpu-amx:
+    runs-on: [self-hosted, Linux, X64, CPU, AMX]
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: Test
+        id: ggml-ci
+        run: |
+          bash ./ci/run.sh ~/results/whisper.cpp /mnt/whisper.cpp
+
+  ggml-ci-mac-metal:
+    runs-on: [self-hosted, macOS, ARM64]
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: Test
+        id: ggml-ci
+        run: |
+          GG_BUILD_METAL=1 bash ./ci/run.sh ~/results/whisper.cpp ~/mnt/whisper.cpp
+
+  ggml-ci-mac-vulkan:
+    runs-on: [self-hosted, macOS, ARM64]
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: Test
+        id: ggml-ci
+        run: |
+          vulkaninfo --summary
+          GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/whisper.cpp ~/mnt/whisper.cpp
diff --git a/.gitignore b/.gitignore
index 0957376dd8b..957eeb75456 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,6 +15,7 @@
 build/
 build-*/
 build_*/
+tmp/

 # SPM
 .build/
@@ -62,4 +63,4 @@ cmake-build-debug/
 .gradle/
 local.properties
 .log
-.exe
\ No newline at end of file
+.exe
diff --git a/ci/run.sh b/ci/run.sh
index 6c770416e2f..d98a3d86047 100644
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -24,9 +24,9 @@ mkdir -p "$2"
 OUT=$(realpath "$1")
 MNT=$(realpath "$2")

-rm -f "$OUT/*.log"
-rm -f "$OUT/*.exit"
-rm -f "$OUT/*.md"
+rm -vf "$OUT"/*.log
+rm -vf "$OUT"/*.exit
+rm -vf "$OUT"/*.md

 sd=`dirname $0`
 cd $sd/../
@@ -50,8 +50,35 @@ fi

 CMAKE_EXTRA="-DWHISPER_FATAL_WARNINGS=ON"

+if [ ! -z ${GG_BUILD_METAL} ]; then
+    CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_METAL=ON"
+fi
+
 if [ ! -z ${GG_BUILD_CUDA} ]; then
-    CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=native"
+    CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_CUDA=ON"
+
+    if command -v nvidia-smi >/dev/null 2>&1; then
+        CUDA_ARCH=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader,nounits 2>/dev/null | head -1 | tr -d '.')
+        if [[ -n "$CUDA_ARCH" && "$CUDA_ARCH" =~ ^[0-9]+$ ]]; then
+            CMAKE_EXTRA="${CMAKE_EXTRA} -DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCH}"
+        else
+            echo "Warning: Using fallback CUDA architectures"
+            CMAKE_EXTRA="${CMAKE_EXTRA} -DCMAKE_CUDA_ARCHITECTURES=61;70;75;80;86;89"
+        fi
+    else
+        echo "Error: nvidia-smi not found, cannot build with CUDA"
+        exit 1
+    fi
+fi
+
+if [ ! -z ${GG_BUILD_ROCM} ]; then
+    CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_HIP=ON"
+    if [ -z ${GG_BUILD_AMDGPU_TARGETS} ]; then
+        echo "Missing GG_BUILD_AMDGPU_TARGETS, please set it to your GPU architecture (e.g. gfx90a, gfx1100, etc.)"
+        exit 1
+    fi
+
+    CMAKE_EXTRA="${CMAKE_EXTRA} -DAMDGPU_TARGETS=${GG_BUILD_AMDGPU_TARGETS}"
 fi

 if [ ! -z ${GG_BUILD_SYCL} ]; then
@@ -60,28 +87,38 @@ if [ ! -z ${GG_BUILD_SYCL} ]; then
         echo "source /opt/intel/oneapi/setvars.sh"
         exit 1
     fi
-
-    CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON"
+    # Use only main GPU
+    export ONEAPI_DEVICE_SELECTOR="level_zero:0"
+    # Enable sysman for correct memory reporting
+    export ZES_ENABLE_SYSMAN=1
+    # to circumvent precision issues on CPY operations
+    export SYCL_PROGRAM_COMPILE_OPTIONS="-cl-fp32-correctly-rounded-divide-sqrt"
+    CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_SYCL=1 -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON"
 fi

-if [ ! -z ${GG_BUILD_OPENVINO} ]; then
-    CMAKE_EXTRA="${CMAKE_EXTRA} -DWHISPER_OPENVINO=ON"
-fi
+if [ ! -z ${GG_BUILD_VULKAN} ]; then
+    CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_VULKAN=1"
+
+    # if on Mac, disable METAL and BLAS
+    if [[ "$OSTYPE" == "darwin"* ]]; then
+        CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_METAL=OFF -DGGML_BLAS=OFF"
+    fi

-if [ ! -z ${GG_BUILD_METAL} ]; then
-    CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_METAL=ON"
 fi

-if [ ! -z ${GG_BUILD_VULKAN} ]; then
-    CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_VULKAN=ON"
+if [ ! -z ${GG_BUILD_WEBGPU} ]; then
+    CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_WEBGPU=1"
 fi

-if [ ! -z ${GG_BUILD_BLAS} ]; then
-    CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_BLAS=ON"
+if [ ! -z ${GG_BUILD_MUSA} ]; then
+    # Use qy1 by default (MTT S80)
+    MUSA_ARCH=${MUSA_ARCH:-21}
+    CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_MUSA=ON -DMUSA_ARCHITECTURES=${MUSA_ARCH}"
 fi

-if [ ! -z ${GG_BUILD_COREML} ]; then
-    CMAKE_EXTRA="${CMAKE_EXTRA} -DWHISPER_COREML=ON"
+if [ ! -z ${GG_BUILD_NO_SVE} ]; then
+    # arm 9 and newer enables sve by default, adjust these flags depending on the cpu used
+    CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv8.5-a+fp16+i8mm"
 fi

 ## helpers
@@ -178,7 +215,7 @@ function gg_run_ctest {
     mode=$2

     cd ${SRC}
-    
+
     rm -rf build-ci-${mode} && mkdir build-ci-${mode} && cd build-ci-${mode}

     set -e
@@ -219,7 +256,7 @@ function gg_run_bench {
     echo "Running memcpy benchmark"
     (time ./build-ci-release/bin/whisper-bench -w 1 -t $BENCH_N_THREADS 2>&1) | tee -a $OUT/${ci}-memcpy.log
     gg_check_last_command_status "$OUT/${ci}-memcpy.exit" "memcpy benchmark"
-    
+
     echo "Running ggml_mul_mat benchmark with $BENCH_N_THREADS threads"
     (time ./build-ci-release/bin/whisper-bench -w 2 -t $BENCH_N_THREADS 2>&1) | tee -a $OUT/${ci}-mul_mat.log
     gg_check_last_command_status "$OUT/${ci}-mul_mat.exit" "ggml_mul_mat benchmark"
@@ -233,6 +270,8 @@ function gg_run_bench {
         printf "| %16s | %13s | %3s | %3s | %7s | %7s | %7s | %7s | %7s |\n" "---" "---" "---" "---" "---" "---" "---" "---" "---"
     } | tee -a $OUT/${ci}-models-table.log

+    res=0
+
     # run benchmark for each model
     for model in "${MODELS[@]}"; do
         echo "Benchmarking model: $model"
@@ -283,8 +322,11 @@ function gg_run_bench {
                 | tee -a $OUT/${ci}-models-table.log
         else
             echo "Benchmark failed for model: $model" | tee -a $OUT/${ci}-bench-errors.log
+            res=1
         fi
     done
+
+    return $res
 }

 function gg_sum_bench {
@@ -326,11 +368,12 @@ ret=0
 for model in "${MODELS[@]}"; do
     test $ret -eq 0 && gg_download_model ${model}
 done
-if [ -z ${GG_BUILD_SYCL}]; then
-    test $ret -eq 0 && gg_run ctest debug
-fi
+
+test $ret -eq 0 && gg_run ctest debug
 test $ret -eq 0 && gg_run ctest release

 test $ret -eq 0 && gg_run bench

+cat "$OUT/README.md"
+
 exit $ret