From 2ce2bd5cf115e0b6f8b91878079adec61333b8a0 Mon Sep 17 00:00:00 2001
From: Eve <139727413+netrunnereve@users.noreply.github.com>
Date: Mon, 22 Sep 2025 17:06:53 +0000
Subject: [PATCH 1/4] run the x64 ci on regular machines

---
 .github/workflows/build.yml | 32 ++++++++++++++++++++++++++++----
 ci/run.sh                   | 22 +++++++++++-----------
 2 files changed, 39 insertions(+), 15 deletions(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index afdcdd07b05cc..675aa702a433b 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -1251,17 +1251,29 @@ jobs:
   # TODO: simplify the following workflows using a matrix
   # TODO: run lighter CI on PRs and the full CI only on master (if needed)
   ggml-ci-x64-cpu-low-perf:
-    runs-on: [self-hosted, Linux, X64, CPU, low-perf]
+    runs-on: ubuntu-22.04
 
     steps:
       - name: Clone
         id: checkout
         uses: actions/checkout@v4
 
+      - name: ccache
+        uses: ggml-org/ccache-action@v1.2.16
+        with:
+          key: ggml-ci-x64-cpu-low-perf
+          evict-old-files: 1d
+
+      - name: Dependencies
+        id: depends
+        run: |
+          sudo apt-get update
+          sudo apt-get install build-essential libcurl4-openssl-dev
+
       - name: Test
         id: ggml-ci
         run: |
-          bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+          LLAMA_ARG_THREADS=$(nproc) GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
 
   ggml-ci-arm64-cpu-low-perf:
     runs-on: [self-hosted, Linux, ARM64, CPU, low-perf]
 
     steps:
       - name: Clone
         id: checkout
         uses: actions/checkout@v4
 
       - name: Test
         id: ggml-ci
         run: |
           bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
 
   ggml-ci-x64-cpu-high-perf:
-    runs-on: [self-hosted, Linux, X64, CPU, high-perf]
+    runs-on: ubuntu-22.04
 
     steps:
       - name: Clone
         id: checkout
         uses: actions/checkout@v4
 
+      - name: ccache
+        uses: ggml-org/ccache-action@v1.2.16
+        with:
+          key: ggml-ci-x64-cpu-high-perf
+          evict-old-files: 1d
+
+      - name: Dependencies
+        id: depends
+        run: |
+          sudo apt-get update
+          sudo apt-get install build-essential libcurl4-openssl-dev
+
       - name: Test
         id: ggml-ci
         run: |
-          bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+          LLAMA_ARG_THREADS=$(nproc) bash ./ci/run.sh ./tmp/results ./tmp/mnt
 
   ggml-ci-arm64-cpu-high-perf:
     runs-on: [self-hosted, Linux, ARM64, CPU, high-perf]

diff --git a/ci/run.sh b/ci/run.sh
index cb90f7a7850fc..a86f663b85717 100755
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -339,16 +339,16 @@ function gg_run_qwen3_0_6b {
 
     wiki_test="${path_wiki}/wiki.test.raw"
 
-    ./bin/llama-quantize ${model_bf16} ${model_q8_0} q8_0
-    ./bin/llama-quantize ${model_bf16} ${model_q4_0} q4_0
-    ./bin/llama-quantize ${model_bf16} ${model_q4_1} q4_1
-    ./bin/llama-quantize ${model_bf16} ${model_q5_0} q5_0
-    ./bin/llama-quantize ${model_bf16} ${model_q5_1} q5_1
-    ./bin/llama-quantize ${model_bf16} ${model_q2_k} q2_k
-    ./bin/llama-quantize ${model_bf16} ${model_q3_k} q3_k
-    ./bin/llama-quantize ${model_bf16} ${model_q4_k} q4_k
-    ./bin/llama-quantize ${model_bf16} ${model_q5_k} q5_k
-    ./bin/llama-quantize ${model_bf16} ${model_q6_k} q6_k
+    ./bin/llama-quantize ${model_bf16} ${model_q8_0} q8_0 $(nproc)
+    ./bin/llama-quantize ${model_bf16} ${model_q4_0} q4_0 $(nproc)
+    ./bin/llama-quantize ${model_bf16} ${model_q4_1} q4_1 $(nproc)
+    ./bin/llama-quantize ${model_bf16} ${model_q5_0} q5_0 $(nproc)
+    ./bin/llama-quantize ${model_bf16} ${model_q5_1} q5_1 $(nproc)
+    ./bin/llama-quantize ${model_bf16} ${model_q2_k} q2_k $(nproc)
+    ./bin/llama-quantize ${model_bf16} ${model_q3_k} q3_k $(nproc)
+    ./bin/llama-quantize ${model_bf16} ${model_q4_k} q4_k $(nproc)
+    ./bin/llama-quantize ${model_bf16} ${model_q5_k} q5_k $(nproc)
+    ./bin/llama-quantize ${model_bf16} ${model_q6_k} q6_k $(nproc)
 
     (time ./bin/llama-cli -no-cnv --model ${model_f16} -ngl 99 -c 1024 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
     (time ./bin/llama-cli -no-cnv --model ${model_bf16} -ngl 99 -c 1024 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-bf16.log
@@ -421,7 +421,7 @@ function gg_run_qwen3_0_6b {
 function gg_sum_qwen3_0_6b {
     gg_printf '### %s\n\n' "${ci}"
 
-    gg_printf 'Pythia 2.8B:\n'
+    gg_printf 'Qwen3 0.6B:\n'
     gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
     gg_printf '- perplexity:\n%s\n' "$(cat $OUT/${ci}-ppl.log)"
     gg_printf '- imatrix:\n```\n%s\n```\n' "$(cat $OUT/${ci}-imatrix-sum.log)"

From 51d0b497d2057a581b7138fda13312591c5f51fb Mon Sep 17 00:00:00 2001
From: Eve <139727413+netrunnereve@users.noreply.github.com>
Date: Mon, 22 Sep 2025 21:18:13 +0000
Subject: [PATCH 2/4] set up the same thing for arm

fix test-quantize-perf just like #12306
---
 .github/workflows/build.yml  | 32 ++++++++++++++++++++++++++++----
 tests/test-quantize-perf.cpp | 11 +----------
 2 files changed, 29 insertions(+), 14 deletions(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 675aa702a433b..98c0f8d985e41 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -1276,17 +1276,29 @@ jobs:
           LLAMA_ARG_THREADS=$(nproc) GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
 
   ggml-ci-arm64-cpu-low-perf:
-    runs-on: [self-hosted, Linux, ARM64, CPU, low-perf]
+    runs-on: ubuntu-22.04-arm
 
     steps:
       - name: Clone
         id: checkout
         uses: actions/checkout@v4
 
+      - name: ccache
+        uses: ggml-org/ccache-action@v1.2.16
+        with:
+          key: ggml-ci-arm64-cpu-low-perf
+          evict-old-files: 1d
+
+      - name: Dependencies
+        id: depends
+        run: |
+          sudo apt-get update
+          sudo apt-get install build-essential libcurl4-openssl-dev
+
       - name: Test
         id: ggml-ci
         run: |
-          bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+          LLAMA_ARG_THREADS=$(nproc) GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
 
   ggml-ci-x64-cpu-high-perf:
     runs-on: ubuntu-22.04
 
     steps:
       - name: Clone
         id: checkout
         uses: actions/checkout@v4
 
@@ -1314,17 +1326,29 @@ jobs:
           LLAMA_ARG_THREADS=$(nproc) bash ./ci/run.sh ./tmp/results ./tmp/mnt
 
   ggml-ci-arm64-cpu-high-perf:
-    runs-on: [self-hosted, Linux, ARM64, CPU, high-perf]
+    runs-on: ubuntu-22.04-arm
 
     steps:
       - name: Clone
         id: checkout
         uses: actions/checkout@v4
 
+      - name: ccache
+        uses: ggml-org/ccache-action@v1.2.16
+        with:
+          key: ggml-ci-arm64-cpu-high-perf
+          evict-old-files: 1d
+
+      - name: Dependencies
+        id: depends
+        run: |
+          sudo apt-get update
+          sudo apt-get install build-essential libcurl4-openssl-dev
+
       - name: Test
         id: ggml-ci
         run: |
-          GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+          LLAMA_ARG_THREADS=$(nproc) GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
 
   ggml-ci-x64-nvidia-v100-cuda:
     runs-on: [self-hosted, Linux, X64, NVIDIA, V100]

diff --git a/tests/test-quantize-perf.cpp b/tests/test-quantize-perf.cpp
index 2882884938388..cac0782dee9a9 100644
--- a/tests/test-quantize-perf.cpp
+++ b/tests/test-quantize-perf.cpp
@@ -260,14 +260,7 @@ int main(int argc, char * argv[]) {
 
     int64_t iterations = params.iterations;
 
-
-    // Initialize GGML, ensures float conversion tables are initialized
-    struct ggml_init_params ggml_params = {
-        /* .mem_size   = */ 1*1024,
-        /* .mem_buffer = */ NULL,
-        /* .no_alloc   = */ true,
-    };
-    struct ggml_context * ctx = ggml_init(ggml_params);
+    ggml_cpu_init();
 
     for (int i = 0; i < GGML_TYPE_COUNT; i++) {
         ggml_type type = (ggml_type) i;
@@ -359,7 +352,5 @@ int main(int argc, char * argv[]) {
         }
     }
 
-    ggml_free(ctx);
-
     return 0;
 }

From a9a68096ee2be7c1a4be46c264bc3451689b0fcd Mon Sep 17 00:00:00 2001
From: Eve <139727413+netrunnereve@users.noreply.github.com>
Date: Tue, 23 Sep 2025 12:02:09 -0400
Subject: [PATCH 3/4] try to disable sve

---
 .github/workflows/build.yml | 2 +-
 ci/run.sh                   | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 98c0f8d985e41..5c22fd33e2392 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -1348,7 +1348,7 @@ jobs:
       - name: Test
         id: ggml-ci
         run: |
-          LLAMA_ARG_THREADS=$(nproc) GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
+          LLAMA_ARG_THREADS=$(nproc) GG_BUILD_NO_SVE=1 GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
 
   ggml-ci-x64-nvidia-v100-cuda:
     runs-on: [self-hosted, Linux, X64, NVIDIA, V100]

diff --git a/ci/run.sh b/ci/run.sh
index a86f663b85717..7164b3f852169 100755
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -103,6 +103,10 @@ if [ ! -z ${GG_BUILD_MUSA} ]; then
     MUSA_ARCH=${MUSA_ARCH:-21}
     CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_MUSA=ON -DMUSA_ARCHITECTURES=${MUSA_ARCH}"
 fi
+
+if [ ! -z ${GG_BUILD_NO_SVE} ]; then
+    CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv8.5-a+fp16+i8mm"
+fi
 
 ## helpers
 
 # download a file if it does not exist or if it is outdated

From c2df226f26934455599e00b8f70ca9c3240e8392 Mon Sep 17 00:00:00 2001
From: Eve <139727413+netrunnereve@users.noreply.github.com>
Date: Wed, 24 Sep 2025 11:28:11 -0400
Subject: [PATCH 4/4] add another sve run

---
 .github/workflows/build.yml | 25 +++++++++++++++++++++++++
 ci/run.sh                   |  1 +
 2 files changed, 26 insertions(+)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 768d880a3865c..4f70232b1777f 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -1350,6 +1350,31 @@ jobs:
         run: |
           LLAMA_ARG_THREADS=$(nproc) GG_BUILD_NO_SVE=1 GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
 
+  ggml-ci-arm64-cpu-high-perf-sve:
+    runs-on: ubuntu-22.04-arm
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: ccache
+        uses: ggml-org/ccache-action@v1.2.16
+        with:
+          key: ggml-ci-arm64-cpu-high-perf-sve
+          evict-old-files: 1d
+
+      - name: Dependencies
+        id: depends
+        run: |
+          sudo apt-get update
+          sudo apt-get install build-essential libcurl4-openssl-dev
+
+      - name: Test
+        id: ggml-ci
+        run: |
+          LLAMA_ARG_THREADS=$(nproc) GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
+
   ggml-ci-x64-nvidia-cuda:
     runs-on: [self-hosted, Linux, X64, NVIDIA]

diff --git a/ci/run.sh b/ci/run.sh
index 747b2cc401e4e..68cbfdf2f52aa 100755
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -111,6 +111,7 @@ if [ ! -z ${GG_BUILD_MUSA} ]; then
 fi
 
 if [ ! -z ${GG_BUILD_NO_SVE} ]; then
+    # arm 9 and newer enables sve by default, adjust these flags depending on the cpu used
     CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv8.5-a+fp16+i8mm"
 fi
 
 ## helpers
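
To check these workflow changes locally, a minimal sketch of the equivalent invocations from a llama.cpp checkout with this series applied (the ./tmp paths, thread count, and GG_BUILD_* toggles mirror the Test steps above; the mkdir is only a precaution in case ci/run.sh does not create the directories itself):

    # reduced suite, mirroring the x64/arm64 low-perf jobs
    mkdir -p ./tmp/results ./tmp/mnt
    LLAMA_ARG_THREADS=$(nproc) GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt

    # arm64 high-perf suite with SVE disabled: GG_BUILD_NO_SVE=1 makes ci/run.sh pass
    # -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv8.5-a+fp16+i8mm to cmake
    LLAMA_ARG_THREADS=$(nproc) GG_BUILD_NO_SVE=1 GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt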