-diff --git a/examples/common.cpp b/examples/common.cpp
+diff --git a/common/common.cpp b/common/common.cpp
 index abcdef1..1234567 100644
---- a/examples/common.cpp
-+++ b/examples/common.cpp
-@@ -123,6 +123,18 @@ void common_params_parser_init(const char * arg0, common_params * params, gpt_par
-         params->cache_type_v = llama_kv_cache_type_from_str(value.c_str());
-     })
- );
+--- a/common/common.cpp
++++ b/common/common.cpp
+@@ -123,6 +123,18 @@ void common_params_parser_init(common_params * params) {
+         params->cache_type_v = llama_model_kv_cache_type_from_str(value.c_str());
+     });
+ }
 +
-+    add_opt(common_arg(
-+        {"--kvq"}, "BITS",
-+        "Set both KV cache key and value quantization to same bits\nallowed values: 4, 8\n(default: 16 for FP16)",
-+        [](common_params & params, const std::string & value) {
++    {
++        const auto & argp = gpt_params_args.add_arg({
++            "--kvq", "-kvq"
++        }, "BITS", "Set both KV cache key and value quantization to same bits\nallowed values: 4, 8\n(default: 16 for FP16)");
++        argp.action = [&](const std::string & value) {
 +            try {
 +                int bits = std::stoi(value);
-+                params->cache_type_k = bits == 4 ? GGML_TYPE_Q4_0 : GGML_TYPE_Q8_0;
-+                params->cache_type_v = bits == 4 ? GGML_TYPE_Q4_0 : GGML_TYPE_Q8_0;
++                params->cache_type_k = bits == 4 ? LLAMA_KV_CACHE_TYPE_Q4_0 : LLAMA_KV_CACHE_TYPE_Q8_0;
++                params->cache_type_v = bits == 4 ? LLAMA_KV_CACHE_TYPE_Q4_0 : LLAMA_KV_CACHE_TYPE_Q8_0;
 +            } catch (const std::exception & e) {}
-+        })
-+    );
++        };
++    }
 
-    add_opt(common_arg(
-        {"--cont-batching"}, "",
+    // Add batching arguments
+    {
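For context, here is a minimal standalone sketch of the bits-to-cache-type mapping that the new `--kvq` handler performs, with the documented "allowed values: 4, 8" check made explicit. The `kv_cache_type` enum and the `kvq_from_bits` helper are hypothetical stand-ins introduced only for this illustration; the patch itself assigns `LLAMA_KV_CACHE_TYPE_Q4_0` / `LLAMA_KV_CACHE_TYPE_Q8_0` directly to `params->cache_type_k` and `params->cache_type_v`, and maps any value other than 4 to the 8-bit type.

```cpp
// Minimal sketch of the --kvq mapping, using a local stand-in enum
// (kv_cache_type is hypothetical; the patch uses the project's own
// LLAMA_KV_CACHE_TYPE_* constants instead).
#include <cstdio>
#include <exception>
#include <string>

enum class kv_cache_type { F16, Q4_0, Q8_0 };

// Map the "BITS" argument to a cache type: 4 -> Q4_0, 8 -> Q8_0.
// Unparseable or out-of-range values keep the FP16 default.
static kv_cache_type kvq_from_bits(const std::string & value) {
    try {
        const int bits = std::stoi(value);
        if (bits == 4) return kv_cache_type::Q4_0;
        if (bits == 8) return kv_cache_type::Q8_0;
    } catch (const std::exception &) {
        // invalid input: fall through to the default
    }
    return kv_cache_type::F16;
}

int main() {
    // both the K and V caches receive the same type, as in the patch
    const kv_cache_type k = kvq_from_bits("4");
    const kv_cache_type v = kvq_from_bits("4");
    std::printf("k=%d v=%d\n", static_cast<int>(k), static_cast<int>(v));
    return 0;
}
```

Under this sketch, passing `--kvq 4` would quantize both the key and value caches to 4 bits, `--kvq 8` to 8 bits, and anything else would leave the FP16 default in place.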