Skip to content

Commit 1819d53

Browse files
committed
Fix CI workflow: Update build process to use CMake and fix patch file path
1 parent 86fb34c commit 1819d53

File tree

2 files changed

+25
-21
lines changed

2 files changed

+25
-21
lines changed

.github/workflows/ci.yml

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -26,13 +26,16 @@ jobs:
2626
run: |
2727
git clone https://github.com/ggerganov/llama.cpp || echo "llama.cpp already exists"
2828
cd llama.cpp
29-
git apply ../patch/split_kv_quant.diff || echo "Patch already applied"
30-
make -j # Skip Metal tests since GitHub Actions runs on Intel VMs
29+
mkdir -p build
30+
cd build
31+
git apply ../../patch/split_kv_quant.diff || echo "Patch already applied"
32+
cmake .. -DLLAMA_METAL=OFF -DLLAMA_AVX=ON -DLLAMA_AVX2=ON
33+
cmake --build . --config Release -j
3134
3235
- name: Smoke test (compilation only)
3336
run: |
34-
cd llama.cpp
35-
./main -h | grep "kvq-key" && echo "✅ Patch successfully applied"
37+
cd llama.cpp/build
38+
./bin/llama-cli -h | grep "kvq" && echo "✅ Patch successfully applied"
3639
3740
- name: Python syntax check
3841
run: |

patch/split_kv_quant.diff

Lines changed: 18 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -1,23 +1,24 @@
1-
diff --git a/examples/common.cpp b/examples/common.cpp
1+
diff --git a/common/common.cpp b/common/common.cpp
22
index abcdef1..1234567 100644
3-
--- a/examples/common.cpp
4-
+++ b/examples/common.cpp
5-
@@ -123,6 +123,18 @@ void common_params_parser_init(const char * arg0, common_params * params, gpt_par
6-
params->cache_type_v = llama_kv_cache_type_from_str(value.c_str());
7-
})
8-
);
3+
--- a/common/common.cpp
4+
+++ b/common/common.cpp
5+
@@ -123,6 +123,18 @@ void common_params_parser_init(common_params * params) {
6+
params->cache_type_v = llama_model_kv_cache_type_from_str(value.c_str());
7+
});
8+
}
99
+
10-
+ add_opt(common_arg(
11-
+ {"--kvq"}, "BITS",
12-
+ "Set both KV cache key and value quantization to same bits\nallowed values: 4, 8\n(default: 16 for FP16)",
13-
+ [](common_params & params, const std::string & value) {
10+
+ {
11+
+ const auto & argp = gpt_params_args.add_arg({
12+
+ "--kvq", "-kvq"
13+
+ }, "BITS", "Set both KV cache key and value quantization to same bits\nallowed values: 4, 8\n(default: 16 for FP16)");
14+
+ argp.action = [&](const std::string & value) {
1415
+ try {
1516
+ int bits = std::stoi(value);
16-
+ params->cache_type_k = bits == 4 ? GGML_TYPE_Q4_0 : GGML_TYPE_Q8_0;
17-
+ params->cache_type_v = bits == 4 ? GGML_TYPE_Q4_0 : GGML_TYPE_Q8_0;
17+
+ params->cache_type_k = bits == 4 ? LLAMA_KV_CACHE_TYPE_Q4_0 : LLAMA_KV_CACHE_TYPE_Q8_0;
18+
+ params->cache_type_v = bits == 4 ? LLAMA_KV_CACHE_TYPE_Q4_0 : LLAMA_KV_CACHE_TYPE_Q8_0;
1819
+ } catch (const std::exception & e) {}
19-
+ })
20-
+ );
20+
+ };
21+
+ }
2122

22-
add_opt(common_arg(
23-
{"--cont-batching"}, "",
23+
// Add batching arguments
24+
{

0 commit comments

Comments (0)