Commit 6ce23b6

Merge branch 'master' into llamacli-tools

2 parents: 60bca9c + d07c621


55 files changed: +3726, -2496 lines

.github/workflows/build.yml

Lines changed: 4 additions & 0 deletions
@@ -374,6 +374,8 @@ jobs:
       - name: Clone
         id: checkout
         uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
 
       - name: ccache
         uses: hendrikmuhs/[email protected]
@@ -1373,8 +1375,10 @@ jobs:
 
     needs:
       - ubuntu-cpu-cmake
+      - ubuntu-22-cmake-vulkan
       - windows-latest-cmake
       - windows-2019-cmake-cuda
+      - windows-latest-cmake-sycl
       - windows-latest-cmake-hip-release
       - macOS-latest-cmake-arm64
       - macOS-latest-cmake-x64
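Note: fetch-depth: 0 tells actions/checkout to fetch the full git history instead of its default shallow single-commit clone; full history is typically wanted when a later step derives version strings from git, though the commit itself does not state the motivation. The second hunk adds the Vulkan and SYCL builds as dependencies of the release job, presumably so that job waits for those artifacts as well.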

.github/workflows/docker.yml

Lines changed: 2 additions & 0 deletions
@@ -51,6 +51,8 @@ jobs:
 
       - name: Set up QEMU
         uses: docker/setup-qemu-action@v3
+        with:
+          image: tonistiigi/binfmt:qemu-v7.0.0-28
 
       - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
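Note: by default docker/setup-qemu-action pulls the latest tonistiigi/binfmt image; this change pins the emulator used for multi-arch image builds to qemu-v7.0.0-28. A pin like this is usually a workaround for emulation regressions in newer QEMU builds, though the commit does not say which issue prompted it.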

.gitignore

Lines changed: 1 addition & 0 deletions
@@ -98,6 +98,7 @@ examples/server/*.css.hpp
 examples/server/*.html.hpp
 examples/server/*.js.hpp
 examples/server/*.mjs.hpp
+examples/server/*.gz.hpp
 !build_64.sh
 !examples/*.bat
 !examples/*/*.kts

Makefile

Lines changed: 1 addition & 1 deletion
@@ -1364,7 +1364,7 @@ llama-server: \
 	examples/server/index.html.hpp \
 	examples/server/loading.html.hpp \
 	common/chat.cpp \
-	common/chat.hpp \
+	common/chat.h \
 	common/chat-template.hpp \
 	common/json.hpp \
 	common/minja.hpp \

common/CMakeLists.txt

Lines changed: 3 additions & 3 deletions
@@ -57,8 +57,7 @@ add_library(${TARGET} STATIC
     arg.h
     base64.hpp
     chat.cpp
-    chat.hpp
-    chat-template.hpp
+    chat.h
     common.cpp
     common.h
     console.cpp
@@ -68,7 +67,8 @@ add_library(${TARGET} STATIC
     llguidance.cpp
     log.cpp
     log.h
-    minja.hpp
+    minja/chat-template.hpp
+    minja/minja.hpp
     ngram-cache.cpp
     ngram-cache.h
     sampling.cpp
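Taken together with the Makefile hunk above, these build-list edits track two renames in common/: the chat header moves from chat.hpp to chat.h, and the minja templating headers (minja.hpp, chat-template.hpp) now live in a minja/ subdirectory.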

common/arg.cpp

Lines changed: 50 additions & 1 deletion
@@ -2,6 +2,7 @@
 
 #include "log.h"
 #include "sampling.h"
+#include "chat.h"
 
 #include <algorithm>
 #include <climits>
@@ -2264,7 +2265,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
     ).set_env("LLAMA_LOG_VERBOSITY"));
     add_opt(common_arg(
         {"--log-prefix"},
-        "Enable prefx in log messages",
+        "Enable prefix in log messages",
         [](common_params &) {
             common_log_set_prefix(common_log_main(), true);
         }
@@ -2518,5 +2519,53 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
     }
     ).set_examples({LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_SERVER}));
 
+    add_opt(common_arg(
+        {"--fim-qwen-1.5b-default"},
+        string_format("use default Qwen 2.5 Coder 1.5B (note: can download weights from the internet)"),
+        [](common_params & params) {
+            params.hf_repo = "ggml-org/Qwen2.5-Coder-1.5B-Q8_0-GGUF";
+            params.hf_file = "qwen2.5-coder-1.5b-q8_0.gguf";
+            params.port = 8012;
+            params.n_gpu_layers = 99;
+            params.flash_attn = true;
+            params.n_ubatch = 1024;
+            params.n_batch = 1024;
+            params.n_ctx = 0;
+            params.n_cache_reuse = 256;
+        }
+    ).set_examples({LLAMA_EXAMPLE_SERVER}));
+
+    add_opt(common_arg(
+        {"--fim-qwen-3b-default"},
+        string_format("use default Qwen 2.5 Coder 3B (note: can download weights from the internet)"),
+        [](common_params & params) {
+            params.hf_repo = "ggml-org/Qwen2.5-Coder-3B-Q8_0-GGUF";
+            params.hf_file = "qwen2.5-coder-3b-q8_0.gguf";
+            params.port = 8012;
+            params.n_gpu_layers = 99;
+            params.flash_attn = true;
+            params.n_ubatch = 1024;
+            params.n_batch = 1024;
+            params.n_ctx = 0;
+            params.n_cache_reuse = 256;
+        }
+    ).set_examples({LLAMA_EXAMPLE_SERVER}));
+
+    add_opt(common_arg(
+        {"--fim-qwen-7b-default"},
+        string_format("use default Qwen 2.5 Coder 7B (note: can download weights from the internet)"),
+        [](common_params & params) {
+            params.hf_repo = "ggml-org/Qwen2.5-Coder-7B-Q8_0-GGUF";
+            params.hf_file = "qwen2.5-coder-7b-q8_0.gguf";
+            params.port = 8012;
+            params.n_gpu_layers = 99;
+            params.flash_attn = true;
+            params.n_ubatch = 1024;
+            params.n_batch = 1024;
+            params.n_ctx = 0;
+            params.n_cache_reuse = 256;
+        }
+    ).set_examples({LLAMA_EXAMPLE_SERVER}));
+
     return ctx_arg;
 }
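The three --fim-qwen-*-default presets differ only in hf_repo/hf_file; the remaining settings are shared: port 8012, full GPU offload (n_gpu_layers = 99), flash attention, 1024-token batch and micro-batch sizes, n_ctx = 0 (use the model's default context length), and 256-token cache reuse. A minimal usage sketch, assuming a built llama-server binary (the GGUF weights are fetched from Hugging Face on first run):

llama-server --fim-qwen-1.5b-default

The server then listens on port 8012 and can answer fill-in-the-middle (FIM) completion requests, e.g. from an editor plugin.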
