Skip to content

Commit 5d3491e

Browse files
committed
Merge branch 'master' into gg/llama-kv-cache
ggml-ci
2 parents a40ba49 + 5783575 commit 5d3491e

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

53 files changed

+4034
-208
lines changed

.editorconfig

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,3 +40,11 @@ indent_style = tab
4040
[examples/cvector-generator/*.txt]
4141
trim_trailing_whitespace = unset
4242
insert_final_newline = unset
43+
44+
[models/templates/*.jinja]
45+
indent_style = unset
46+
indent_size = unset
47+
end_of_line = unset
48+
charset = unset
49+
trim_trailing_whitespace = unset
50+
insert_final_newline = unset

.github/workflows/build.yml

Lines changed: 130 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,12 @@ jobs:
4343
with:
4444
fetch-depth: 0
4545

46+
- name: ccache
47+
uses: hendrikmuhs/ccache-action@v1.2.16
48+
with:
49+
key: macOS-latest-cmake-arm64
50+
evict-old-files: 1d
51+
4652
- name: Dependencies
4753
id: depends
4854
continue-on-error: true
@@ -108,6 +114,12 @@ jobs:
108114
with:
109115
fetch-depth: 0
110116

117+
- name: ccache
118+
uses: hendrikmuhs/ccache-action@v1.2.16
119+
with:
120+
key: macOS-latest-cmake-x64
121+
evict-old-files: 1d
122+
111123
- name: Dependencies
112124
id: depends
113125
continue-on-error: true
@@ -172,6 +184,12 @@ jobs:
172184
with:
173185
fetch-depth: 0
174186

187+
- name: ccache
188+
uses: hendrikmuhs/ccache-action@v1.2.16
189+
with:
190+
key: ubuntu-cpu-cmake
191+
evict-old-files: 1d
192+
175193
- name: Dependencies
176194
id: depends
177195
run: |
@@ -249,6 +267,12 @@ jobs:
249267
id: checkout
250268
uses: actions/checkout@v4
251269

270+
- name: ccache
271+
uses: hendrikmuhs/ccache-action@v1.2.16
272+
with:
273+
key: ubuntu-latest-cmake-sanitizer-${{ matrix.sanitizer }}
274+
evict-old-files: 1d
275+
252276
- name: Dependencies
253277
id: depends
254278
run: |
@@ -296,6 +320,12 @@ jobs:
296320
id: checkout
297321
uses: actions/checkout@v4
298322

323+
- name: ccache
324+
uses: hendrikmuhs/ccache-action@v1.2.16
325+
with:
326+
key: ubuntu-latest-cmake-rpc
327+
evict-old-files: 1d
328+
299329
- name: Dependencies
300330
id: depends
301331
run: |
@@ -325,6 +355,12 @@ jobs:
325355
id: checkout
326356
uses: actions/checkout@v4
327357

358+
- name: ccache
359+
uses: hendrikmuhs/ccache-action@v1.2.16
360+
with:
361+
key: ubuntu-22-cmake-vulkan
362+
evict-old-files: 1d
363+
328364
- name: Dependencies
329365
id: depends
330366
run: |
@@ -364,6 +400,12 @@ jobs:
364400
sudo apt-get update
365401
sudo apt-get install -y build-essential git cmake rocblas-dev hipblas-dev
366402
403+
- name: ccache
404+
uses: hendrikmuhs/ccache-action@v1.2.16
405+
with:
406+
key: ubuntu-22-cmake-hip
407+
evict-old-files: 1d
408+
367409
- name: Build with native CMake HIP support
368410
id: cmake_build
369411
run: |
@@ -396,6 +438,12 @@ jobs:
396438
apt-get update
397439
apt-get install -y build-essential git cmake libcurl4-openssl-dev
398440
441+
- name: ccache
442+
uses: hendrikmuhs/ccache-action@v1.2.16
443+
with:
444+
key: ubuntu-22-cmake-musa
445+
evict-old-files: 1d
446+
399447
- name: Build with native CMake MUSA support
400448
id: cmake_build
401449
run: |
@@ -435,6 +483,12 @@ jobs:
435483
id: checkout
436484
uses: actions/checkout@v4
437485

486+
- name: ccache
487+
uses: hendrikmuhs/ccache-action@v1.2.16
488+
with:
489+
key: ubuntu-22-cmake-sycl
490+
evict-old-files: 1d
491+
438492
- name: Build
439493
id: cmake_build
440494
run: |
@@ -479,6 +533,12 @@ jobs:
479533
id: checkout
480534
uses: actions/checkout@v4
481535

536+
- name: ccache
537+
uses: hendrikmuhs/ccache-action@v1.2.16
538+
with:
539+
key: ubuntu-22-cmake-sycl-fp16
540+
evict-old-files: 1d
541+
482542
- name: Build
483543
id: cmake_build
484544
run: |
@@ -500,6 +560,12 @@ jobs:
500560
id: checkout
501561
uses: actions/checkout@v4
502562

563+
- name: ccache
564+
uses: hendrikmuhs/ccache-action@v1.2.16
565+
with:
566+
key: macOS-latest-cmake-ios
567+
evict-old-files: 1d
568+
503569
- name: Dependencies
504570
id: depends
505571
continue-on-error: true
@@ -531,6 +597,12 @@ jobs:
531597
id: checkout
532598
uses: actions/checkout@v4
533599

600+
- name: ccache
601+
uses: hendrikmuhs/ccache-action@v1.2.16
602+
with:
603+
key: macOS-latest-cmake-tvos
604+
evict-old-files: 1d
605+
534606
- name: Dependencies
535607
id: depends
536608
continue-on-error: true
@@ -566,6 +638,12 @@ jobs:
566638
id: checkout
567639
uses: actions/checkout@v4
568640

641+
- name: ccache
642+
uses: hendrikmuhs/ccache-action@v1.2.16
643+
with:
644+
key: macOS-latest-swift
645+
evict-old-files: 1d
646+
569647
- name: Dependencies
570648
id: depends
571649
continue-on-error: true
@@ -607,6 +685,12 @@ jobs:
607685
- name: Clone
608686
uses: actions/checkout@v4
609687

688+
- name: ccache
689+
uses: hendrikmuhs/ccache-action@v1.2.16
690+
with:
691+
key: windows-msys2
692+
evict-old-files: 1d
693+
610694
- name: Setup ${{ matrix.sys }}
611695
uses: msys2/setup-msys2@v2
612696
with:
@@ -675,6 +759,12 @@ jobs:
675759
with:
676760
fetch-depth: 0
677761

762+
- name: ccache
763+
uses: hendrikmuhs/ccache-action@v1.2.16
764+
with:
765+
key: windows-latest-cmake-${{ matrix.build }}
766+
evict-old-files: 1d
767+
678768
- name: Clone Kompute submodule
679769
id: clone_kompute
680770
if: ${{ matrix.build == 'kompute-x64' }}
@@ -813,6 +903,8 @@ jobs:
813903
- name: Clone
814904
id: checkout
815905
uses: actions/checkout@v4
906+
with:
907+
fetch-depth: 0
816908

817909
- name: Install dependencies
818910
env:
@@ -821,6 +913,12 @@ jobs:
821913
apt update
822914
apt install -y cmake build-essential ninja-build libgomp1 git
823915
916+
- name: ccache
917+
uses: hendrikmuhs/ccache-action@v1.2.16
918+
with:
919+
key: ubuntu-latest-cmake-cuda
920+
evict-old-files: 1d
921+
824922
- name: Build with CMake
825923
run: |
826924
cmake -S . -B build -G Ninja \
@@ -847,6 +945,12 @@ jobs:
847945
with:
848946
fetch-depth: 0
849947

948+
- name: Install ccache
949+
uses: hendrikmuhs/ccache-action@v1.2.16
950+
with:
951+
key: ${{ github.job }}-${{ matrix.cuda }}-${{ matrix.build }}
952+
evict-old-files: 1d
953+
850954
- name: Install Cuda Toolkit 11.7
851955
if: ${{ matrix.cuda == '11.7' }}
852956
run: |
@@ -903,11 +1007,6 @@ jobs:
9031007
echo "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
9041008
echo "CUDA_PATH_V12_4=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
9051009
906-
- name: Install ccache
907-
uses: hendrikmuhs/[email protected]
908-
with:
909-
key: ${{ github.job }}-${{ matrix.cuda }}-${{ matrix.build }}
910-
9111010
- name: Install Ninja
9121011
id: install_ninja
9131012
run: |
@@ -987,6 +1086,12 @@ jobs:
9871086
with:
9881087
fetch-depth: 0
9891088

1089+
- name: ccache
1090+
uses: hendrikmuhs/ccache-action@v1.2.16
1091+
with:
1092+
key: windows-latest-cmake-sycl
1093+
evict-old-files: 1d
1094+
9901095
- name: Install
9911096
run: |
9921097
scripts/install-oneapi.bat $WINDOWS_BASEKIT_URL $WINDOWS_DPCPP_MKL
@@ -1066,9 +1171,10 @@ jobs:
10661171
& 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
10671172
10681173
- name: Install ccache
1069-
uses: hendrikmuhs/[email protected]
1174+
uses: hendrikmuhs/ccache-action@v1.2.16
10701175
with:
10711176
key: ${{ github.job }}
1177+
evict-old-files: 1d
10721178

10731179
- name: Build
10741180
id: cmake_build
@@ -1098,6 +1204,12 @@ jobs:
10981204
with:
10991205
fetch-depth: 0
11001206

1207+
- name: ccache
1208+
uses: hendrikmuhs/ccache-action@v1.2.16
1209+
with:
1210+
key: windows-latest-cmake-hip-release
1211+
evict-old-files: 1d
1212+
11011213
- name: Install
11021214
id: depends
11031215
run: |
@@ -1195,6 +1307,12 @@ jobs:
11951307
- name: Clone
11961308
uses: actions/checkout@v4
11971309

1310+
- name: ccache
1311+
uses: hendrikmuhs/ccache-action@v1.2.16
1312+
with:
1313+
key: android-build
1314+
evict-old-files: 1d
1315+
11981316
- name: Set up JDK
11991317
uses: actions/setup-java@v3
12001318
with:
@@ -1232,6 +1350,12 @@ jobs:
12321350
with:
12331351
fetch-depth: 0
12341352

1353+
- name: ccache
1354+
uses: hendrikmuhs/ccache-action@v1.2.16
1355+
with:
1356+
key: release
1357+
evict-old-files: 1d
1358+
12351359
- name: Determine tag name
12361360
id: tag
12371361
shell: bash

.github/workflows/server.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,7 @@ jobs:
205205
run: |
206206
cd examples/server/tests
207207
$env:PYTHONIOENCODING = ":replace"
208-
pytest -v -x
208+
pytest -v -x -m "not slow"
209209
210210
- name: Slow tests
211211
id: server_integration_tests_slow

Makefile

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ TEST_TARGETS = \
5252
tests/test-arg-parser \
5353
tests/test-autorelease \
5454
tests/test-backend-ops \
55+
tests/test-chat \
5556
tests/test-chat-template \
5657
tests/test-double-float \
5758
tests/test-grammar-integration \
@@ -983,6 +984,7 @@ OBJ_COMMON = \
983984
$(DIR_COMMON)/ngram-cache.o \
984985
$(DIR_COMMON)/sampling.o \
985986
$(DIR_COMMON)/speculative.o \
987+
$(DIR_COMMON)/chat.o \
986988
$(DIR_COMMON)/build-info.o \
987989
$(DIR_COMMON)/json-schema-to-grammar.o
988990

@@ -1361,6 +1363,8 @@ llama-server: \
13611363
examples/server/httplib.h \
13621364
examples/server/index.html.hpp \
13631365
examples/server/loading.html.hpp \
1366+
common/chat.cpp \
1367+
common/chat.hpp \
13641368
common/chat-template.hpp \
13651369
common/json.hpp \
13661370
common/minja.hpp \
@@ -1471,6 +1475,11 @@ tests/test-json-schema-to-grammar: tests/test-json-schema-to-grammar.cpp \
14711475
$(CXX) $(CXXFLAGS) -Iexamples/server -c $< -o $(call GET_OBJ_FILE, $<)
14721476
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
14731477

1478+
tests/test-chat: tests/test-chat.cpp \
1479+
$(OBJ_ALL)
1480+
$(CXX) $(CXXFLAGS) -Iexamples/server -c $< -o $(call GET_OBJ_FILE, $<)
1481+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1482+
14741483
tests/test-opt: tests/test-opt.cpp \
14751484
$(OBJ_GGML)
14761485
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ Inference of Meta's [LLaMA](https://arxiv.org/abs/2302.13971) model (and others)
1818

1919
- **How to use [MTLResidencySet](https://developer.apple.com/documentation/metal/mtlresidencyset?language=objc) to keep the GPU memory active?** https://github.com/ggerganov/llama.cpp/pull/11427
2020
- **VS Code extension for FIM completions:** https://github.com/ggml-org/llama.vscode
21+
- Universal tool call support in `llama-server`: https://github.com/ggerganov/llama.cpp/pull/9639
2122
- Vim/Neovim plugin for FIM completions: https://github.com/ggml-org/llama.vim
2223
- Introducing GGUF-my-LoRA https://github.com/ggerganov/llama.cpp/discussions/10123
2324
- Hugging Face Inference Endpoints now support GGUF out of the box! https://github.com/ggerganov/llama.cpp/discussions/9669

common/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@ add_library(${TARGET} STATIC
5656
arg.cpp
5757
arg.h
5858
base64.hpp
59+
chat.cpp
60+
chat.hpp
5961
chat-template.hpp
6062
common.cpp
6163
common.h

0 commit comments

Comments
 (0)