Skip to content

Commit fa38b8e

Browse files
author
Olivier Chafik
committed
Merge remote-tracking branch 'origin/master' into cuda-releases
2 parents 614fd07 + a83f528 commit fa38b8e

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

54 files changed

+4143
-204
lines changed

.editorconfig

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,3 +40,11 @@ indent_style = tab
4040
[examples/cvector-generator/*.txt]
4141
trim_trailing_whitespace = unset
4242
insert_final_newline = unset
43+
44+
[models/templates/*.jinja]
45+
indent_style = unset
46+
indent_size = unset
47+
end_of_line = unset
48+
charset = unset
49+
trim_trailing_whitespace = unset
50+
insert_final_newline = unset

.github/workflows/build.yml

Lines changed: 128 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,12 @@ jobs:
4343
with:
4444
fetch-depth: 0
4545

46+
- name: ccache
47+
uses: hendrikmuhs/ccache-action@v1.2.16
48+
with:
49+
key: macOS-latest-cmake-arm64
50+
evict-old-files: 1d
51+
4652
- name: Dependencies
4753
id: depends
4854
continue-on-error: true
@@ -108,6 +114,12 @@ jobs:
108114
with:
109115
fetch-depth: 0
110116

117+
- name: ccache
118+
uses: hendrikmuhs/ccache-action@v1.2.16
119+
with:
120+
key: macOS-latest-cmake-x64
121+
evict-old-files: 1d
122+
111123
- name: Dependencies
112124
id: depends
113125
continue-on-error: true
@@ -172,6 +184,12 @@ jobs:
172184
with:
173185
fetch-depth: 0
174186

187+
- name: ccache
188+
uses: hendrikmuhs/ccache-action@v1.2.16
189+
with:
190+
key: ubuntu-cpu-cmake
191+
evict-old-files: 1d
192+
175193
- name: Dependencies
176194
id: depends
177195
run: |
@@ -249,6 +267,12 @@ jobs:
249267
id: checkout
250268
uses: actions/checkout@v4
251269

270+
- name: ccache
271+
uses: hendrikmuhs/ccache-action@v1.2.16
272+
with:
273+
key: ubuntu-latest-cmake-sanitizer-${{ matrix.sanitizer }}
274+
evict-old-files: 1d
275+
252276
- name: Dependencies
253277
id: depends
254278
run: |
@@ -296,6 +320,12 @@ jobs:
296320
id: checkout
297321
uses: actions/checkout@v4
298322

323+
- name: ccache
324+
uses: hendrikmuhs/ccache-action@v1.2.16
325+
with:
326+
key: ubuntu-latest-cmake-rpc
327+
evict-old-files: 1d
328+
299329
- name: Dependencies
300330
id: depends
301331
run: |
@@ -325,6 +355,12 @@ jobs:
325355
id: checkout
326356
uses: actions/checkout@v4
327357

358+
- name: ccache
359+
uses: hendrikmuhs/ccache-action@v1.2.16
360+
with:
361+
key: ubuntu-22-cmake-vulkan
362+
evict-old-files: 1d
363+
328364
- name: Dependencies
329365
id: depends
330366
run: |
@@ -364,6 +400,12 @@ jobs:
364400
sudo apt-get update
365401
sudo apt-get install -y build-essential git cmake rocblas-dev hipblas-dev
366402
403+
- name: ccache
404+
uses: hendrikmuhs/ccache-action@v1.2.16
405+
with:
406+
key: ubuntu-22-cmake-hip
407+
evict-old-files: 1d
408+
367409
- name: Build with native CMake HIP support
368410
id: cmake_build
369411
run: |
@@ -396,6 +438,12 @@ jobs:
396438
apt-get update
397439
apt-get install -y build-essential git cmake libcurl4-openssl-dev
398440
441+
- name: ccache
442+
uses: hendrikmuhs/ccache-action@v1.2.16
443+
with:
444+
key: ubuntu-22-cmake-musa
445+
evict-old-files: 1d
446+
399447
- name: Build with native CMake MUSA support
400448
id: cmake_build
401449
run: |
@@ -435,6 +483,12 @@ jobs:
435483
id: checkout
436484
uses: actions/checkout@v4
437485

486+
- name: ccache
487+
uses: hendrikmuhs/ccache-action@v1.2.16
488+
with:
489+
key: ubuntu-22-cmake-sycl
490+
evict-old-files: 1d
491+
438492
- name: Build
439493
id: cmake_build
440494
run: |
@@ -479,6 +533,12 @@ jobs:
479533
id: checkout
480534
uses: actions/checkout@v4
481535

536+
- name: ccache
537+
uses: hendrikmuhs/ccache-action@v1.2.16
538+
with:
539+
key: ubuntu-22-cmake-sycl-fp16
540+
evict-old-files: 1d
541+
482542
- name: Build
483543
id: cmake_build
484544
run: |
@@ -500,6 +560,12 @@ jobs:
500560
id: checkout
501561
uses: actions/checkout@v4
502562

563+
- name: ccache
564+
uses: hendrikmuhs/ccache-action@v1.2.16
565+
with:
566+
key: macOS-latest-cmake-ios
567+
evict-old-files: 1d
568+
503569
- name: Dependencies
504570
id: depends
505571
continue-on-error: true
@@ -531,6 +597,12 @@ jobs:
531597
id: checkout
532598
uses: actions/checkout@v4
533599

600+
- name: ccache
601+
uses: hendrikmuhs/ccache-action@v1.2.16
602+
with:
603+
key: macOS-latest-cmake-tvos
604+
evict-old-files: 1d
605+
534606
- name: Dependencies
535607
id: depends
536608
continue-on-error: true
@@ -566,6 +638,12 @@ jobs:
566638
id: checkout
567639
uses: actions/checkout@v4
568640

641+
- name: ccache
642+
uses: hendrikmuhs/ccache-action@v1.2.16
643+
with:
644+
key: macOS-latest-swift
645+
evict-old-files: 1d
646+
569647
- name: Dependencies
570648
id: depends
571649
continue-on-error: true
@@ -607,6 +685,12 @@ jobs:
607685
- name: Clone
608686
uses: actions/checkout@v4
609687

688+
- name: ccache
689+
uses: hendrikmuhs/ccache-action@v1.2.16
690+
with:
691+
key: windows-msys2
692+
evict-old-files: 1d
693+
610694
- name: Setup ${{ matrix.sys }}
611695
uses: msys2/setup-msys2@v2
612696
with:
@@ -675,6 +759,12 @@ jobs:
675759
with:
676760
fetch-depth: 0
677761

762+
- name: ccache
763+
uses: hendrikmuhs/ccache-action@v1.2.16
764+
with:
765+
key: windows-latest-cmake-${{ matrix.build }}
766+
evict-old-files: 1d
767+
678768
- name: Clone Kompute submodule
679769
id: clone_kompute
680770
if: ${{ matrix.build == 'kompute-x64' }}
@@ -869,6 +959,12 @@ jobs:
869959
apt-get update
870960
apt install -y cmake build-essential ninja-build libcurl4-openssl-dev libgomp1 git
871961
962+
- name: ccache
963+
uses: hendrikmuhs/ccache-action@v1.2.16
964+
with:
965+
key: ubuntu-latest-cmake-cuda
966+
evict-old-files: 1d
967+
872968
- name: Build with CMake
873969
run: |
874970
cmake -S . -B build -G Ninja \
@@ -924,6 +1020,12 @@ jobs:
9241020
with:
9251021
fetch-depth: 0
9261022

1023+
- name: Install ccache
1024+
uses: hendrikmuhs/ccache-action@v1.2.16
1025+
with:
1026+
key: ${{ github.job }}-${{ matrix.cuda }}-${{ matrix.build }}
1027+
evict-old-files: 1d
1028+
9271029
- name: Install Cuda Toolkit 11.7
9281030
if: ${{ matrix.cuda == '11.7' }}
9291031
run: |
@@ -980,11 +1082,6 @@ jobs:
9801082
echo "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
9811083
echo "CUDA_PATH_V12_4=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
9821084
983-
- name: Install ccache
984-
uses: hendrikmuhs/ccache-action@v1.2.16
985-
with:
986-
key: ${{ github.job }}-${{ matrix.cuda }}-${{ matrix.build }}
987-
9881085
- name: Install Ninja
9891086
id: install_ninja
9901087
run: |
@@ -1064,6 +1161,12 @@ jobs:
10641161
with:
10651162
fetch-depth: 0
10661163

1164+
- name: ccache
1165+
uses: hendrikmuhs/ccache-action@v1.2.16
1166+
with:
1167+
key: windows-latest-cmake-sycl
1168+
evict-old-files: 1d
1169+
10671170
- name: Install
10681171
run: |
10691172
scripts/install-oneapi.bat $WINDOWS_BASEKIT_URL $WINDOWS_DPCPP_MKL
@@ -1143,9 +1246,10 @@ jobs:
11431246
& 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
11441247
11451248
- name: Install ccache
1146-
uses: hendrikmuhs/ccache-action@v1.2
1249+
uses: hendrikmuhs/ccache-action@v1.2.16
11471250
with:
11481251
key: ${{ github.job }}
1252+
evict-old-files: 1d
11491253

11501254
- name: Build
11511255
id: cmake_build
@@ -1175,6 +1279,12 @@ jobs:
11751279
with:
11761280
fetch-depth: 0
11771281

1282+
- name: ccache
1283+
uses: hendrikmuhs/ccache-action@v1.2.16
1284+
with:
1285+
key: windows-latest-cmake-hip-release
1286+
evict-old-files: 1d
1287+
11781288
- name: Install
11791289
id: depends
11801290
run: |
@@ -1272,6 +1382,12 @@ jobs:
12721382
- name: Clone
12731383
uses: actions/checkout@v4
12741384

1385+
- name: ccache
1386+
uses: hendrikmuhs/ccache-action@v1.2.16
1387+
with:
1388+
key: android-build
1389+
evict-old-files: 1d
1390+
12751391
- name: Set up JDK
12761392
uses: actions/setup-java@v3
12771393
with:
@@ -1310,6 +1426,12 @@ jobs:
13101426
with:
13111427
fetch-depth: 0
13121428

1429+
- name: ccache
1430+
uses: hendrikmuhs/ccache-action@v1.2.16
1431+
with:
1432+
key: release
1433+
evict-old-files: 1d
1434+
13131435
- name: Determine tag name
13141436
id: tag
13151437
shell: bash

.github/workflows/server.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,7 @@ jobs:
205205
run: |
206206
cd examples/server/tests
207207
$env:PYTHONIOENCODING = ":replace"
208-
pytest -v -x
208+
pytest -v -x -m "not slow"
209209
210210
- name: Slow tests
211211
id: server_integration_tests_slow

Makefile

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ TEST_TARGETS = \
5252
tests/test-arg-parser \
5353
tests/test-autorelease \
5454
tests/test-backend-ops \
55+
tests/test-chat \
5556
tests/test-chat-template \
5657
tests/test-double-float \
5758
tests/test-grammar-integration \
@@ -983,6 +984,7 @@ OBJ_COMMON = \
983984
$(DIR_COMMON)/ngram-cache.o \
984985
$(DIR_COMMON)/sampling.o \
985986
$(DIR_COMMON)/speculative.o \
987+
$(DIR_COMMON)/chat.o \
986988
$(DIR_COMMON)/build-info.o \
987989
$(DIR_COMMON)/json-schema-to-grammar.o
988990

@@ -1361,6 +1363,8 @@ llama-server: \
13611363
examples/server/httplib.h \
13621364
examples/server/index.html.hpp \
13631365
examples/server/loading.html.hpp \
1366+
common/chat.cpp \
1367+
common/chat.hpp \
13641368
common/chat-template.hpp \
13651369
common/json.hpp \
13661370
common/minja.hpp \
@@ -1471,6 +1475,11 @@ tests/test-json-schema-to-grammar: tests/test-json-schema-to-grammar.cpp \
14711475
$(CXX) $(CXXFLAGS) -Iexamples/server -c $< -o $(call GET_OBJ_FILE, $<)
14721476
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
14731477

1478+
tests/test-chat: tests/test-chat.cpp \
1479+
$(OBJ_ALL)
1480+
$(CXX) $(CXXFLAGS) -Iexamples/server -c $< -o $(call GET_OBJ_FILE, $<)
1481+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1482+
14741483
tests/test-opt: tests/test-opt.cpp \
14751484
$(OBJ_GGML)
14761485
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ Inference of Meta's [LLaMA](https://arxiv.org/abs/2302.13971) model (and others)
1818

1919
- **How to use [MTLResidencySet](https://developer.apple.com/documentation/metal/mtlresidencyset?language=objc) to keep the GPU memory active?** https://github.com/ggerganov/llama.cpp/pull/11427
2020
- **VS Code extension for FIM completions:** https://github.com/ggml-org/llama.vscode
21+
- Universal tool call support in `llama-server`: https://github.com/ggerganov/llama.cpp/pull/9639
2122
- Vim/Neovim plugin for FIM completions: https://github.com/ggml-org/llama.vim
2223
- Introducing GGUF-my-LoRA https://github.com/ggerganov/llama.cpp/discussions/10123
2324
- Hugging Face Inference Endpoints now support GGUF out of the box! https://github.com/ggerganov/llama.cpp/discussions/9669

common/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@ add_library(${TARGET} STATIC
5656
arg.cpp
5757
arg.h
5858
base64.hpp
59+
chat.cpp
60+
chat.hpp
5961
chat-template.hpp
6062
common.cpp
6163
common.h

common/chat-template.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -283,10 +283,12 @@ class chat_template {
283283
message["role"] = "user";
284284
auto obj = json {
285285
{"tool_response", {
286-
{"tool", message.at("name")},
287286
{"content", message.at("content")},
288287
}},
289288
};
289+
if (message.contains("name")) {
290+
obj["tool_response"]["name"] = message.at("name");
291+
}
290292
if (message.contains("tool_call_id")) {
291293
obj["tool_response"]["tool_call_id"] = message.at("tool_call_id");
292294
}

0 commit comments

Comments
 (0)