Skip to content

Commit c254ee7

Browse files
authored
Merge branch 'master' into gg/cmake-warnings
2 parents 6fab3ff + 7db3846 commit c254ee7

File tree

20 files changed

+1007
-380
lines changed

20 files changed

+1007
-380
lines changed

.github/labeler.yml

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,19 +3,18 @@ Kompute:
33
- changed-files:
44
- any-glob-to-any-file:
55
- ggml/include/ggml-kompute.h
6-
- ggml/src/ggml-kompute.cpp
6+
- ggml/src/ggml-kompute/**
77
- README-kompute.md
88
Apple Metal:
99
- changed-files:
1010
- any-glob-to-any-file:
1111
- ggml/include/ggml-metal.h
12-
- ggml/src/ggml-metal.cpp
12+
- ggml/src/ggml-metal/**
1313
- README-metal.md
1414
SYCL:
1515
- changed-files:
1616
- any-glob-to-any-file:
1717
- ggml/include/ggml-sycl.h
18-
- ggml/src/ggml-sycl.cpp
1918
- ggml/src/ggml-sycl/**
2019
- docs/backend/SYCL.md
2120
- examples/sycl/**
@@ -27,8 +26,8 @@ Nvidia GPU:
2726
Vulkan:
2827
- changed-files:
2928
- any-glob-to-any-file:
30-
- ggml/ggml_vk_generate_shaders.py
31-
- ggml/src/ggml-vulkan*
29+
- ggml/include/ggml-vulkan.h
30+
- ggml/src/ggml-vulkan/**
3231
documentation:
3332
- changed-files:
3433
- any-glob-to-any-file:
@@ -75,11 +74,7 @@ server:
7574
ggml:
7675
- changed-files:
7776
- any-glob-to-any-file:
78-
- ggml/include/ggml*.h
79-
- ggml/src/ggml*.c
80-
- ggml/src/ggml*.cpp
81-
- ggml/src/ggml*.h
82-
- ggml-cuda/**
77+
- ggml/**
8378
nix:
8479
- changed-files:
8580
- any-glob-to-any-file:

.github/workflows/build.yml

Lines changed: 58 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -871,8 +871,65 @@ jobs:
871871
path: llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}.zip
872872
name: llama-bin-win-${{ matrix.build }}.zip
873873

874+
ubuntu-latest-cmake-cuda:
875+
runs-on: ubuntu-latest
876+
container: nvidia/cuda:12.6.2-devel-ubuntu24.04
877+
878+
steps:
879+
- name: Clone
880+
id: checkout
881+
uses: actions/checkout@v4
882+
883+
- name: Install dependencies
884+
env:
885+
DEBIAN_FRONTEND: noninteractive
886+
run: |
887+
apt update
888+
apt install -y cmake build-essential ninja-build libgomp1 git
889+
890+
- name: Build with CMake
891+
run: |
892+
cmake -S . -B build -G Ninja -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=89-real -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined -DLLAMA_FATAL_WARNINGS=ON
893+
cmake --build build
894+
874895
windows-latest-cmake-cuda:
896+
runs-on: windows-latest
897+
898+
strategy:
899+
matrix:
900+
cuda: ['12.6.2']
901+
build: ['cuda']
902+
903+
steps:
904+
- name: Clone
905+
id: checkout
906+
uses: actions/checkout@v4
907+
908+
- name: Install CUDA toolkit
909+
id: cuda-toolkit
910+
uses: Jimver/[email protected]
911+
with:
912+
cuda: ${{ matrix.cuda }}
913+
method: 'network'
914+
sub-packages: '["nvcc", "cudart", "cublas", "cublas_dev", "thrust", "visual_studio_integration"]'
915+
916+
- name: Install Ninja
917+
id: install_ninja
918+
run: |
919+
choco install ninja
920+
921+
- name: Build
922+
id: cmake_build
923+
shell: cmd
924+
run: |
925+
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
926+
cmake -S . -B build -G "Ninja Multi-Config" -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=ON -DGGML_RPC=ON -DCMAKE_CUDA_ARCHITECTURES=89-real
927+
cmake --build build --config Release -t ggml-cuda
928+
cmake --build build --config Release
929+
930+
windows-2019-cmake-cuda:
875931
runs-on: windows-2019
932+
if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} # event_name is the trigger name; github.event is the payload object and never equals 'push'
876933

877934
strategy:
878935
matrix:
@@ -1173,7 +1230,7 @@ jobs:
11731230
- macOS-latest-make
11741231
- macOS-latest-cmake
11751232
- windows-latest-cmake
1176-
- windows-latest-cmake-cuda
1233+
- windows-2019-cmake-cuda
11771234
- windows-latest-cmake-hip-release
11781235
- macOS-latest-cmake-arm64
11791236
- macOS-latest-cmake-x64

.github/workflows/docker.yml

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,10 @@
1010
name: Publish Docker image
1111

1212
on:
13-
#pull_request:
14-
push:
15-
branches:
16-
- master
17-
paths: ['.github/workflows/docker.yml', '.devops/*.Dockerfile', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal']
18-
workflow_dispatch: # allows manual triggering, useful for debugging
13+
workflow_dispatch: # allows manual triggering
14+
schedule:
15+
# Rebuild daily rather than on every push because it is expensive
16+
- cron: '12 4 * * *'
1917

2018
concurrency:
2119
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
@@ -29,7 +27,6 @@ permissions:
2927
jobs:
3028
push_to_registry:
3129
name: Push Docker image to Docker Hub
32-
#if: github.event.pull_request.draft == false
3330

3431
runs-on: ubuntu-latest
3532
env:
@@ -117,7 +114,7 @@ jobs:
117114
swap-storage: true
118115

119116
- name: Build and push Docker image (tagged + versioned)
120-
if: github.event_name == 'push'
117+
if: ${{ github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
121118
uses: docker/build-push-action@v6
122119
with:
123120
context: .

.github/workflows/nix-ci.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,10 @@ on:
55
push:
66
branches:
77
- master
8+
paths: ['.github/workflows/nix-ci.yml', '**/flake.nix', '**/flake.lock', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal']
89
pull_request:
910
types: [opened, synchronize, reopened]
11+
paths: ['.github/workflows/nix-ci.yml', '**/flake.nix', '**/flake.lock', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal']
1012

1113
concurrency:
1214
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}

.github/workflows/python-lint.yml

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,13 @@
11
name: flake8 Lint
22

3-
on: [push, pull_request]
3+
on:
4+
push:
5+
branches:
6+
- master
7+
paths: ['.github/workflows/python-lint.yml', '**/*.py']
8+
pull_request:
9+
types: [opened, synchronize, reopened]
10+
paths: ['.github/workflows/python-lint.yml', '**/*.py']
411

512
concurrency:
613
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}

CMakeLists.txt

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -164,8 +164,11 @@ if (GGML_TARGET_DEFINES)
164164
list(APPEND GGML_TRANSIENT_DEFINES ${GGML_TARGET_DEFINES})
165165
endif()
166166
get_target_property(GGML_LINK_LIBRARIES ggml LINK_LIBRARIES)
167-
168-
set_target_properties(llama PROPERTIES PUBLIC_HEADER ${CMAKE_CURRENT_SOURCE_DIR}/include/llama.h)
167+
# all public headers
168+
set(LLAMA_PUBLIC_HEADERS
169+
${CMAKE_CURRENT_SOURCE_DIR}/include/llama.h
170+
${CMAKE_CURRENT_SOURCE_DIR}/include/llama-cpp.h)
171+
set_target_properties(llama PROPERTIES PUBLIC_HEADER "${LLAMA_PUBLIC_HEADERS}")
169172
install(TARGETS llama LIBRARY PUBLIC_HEADER)
170173

171174
configure_package_config_file(

Makefile

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ BUILD_TARGETS = \
3434
llama-server \
3535
llama-simple \
3636
llama-simple-chat \
37+
llama-run \
3738
llama-speculative \
3839
llama-tokenize \
3940
llama-vdot \
@@ -1167,6 +1168,11 @@ llama-infill: examples/infill/infill.cpp \
11671168
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
11681169
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
11691170

1171+
llama-run: examples/run/run.cpp \
1172+
$(OBJ_ALL)
1173+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1174+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1175+
11701176
llama-simple: examples/simple/simple.cpp \
11711177
$(OBJ_ALL)
11721178
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)

common/speculative.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,9 +90,10 @@ bool common_speculative_are_compatible(
9090
if (llama_add_bos_token(model_tgt) != llama_add_bos_token(model_dft) ||
9191
llama_add_eos_token(model_tgt) != llama_add_eos_token(model_dft) ||
9292
llama_token_bos(model_tgt) != llama_token_bos(model_dft) ||
93-
llama_token_eos(model_tgt) != llama_token_eos(model_dft)
94-
) {
93+
llama_token_eos(model_tgt) != llama_token_eos(model_dft)) {
9594
LOG_ERR("%s: draft model special tokens must match target model to use speculation\n", __func__);
95+
LOG_ERR("%s: tgt: bos = %d (%d), eos = %d (%d)\n", __func__, llama_token_bos(model_tgt), llama_add_bos_token(model_tgt), llama_token_eos(model_tgt), llama_add_eos_token(model_tgt));
96+
LOG_ERR("%s: dft: bos = %d (%d), eos = %d (%d)\n", __func__, llama_token_bos(model_dft), llama_add_bos_token(model_dft), llama_token_eos(model_dft), llama_add_eos_token(model_dft));
9697
return false;
9798
}
9899

examples/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ else()
4040
add_subdirectory(server)
4141
endif()
4242
add_subdirectory(save-load-state)
43+
add_subdirectory(run)
4344
add_subdirectory(simple)
4445
add_subdirectory(simple-chat)
4546
add_subdirectory(speculative)

examples/run/CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
set(TARGET llama-run)
2+
add_executable(${TARGET} run.cpp)
3+
install(TARGETS ${TARGET} RUNTIME)
4+
target_link_libraries(${TARGET} PRIVATE llama ${CMAKE_THREAD_LIBS_INIT})
5+
target_compile_features(${TARGET} PRIVATE cxx_std_11)

0 commit comments

Comments
 (0)