Skip to content

Commit 7c3f9c2

Browse files
committed
Merge branch 'master' into compilade/test-model-random
2 parents ccb2bb9 + 8846aac commit 7c3f9c2

File tree

142 files changed

+9476
-6362
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

142 files changed

+9476
-6362
lines changed
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
name: Build relocatable cmake package
2+
on:
3+
workflow_dispatch:
4+
workflow_call:
5+
6+
jobs:
7+
linux:
8+
runs-on: ubuntu-24.04
9+
steps:
10+
- uses: actions/checkout@v4
11+
with:
12+
fetch-depth: 0
13+
14+
- name: Install dependencies
15+
run: |
16+
sudo apt update
17+
sudo apt install -y build-essential tcl
18+
19+
- name: Build
20+
run: |
21+
PREFIX="$(pwd)"/inst
22+
cmake -S . -B build -DCMAKE_PREFIX_PATH="$PREFIX" \
23+
-DLLAMA_CURL=OFF -DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_TOOLS=OFF \
24+
-DLLAMA_BUILD_EXAMPLES=OFF -DCMAKE_BUILD_TYPE=Release
25+
cmake --build build --config Release
26+
cmake --install build --prefix "$PREFIX" --config Release
27+
28+
export LLAMA_CONFIG="$PREFIX"/lib/cmake/llama/llama-config.cmake
29+
tclsh <<'EOF'
30+
set build(commit) [string trim [exec git rev-parse --short HEAD]]
31+
set build(number) [string trim [exec git rev-list --count HEAD]]
32+
set build(version) "0.0.$build(number)"
33+
34+
set llamaconfig [read [open "$env(LLAMA_CONFIG)" r]]
35+
set checks [list "set\\(LLAMA_VERSION \\s+$build(version)\\)" \
36+
"set\\(LLAMA_BUILD_COMMIT\\s+$build(commit)\\)" \
37+
"set\\(LLAMA_BUILD_NUMBER\\s+$build(number)\\)"]
38+
39+
puts -nonewline "Checking llama-config.cmake version... "
40+
foreach check $checks {
41+
if {![regexp -expanded -- $check $llamaconfig]} {
42+
puts "\"$check\" failed!"
43+
exit 1
44+
}
45+
}
46+
puts "success."
47+
EOF
48+
49+
cd examples/simple-cmake-pkg
50+
cmake -S . -B build -DCMAKE_PREFIX_PATH="$PREFIX"/lib/cmake
51+
cmake --build build

.github/workflows/build.yml

Lines changed: 40 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,43 @@ on:
55
push:
66
branches:
77
- master
8-
paths: ['.github/workflows/build.yml', '.github/workflows/build-linux-cross.yml', '**/CMakeLists.txt', '**/.cmake', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal', '**/*.comp']
8+
paths: [
9+
'.github/workflows/build.yml',
10+
'.github/workflows/build-linux-cross.yml',
11+
'.github/workflows/build-cmake-pkg.yml',
12+
'**/CMakeLists.txt',
13+
'**/.cmake',
14+
'**/*.h',
15+
'**/*.hpp',
16+
'**/*.c',
17+
'**/*.cpp',
18+
'**/*.cu',
19+
'**/*.cuh',
20+
'**/*.swift',
21+
'**/*.m',
22+
'**/*.metal',
23+
'**/*.comp'
24+
]
25+
926
pull_request:
1027
types: [opened, synchronize, reopened]
11-
paths: ['.github/workflows/build.yml', '.github/workflows/build-linux-cross.yml', '**/CMakeLists.txt', '**/.cmake', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal', '**/*.comp']
28+
paths: [
29+
'.github/workflows/build.yml',
30+
'.github/workflows/build-linux-cross.yml',
31+
'.github/workflows/build-cmake-pkg.yml',
32+
'**/CMakeLists.txt',
33+
'**/.cmake',
34+
'**/*.h',
35+
'**/*.hpp',
36+
'**/*.c',
37+
'**/*.cpp',
38+
'**/*.cu',
39+
'**/*.cuh',
40+
'**/*.swift',
41+
'**/*.m',
42+
'**/*.metal',
43+
'**/*.comp'
44+
]
1245

1346
concurrency:
1447
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
@@ -478,6 +511,9 @@ jobs:
478511
build-linux-cross:
479512
uses: ./.github/workflows/build-linux-cross.yml
480513

514+
build-cmake-pkg:
515+
uses: ./.github/workflows/build-cmake-pkg.yml
516+
481517
macOS-latest-cmake-ios:
482518
runs-on: macos-latest
483519

@@ -683,7 +719,7 @@ jobs:
683719
env:
684720
OPENBLAS_VERSION: 0.3.23
685721
SDE_VERSION: 9.33.0-2024-01-07
686-
VULKAN_VERSION: 1.4.309.0
722+
VULKAN_VERSION: 1.4.313.2
687723

688724
strategy:
689725
matrix:
@@ -736,7 +772,7 @@ jobs:
736772
id: get_vulkan
737773
if: ${{ matrix.build == 'kompute-x64' || matrix.build == 'vulkan-x64' }}
738774
run: |
739-
curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/VulkanSDK-${env:VULKAN_VERSION}-Installer.exe"
775+
curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/vulkansdk-windows-X64-${env:VULKAN_VERSION}.exe"
740776
& "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install
741777
Add-Content $env:GITHUB_ENV "VULKAN_SDK=C:\VulkanSDK\${env:VULKAN_VERSION}"
742778
Add-Content $env:GITHUB_PATH "C:\VulkanSDK\${env:VULKAN_VERSION}\bin"

.github/workflows/release.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -302,7 +302,7 @@ jobs:
302302

303303
env:
304304
OPENBLAS_VERSION: 0.3.23
305-
VULKAN_VERSION: 1.4.309.0
305+
VULKAN_VERSION: 1.4.313.2
306306

307307
strategy:
308308
matrix:
@@ -332,7 +332,7 @@ jobs:
332332
id: get_vulkan
333333
if: ${{ matrix.backend == 'vulkan' }}
334334
run: |
335-
curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/VulkanSDK-${env:VULKAN_VERSION}-Installer.exe"
335+
curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/vulkansdk-windows-X64-${env:VULKAN_VERSION}.exe"
336336
& "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install
337337
Add-Content $env:GITHUB_ENV "VULKAN_SDK=C:\VulkanSDK\${env:VULKAN_VERSION}"
338338
Add-Content $env:GITHUB_PATH "C:\VulkanSDK\${env:VULKAN_VERSION}\bin"

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ endif()
9595
if (NOT DEFINED LLAMA_BUILD_COMMIT)
9696
set(LLAMA_BUILD_COMMIT ${BUILD_COMMIT})
9797
endif()
98-
set(LLAMA_INSTALL_VERSION 0.0.${BUILD_NUMBER})
98+
set(LLAMA_INSTALL_VERSION 0.0.${LLAMA_BUILD_NUMBER})
9999

100100
# override ggml options
101101
set(GGML_ALL_WARNINGS ${LLAMA_ALL_WARNINGS})

ci/run.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -779,7 +779,7 @@ function gg_run_rerank_tiny {
779779
model_f16="${path_models}/ggml-model-f16.gguf"
780780

781781
# for this model, the SEP token is "</s>"
782-
(time ./bin/llama-embedding --model ${model_f16} -p "what is panda?</s></s>hi\nwhat is panda?</s></s>it's a bear\nwhat is panda?</s></s>The giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China." -ngl 99 -c 0 --pooling rank --embd-normalize -1 --verbose-prompt) 2>&1 | tee -a $OUT/${ci}-rk-f16.log
782+
(time ./bin/llama-embedding --model ${model_f16} -p "what is panda?\thi\nwhat is panda?\tit's a bear\nwhat is panda?\tThe giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China." -ngl 99 -c 0 --pooling rank --embd-normalize -1 --verbose-prompt) 2>&1 | tee -a $OUT/${ci}-rk-f16.log
783783

784784
# sample output
785785
# rerank score 0: 0.029

common/arg.cpp

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2706,6 +2706,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
27062706
params.embd_sep = value;
27072707
}
27082708
).set_examples({LLAMA_EXAMPLE_EMBEDDING}));
2709+
add_opt(common_arg(
2710+
{"--cls-separator"}, "STRING",
2711+
"separator of classification sequences (default \\t) for example \"<#seq#>\"",
2712+
[](common_params & params, const std::string & value) {
2713+
params.cls_sep = value;
2714+
}
2715+
).set_examples({LLAMA_EXAMPLE_EMBEDDING}));
27092716
add_opt(common_arg(
27102717
{"--host"}, "HOST",
27112718
string_format("ip address to listen, or bind to an UNIX socket if the address ends with .sock (default: %s)", params.hostname.c_str()),
@@ -3210,6 +3217,32 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
32103217
params.speculative.model.path = value;
32113218
}
32123219
).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MODEL_DRAFT"));
3220+
add_opt(common_arg(
3221+
{"-ctkd", "--cache-type-k-draft"}, "TYPE",
3222+
string_format(
3223+
"KV cache data type for K for the draft model\n"
3224+
"allowed values: %s\n"
3225+
"(default: %s)",
3226+
get_all_kv_cache_types().c_str(),
3227+
ggml_type_name(params.speculative.cache_type_k)
3228+
),
3229+
[](common_params & params, const std::string & value) {
3230+
params.speculative.cache_type_k = kv_cache_type_from_str(value);
3231+
}
3232+
).set_env("LLAMA_ARG_CACHE_TYPE_K_DRAFT"));
3233+
add_opt(common_arg(
3234+
{"-ctvd", "--cache-type-v-draft"}, "TYPE",
3235+
string_format(
3236+
"KV cache data type for V for the draft model\n"
3237+
"allowed values: %s\n"
3238+
"(default: %s)",
3239+
get_all_kv_cache_types().c_str(),
3240+
ggml_type_name(params.speculative.cache_type_v)
3241+
),
3242+
[](common_params & params, const std::string & value) {
3243+
params.speculative.cache_type_v = kv_cache_type_from_str(value);
3244+
}
3245+
).set_env("LLAMA_ARG_CACHE_TYPE_V_DRAFT"));
32133246

32143247
add_opt(common_arg(
32153248
{"-mv", "--model-vocoder"}, "FNAME",

common/common.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -706,11 +706,17 @@ bool fs_validate_filename(const std::string & filename) {
706706
// disable C++17 deprecation warning for std::codecvt_utf8
707707
# pragma clang diagnostic push
708708
# pragma clang diagnostic ignored "-Wdeprecated-declarations"
709+
#elif defined(__GNUC__)
710+
# pragma GCC diagnostic push
711+
# pragma GCC diagnostic ignored "-Wdeprecated-declarations"
709712
#endif
713+
710714
std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> converter;
711715

712716
#if defined(__clang__)
713717
# pragma clang diagnostic pop
718+
#elif defined(__GNUC__)
719+
# pragma GCC diagnostic pop
714720
#endif
715721

716722
filename_utf32 = converter.from_bytes(filename);
@@ -1284,6 +1290,9 @@ std::vector<llama_token> common_tokenize(
12841290
int n_tokens = text.length() + 2 * add_special;
12851291
std::vector<llama_token> result(n_tokens);
12861292
n_tokens = llama_tokenize(vocab, text.data(), text.length(), result.data(), result.size(), add_special, parse_special);
1293+
if (n_tokens == std::numeric_limits<int32_t>::min()) {
1294+
throw std::runtime_error("Tokenization failed: input text too large, tokenization result exceeds int32_t limit");
1295+
}
12871296
if (n_tokens < 0) {
12881297
result.resize(-n_tokens);
12891298
int check = llama_tokenize(vocab, text.data(), text.length(), result.data(), result.size(), add_special, parse_special);

common/common.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,9 @@ struct common_params_speculative {
199199
float p_split = 0.1f; // speculative decoding split probability
200200
float p_min = 0.75f; // minimum speculative decoding probability (greedy)
201201

202+
ggml_type cache_type_k = GGML_TYPE_F16; // KV cache data type for the K
203+
ggml_type cache_type_v = GGML_TYPE_F16; // KV cache data type for the V
204+
202205
struct cpu_params cpuparams;
203206
struct cpu_params cpuparams_batch;
204207

@@ -355,6 +358,7 @@ struct common_params {
355358
int32_t embd_normalize = 2; // normalisation for embeddings (-1=none, 0=max absolute int16, 1=taxicab, 2=euclidean, >2=p-norm)
356359
std::string embd_out = ""; // empty = default, "array" = [[],[]...], "json" = openai style, "json+" = same "json" + cosine similarity matrix
357360
std::string embd_sep = "\n"; // separator of embeddings
361+
std::string cls_sep = "\t"; // separator of classification sequences
358362

359363
// server params
360364
int32_t port = 8080; // server listens on this network port

common/json-schema-to-grammar.cpp

Lines changed: 3 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -41,49 +41,6 @@ static std::string build_repetition(const std::string & item_rule, int min_items
4141
return result;
4242
}
4343

44-
/* Minimalistic replacement for std::string_view, which is only available from C++17 onwards */
45-
class string_view {
46-
const std::string & _str;
47-
const size_t _start;
48-
const size_t _end;
49-
public:
50-
string_view(const std::string & str, size_t start = 0, size_t end = std::string::npos) : _str(str), _start(start), _end(end == std::string::npos ? str.length() : end) {}
51-
52-
size_t size() const {
53-
return _end - _start;
54-
}
55-
56-
size_t length() const {
57-
return size();
58-
}
59-
60-
operator std::string() const {
61-
return str();
62-
}
63-
64-
std::string str() const {
65-
return _str.substr(_start, _end - _start);
66-
}
67-
68-
string_view substr(size_t pos, size_t len = std::string::npos) const {
69-
return string_view(_str, _start + pos, len == std::string::npos ? _end : _start + pos + len);
70-
}
71-
72-
char operator[](size_t pos) const {
73-
auto index = _start + pos;
74-
if (index >= _end) {
75-
throw std::out_of_range("string_view index out of range");
76-
}
77-
return _str[_start + pos];
78-
}
79-
80-
bool operator==(const string_view & other) const {
81-
std::string this_str = *this;
82-
std::string other_str = other;
83-
return this_str == other_str;
84-
}
85-
};
86-
8744
static void _build_min_max_int(int min_value, int max_value, std::stringstream & out, int decimals_left = 16, bool top_level = true) {
8845
auto has_min = min_value != std::numeric_limits<int>::min();
8946
auto has_max = max_value != std::numeric_limits<int>::max();
@@ -112,14 +69,14 @@ static void _build_min_max_int(int min_value, int max_value, std::stringstream &
11269
}
11370
out << "}";
11471
};
115-
std::function<void(const string_view &, const string_view &)> uniform_range =
116-
[&](const string_view & from, const string_view & to) {
72+
std::function<void(const std::string_view &, const std::string_view &)> uniform_range =
73+
[&](const std::string_view & from, const std::string_view & to) {
11774
size_t i = 0;
11875
while (i < from.length() && i < to.length() && from[i] == to[i]) {
11976
i++;
12077
}
12178
if (i > 0) {
122-
out << "\"" << from.substr(0, i).str() << "\"";
79+
out << "\"" << from.substr(0, i) << "\"";
12380
}
12481
if (i < from.length() && i < to.length()) {
12582
if (i > 0) {

0 commit comments

Comments
 (0)