
Commit d67fefb

Merge branch 'master' into xsn/lora_per_request

2 parents: 076346d + 0827b2c


45 files changed: +941 -624 lines (large commit; only some of the changed files are shown below)

common/common.cpp

Lines changed: 17 additions & 3 deletions

@@ -18,6 +18,7 @@
 #include <cstdarg>
 #include <cstring>
 #include <ctime>
+#include <filesystem>
 #include <fstream>
 #include <iostream>
 #include <iterator>

@@ -62,7 +63,9 @@
 #ifdef __linux__
 #include <linux/limits.h>
 #elif defined(_WIN32)
-#define PATH_MAX MAX_PATH
+# if !defined(PATH_MAX)
+#  define PATH_MAX MAX_PATH
+# endif
 #else
 #include <sys/syslimits.h>
 #endif

@@ -1148,8 +1151,7 @@ static bool common_download_file(const std::string & url, const std::string & pa
 #endif

     // Check if the file already exists locally
-    struct stat model_file_info;
-    auto file_exists = (stat(path.c_str(), &model_file_info) == 0);
+    auto file_exists = std::filesystem::exists(path);

     // If the file exists, check its JSON metadata companion file.
     std::string metadata_path = path + ".json";
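
The existence check now uses std::filesystem (hence the new <filesystem> include) instead of a raw stat() call, dropping the unused struct stat and the platform-specific error convention. A minimal standalone sketch of the same check; the file name here is a placeholder, not from the commit:

    #include <filesystem>
    #include <iostream>
    #include <string>

    int main() {
        const std::string path = "model.gguf"; // placeholder path
        // returns false for a missing path; no struct stat or errno handling needed
        const bool file_exists = std::filesystem::exists(path);
        std::cout << path << (file_exists ? " exists" : " is missing") << '\n';
        return 0;
    }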
@@ -1612,6 +1614,18 @@ std::string common_detokenize(llama_context * ctx, const std::vector<llama_token
 // Chat template utils
 //

+std::string common_get_builtin_chat_template(const struct llama_model * model) {
+    static const char * template_key = "tokenizer.chat_template";
+    // call with NULL buffer to get the total size of the string
+    int32_t res = llama_model_meta_val_str(model, template_key, NULL, 0);
+    if (res > 0) {
+        std::vector<char> model_template(res + 1, 0);
+        llama_model_meta_val_str(model, template_key, model_template.data(), model_template.size());
+        return std::string(model_template.data(), model_template.size() - 1);
+    }
+    return "";
+}
+
 bool common_chat_verify_template(const std::string & tmpl) {
     llama_chat_message chat[] = {{"user", "test"}};
     int res = llama_chat_apply_template(nullptr, tmpl.c_str(), chat, 1, true, nullptr, 0);
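
The new helper sizes its buffer by first calling llama_model_meta_val_str with a NULL buffer, which returns the value's length, then reads tokenizer.chat_template out of the GGUF metadata. A usage sketch; the model path is a placeholder and error handling is kept minimal:

    #include "common.h"
    #include "llama.h"
    #include <cstdio>

    int main() {
        llama_model_params mparams = llama_model_default_params();
        // placeholder model path for illustration
        llama_model * model = llama_load_model_from_file("model.gguf", mparams);
        if (model == NULL) {
            return 1;
        }
        // an empty string means the GGUF carries no tokenizer.chat_template entry
        const std::string tmpl = common_get_builtin_chat_template(model);
        if (tmpl.empty()) {
            printf("model has no built-in chat template\n");
        } else if (common_chat_verify_template(tmpl)) {
            printf("built-in chat template is supported:\n%s\n", tmpl.c_str());
        }
        llama_free_model(model);
        return 0;
    }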

common/common.h

Lines changed: 3 additions & 0 deletions

@@ -571,6 +571,9 @@ struct common_chat_msg {
     std::string content;
 };

+// Get the built-in chat template for the model. Return empty string if not present.
+std::string common_get_builtin_chat_template(const struct llama_model * model);
+
 // Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid
 bool common_chat_verify_template(const std::string & tmpl);

convert_hf_to_gguf.py

Lines changed: 3 additions & 9 deletions

@@ -1764,25 +1764,19 @@ def set_vocab(self):
             self.gguf_writer.add_token_list(tokens)
             self.gguf_writer.add_token_types(toktypes)

-            special_vocab = gguf.SpecialVocab(
-                self.dir_model, load_merges=True,
-                special_token_types = ['bos', 'eos', 'eom', 'eot']
-            )
-            special_vocab._set_special_token("bos", 128000)
-            special_vocab._set_special_token("eos", 128001)
-            special_vocab._set_special_token("eom", 128008)
-            special_vocab._set_special_token("eot", 128009)
+            special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=True)
             special_vocab.add_to_gguf(self.gguf_writer)
         else:
             # DeciLM-7B
             self._set_vocab_llama_hf()
-            # self._set_vocab_gpt2()

     def set_gguf_parameters(self):
         if "block_configs" in self.hparams: # Llama-3_1-Nemotron-51B
             assert self.block_count == len(self._num_kv_heads)
             assert self.block_count == len(self._num_heads)
             assert self.block_count == len(self._ffn_dims)
+            if (rope_theta := self.hparams.get("rope_theta")) is not None:
+                self.gguf_writer.add_rope_freq_base(rope_theta)
             self.gguf_writer.add_head_count_kv(self._num_kv_heads)
             self.gguf_writer.add_head_count(self._num_heads)
             self.gguf_writer.add_feed_forward_length(self._ffn_dims)
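
The rope_theta change writes the RoPE frequency base into the GGUF header whenever the HF config provides one, instead of dropping it on the Nemotron-51B block-config path. To see the effect on a converted file, one can enumerate the model's metadata from llama.cpp's C API; a sketch assuming a hypothetical output file name:

    #include "llama.h"
    #include <cstdio>

    int main() {
        llama_model_params mparams = llama_model_default_params();
        mparams.vocab_only = true; // metadata is enough; skip loading tensor data
        // hypothetical file produced by convert_hf_to_gguf.py
        llama_model * model = llama_load_model_from_file("nemotron-51b.gguf", mparams);
        if (model == NULL) {
            return 1;
        }
        char key[256];
        char val[256];
        const int32_t n_meta = llama_model_meta_count(model);
        for (int32_t i = 0; i < n_meta; i++) {
            llama_model_meta_key_by_index(model, i, key, sizeof(key));
            llama_model_meta_val_str_by_index(model, i, val, sizeof(val));
            // the value written by add_rope_freq_base appears as an
            // "<arch>.rope.freq_base" entry among these keys
            printf("%s = %s\n", key, val);
        }
        llama_free_model(model);
        return 0;
    }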

examples/llama.android/llama/src/main/cpp/llama-android.cpp

Lines changed: 3 additions & 1 deletion

@@ -305,7 +305,9 @@ Java_android_llama_cpp_LLamaAndroid_new_1batch(JNIEnv *, jobject, jint n_tokens,
 extern "C"
 JNIEXPORT void JNICALL
 Java_android_llama_cpp_LLamaAndroid_free_1batch(JNIEnv *, jobject, jlong batch_pointer) {
-    llama_batch_free(*reinterpret_cast<llama_batch *>(batch_pointer));
+    //llama_batch_free(*reinterpret_cast<llama_batch *>(batch_pointer));
+    const auto batch = reinterpret_cast<llama_batch *>(batch_pointer);
+    delete batch;
 }

 extern "C"
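
Replacing llama_batch_free with delete only makes sense if the batch object handed across JNI was itself allocated with new, with its member buffers managed separately by the wrapper, since llama_batch_free assumes the layout produced by llama_batch_init. A sketch of the pairing this change implies; the helper names are hypothetical and the allocation side is an assumption, not code from this commit:

    #include "llama.h"

    // assumed allocation side: the JNI wrapper heap-allocates a bare llama_batch
    static llama_batch * wrapper_new_batch() {
        auto * batch = new llama_batch{};
        // ... the wrapper fills in token/pos/seq_id buffers itself ...
        return batch;
    }

    // deallocation side mirroring free_1batch: delete matches the new above;
    // llama_batch_free would try to free buffers it did not allocate this way
    static void wrapper_free_batch(llama_batch * batch) {
        delete batch;
    }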

examples/run/run.cpp

Lines changed: 5 additions & 4 deletions

@@ -1,5 +1,6 @@
 #if defined(_WIN32)
 # include <windows.h>
+# include <io.h>
 #else
 # include <sys/file.h>
 # include <sys/ioctl.h>

@@ -253,7 +254,7 @@ class File {
            return 1;
        }

-        OVERLAPPED overlapped = { 0 };
+        OVERLAPPED overlapped = {};
         if (!LockFileEx(hFile, LOCKFILE_EXCLUSIVE_LOCK | LOCKFILE_FAIL_IMMEDIATELY, 0, MAXDWORD, MAXDWORD,
                         &overlapped)) {
             fd = -1;

@@ -277,7 +278,7 @@ class File {
         if (fd >= 0) {
 # ifdef _WIN32
             if (hFile != INVALID_HANDLE_VALUE) {
-                OVERLAPPED overlapped = { 0 };
+                OVERLAPPED overlapped = {};
                 UnlockFileEx(hFile, 0, MAXDWORD, MAXDWORD, &overlapped);
             }
 # else

@@ -293,7 +294,7 @@ class File {
 private:
     int fd = -1;
 # ifdef _WIN32
-    HANDLE hFile;
+    HANDLE hFile = nullptr;
 # endif
 };

@@ -464,7 +465,7 @@ class HttpClient {
         return (now_downloaded_plus_file_size * 100) / total_to_download;
     }

-    static std::string generate_progress_prefix(curl_off_t percentage) { return fmt("%3ld%% |", percentage); }
+    static std::string generate_progress_prefix(curl_off_t percentage) { return fmt("%3ld%% |", static_cast<long int>(percentage)); }

     static double calculate_speed(curl_off_t now_downloaded, const std::chrono::steady_clock::time_point & start_time) {
         const auto now = std::chrono::steady_clock::now();
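
The {} initializers value-initialize the whole OVERLAPPED struct while sidestepping missing-field-initializer warnings that { 0 } can trigger, and the static_cast in generate_progress_prefix keeps the %3ld format correct on platforms where long is 32 bits but curl_off_t is 64 (e.g. LLP64 Windows). A standalone sketch of the cast, using a stand-in type rather than a real libcurl dependency:

    #include <cstdint>
    #include <cstdio>

    // stand-in for curl_off_t, which is a 64-bit signed integer on common builds
    using off_t64 = int64_t;

    int main() {
        const off_t64 percentage = 42;
        // passing a 64-bit argument to %ld is undefined where long is 32 bits;
        // the explicit cast makes the argument match the format specifier
        std::printf("%3ld%% |\n", static_cast<long int>(percentage));
        return 0;
    }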
