
Commit 3ee6016

[fbuffers] Expose async interface
Add functions to the llama.cpp public headers to asynchronously load model shards.
1 parent 69aeda8
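
The commit message is the only prose here, so a usage sketch helps; the one below is inferred purely from the signatures added in this diff, not from repository code. One thread begins the load and blocks on the per-shard futures while a second thread fulfills each shard as its bytes become available (for example, streamed over a network). make_shard_streambuf() is a hypothetical helper and "tensors.txt" an assumed file name; see the streambuf sketch under include/llama-cpp.h below.

// Hedged sketch of the intended call pattern (assumptions noted above).
#include <cstdint>
#include <memory>
#include <streambuf>
#include <string>
#include <thread>
#include <vector>
#include "llama-cpp.h"

// Hypothetical helper: fetch one shard and expose it as a uint8_t streambuf.
std::unique_ptr<std::basic_streambuf<uint8_t>> make_shard_streambuf(const std::string & path);

llama_model * load_model_from_streams(const std::vector<std::string> & shard_paths) {
    const char * ctx = "example-load"; // assumed: distinguishes concurrent loads

    // Fulfill each shard's future as its data arrives. Whether fulfillment
    // may precede the matching load call is not shown by this diff.
    std::thread fulfiller([&] {
        for (const auto & p : shard_paths) {
            llama_model_load_fulfill_split_future(p.c_str(), ctx, make_shard_streambuf(p));
        }
    });

    std::vector<const char *> c_paths;
    c_paths.reserve(shard_paths.size());
    for (const auto & p : shard_paths) {
        c_paths.push_back(p.c_str());
    }

    // Presumably blocks until every shard future above has been fulfilled.
    llama_model_params params = llama_model_default_params();
    llama_model * model = llama_model_load_from_split_futures(
            c_paths.data(), c_paths.size(), ctx, "tensors.txt", params);

    fulfiller.join();
    return model; // nullptr on failure, matching llama_model_load_from_splits
}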

File tree

3 files changed: +30 additions, −0 deletions

include/llama-cpp.h

Lines changed: 2 additions & 0 deletions

@@ -31,3 +31,5 @@ typedef std::unique_ptr<llama_sampler, llama_sampler_deleter> llama_sampler_ptr;
 typedef std::unique_ptr<llama_adapter_lora, llama_adapter_lora_deleter> llama_adapter_lora_ptr;
 
 struct llama_model * llama_model_load_from_buffer(std::vector<uint8_t> && data, struct llama_model_params params);
+bool llama_model_load_fulfill_split_future(const char * path, const char * context,
+                                           std::unique_ptr<std::basic_streambuf<uint8_t>> && streambuf);
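
The new hook takes ownership of a std::basic_streambuf<uint8_t>. The standard library ships no concrete streambuf over uint8_t (its stock ones are char/wchar_t based), so a caller brings its own. Below is a minimal in-memory sketch; shard_membuf is an invented name, and, like the signature above, it assumes the toolchain accepts basic_streambuf instantiated for uint8_t.

#include <cstdint>
#include <streambuf>
#include <utility>
#include <vector>

// Illustrative only: an in-memory uint8_t streambuf that could back a
// fulfilled shard, e.g. std::make_unique<shard_membuf>(std::move(bytes)).
class shard_membuf : public std::basic_streambuf<uint8_t> {
public:
    explicit shard_membuf(std::vector<uint8_t> bytes) : bytes_(std::move(bytes)) {
        uint8_t * base = bytes_.data();
        setg(base, base, base + bytes_.size()); // whole buffer is the get area
    }
private:
    std::vector<uint8_t> bytes_;
};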

include/llama.h

Lines changed: 5 additions & 0 deletions

@@ -456,6 +456,11 @@ extern "C" {
                              size_t n_paths,
              struct llama_model_params params);
 
+    LLAMA_API struct llama_model * llama_model_load_from_split_futures(const char ** paths, size_t n_paths,
+                                                                       const char * context,
+                                                                       const char * tensor_list_file,
+                                                                       struct llama_model_params params);
+
     LLAMA_API void llama_model_save_to_file(
             const struct llama_model * model,
             const char * path_model);
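
Unlike llama_model_load_from_splits() just above it, this variant cannot read the shards from disk at call time, and the extra parameters appear to support that: context seemingly ties the load to the buffers later delivered through llama_model_load_fulfill_split_future (which takes the same path/context pair; see src/llama.cpp below), while tensor_list_file presumably enumerates the expected tensors up front since the shard bytes are not yet available. The diff alone does not confirm either role.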

src/llama.cpp

Lines changed: 23 additions & 0 deletions

@@ -18,6 +18,7 @@
 #include <cstring>
 #include <ctime>
 #include <stdexcept>
+#include <streambuf>
 
 #if defined(_MSC_VER)
 #pragma warning(disable: 4244 4267) // possible loss of data
@@ -298,6 +299,28 @@ struct llama_model * llama_model_load_from_splits(const char ** paths, size_t n_
     return llama_model_load_from_file_impl(ml, params);
 }
 
+struct llama_model * llama_model_load_from_split_futures(const char ** paths, size_t n_paths, const char * context,
+                                                         const char * tensor_list_file,
+                                                         struct llama_model_params params) {
+    std::vector<std::string> splits = splits_from_c_paths(paths, n_paths);
+    if (splits.empty()) {
+        return nullptr;
+    }
+    std::string tensor_list_file_str(tensor_list_file);
+
+    load_input_variant::buffer_future_load_input loader_input{ splits.front(), context, splits, tensor_list_file_str };
+    override_and_disable_mmap(params);
+    llama_model_loader ml(loader_input, params.use_mmap, params.check_tensors, params.kv_overrides,
+                          params.tensor_buft_overrides);
+    return llama_model_load_from_file_impl(ml, params);
+}
+
+bool llama_model_load_fulfill_split_future(const char * path, const char * context,
+                                           std::unique_ptr<std::basic_streambuf<uint8_t>> && streambuf) {
+    return llama_future_file_buffer_ro::fulfill_promise(path, context,
+                                                        std::make_unique<llama_file_buffer_ro>(std::move(streambuf)));
+}
+
 void llama_model_save_to_file(const struct llama_model * model, const char * path_model) {
     llama_model_saver ms(*model);
     ms.add_kv_from_model();