
Commit 9ba2959

llama : arch (cont)
ggml-ci
1 parent 7ab08d5

File tree

7 files changed: +1513, -1479 lines


src/llama-adapter.h

Lines changed: 1 addition & 6 deletions
@@ -68,12 +68,7 @@ static bool llama_control_vector_init(struct llama_control_vector & cvec, const
     cvec.tensors.reserve(model.hparams.n_layer);
     cvec.tensors.push_back(nullptr); // there's never a tensor for layer 0
     for (size_t il = 1; il < model.hparams.n_layer; il++) {
-        ggml_backend_buffer_type_t buft = select_buft(*model.dev_layer.at(il).buft_list,
-            [&](ggml_context * ctx) {
-                ggml_tensor * cur = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, model.hparams.n_embd);
-                ggml_tensor * layer_dir = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, model.hparams.n_embd);
-                return ggml_add(ctx, cur, layer_dir);
-            });
+        ggml_backend_buffer_type_t buft = llama_model_select_buft(model, il);
         ggml_context * ctx = ctx_for_buft(buft);
         if (!ctx) {
             LLAMA_LOG_ERROR("%s: failed to allocate context for control vector\n", __func__);

src/llama-arch.cpp

Lines changed: 1392 additions & 0 deletions
Large diffs are not rendered by default.

src/llama-arch.h

Lines changed: 58 additions & 1265 deletions
Large diffs are not rendered by default.

src/llama-context.cpp

Lines changed: 3 additions & 2 deletions
@@ -40,7 +40,7 @@ struct llama_data_write {
     }
 
     void write_model_info(const struct llama_context * ctx) {
-        std::string arch_str = LLM_ARCH_NAMES.at(ctx->model.arch);
+        const std::string arch_str = llm_arch_name(ctx->model.arch);
         write_string(arch_str);
         // TODO: add more model-specific info which should prevent loading the session file if not identical
     }
@@ -263,7 +263,8 @@ struct llama_data_read {
 
     // validate model information
     void read_model_info(const struct llama_context * ctx) {
-        std::string cur_arch_str = LLM_ARCH_NAMES.at(ctx->model.arch);
+        const std::string cur_arch_str = llm_arch_name(ctx->model.arch);
+
         std::string arch_str;
         read_string(arch_str);
         if (cur_arch_str != arch_str) {
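
Both hunks replace a direct `LLM_ARCH_NAMES.at(...)` lookup with the new `llm_arch_name()` helper, which lives in the unrendered `src/llama-arch.cpp` diff. Below is a minimal sketch of the shape that helper presumably has, assuming `LLM_ARCH_NAMES` remains a map from `llm_arch` to C strings and that an unknown architecture falls back to a placeholder instead of throwing; both points are assumptions, not taken from the rendered hunks.

// sketch only: presumed shape of the helper called above
const char * llm_arch_name(llm_arch arch) {
    auto it = LLM_ARCH_NAMES.find(arch);
    if (it == LLM_ARCH_NAMES.end()) {
        return "unknown"; // assumed fallback; avoids std::map::at throwing on a missing key
    }
    return it->second;
}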

src/llama-model.cpp

Lines changed: 48 additions & 0 deletions
@@ -1,5 +1,7 @@
 #include "llama-model.h"
 
+#include "llama-impl.h"
+
 std::string llama_model_ftype_name(llama_ftype ftype) {
     if (ftype & LLAMA_FTYPE_GUESSED) {
         return llama_model_ftype_name((enum llama_ftype) (ftype & ~LLAMA_FTYPE_GUESSED)) + " (guessed)";
@@ -42,3 +44,49 @@ std::string llama_model_ftype_name(llama_ftype ftype) {
         default: return "unknown, may not work";
     }
 }
+
+template<typename F>
+static bool buft_supported(ggml_backend_buffer_type_t buft, ggml_backend_dev_t dev, F & fn) {
+    ggml_init_params params = {
+        /*.mem_size   =*/ ggml_tensor_overhead()*8,
+        /*.mem_buffer =*/ NULL,
+        /*.no_alloc   =*/ true,
+    };
+    ggml_context_ptr ctx { ggml_init(params) };
+    if (!ctx) {
+        throw std::runtime_error(format("failed to create ggml context"));
+    }
+
+    ggml_backend_buffer_ptr buf { ggml_backend_buft_alloc_buffer(buft, 0) };
+    ggml_tensor * op_tensor = fn(ctx.get());
+    for (int i = 0; i < GGML_MAX_SRC; i++) {
+        if (op_tensor->src[i] != nullptr) {
+            assert(op_tensor->src[i]->buffer == nullptr);
+            op_tensor->src[i]->buffer = buf.get();
+        }
+    }
+    bool op_supported = ggml_backend_dev_supports_op(dev, op_tensor);
+
+    return op_supported;
+}
+
+template<typename F>
+static ggml_backend_buffer_type_t select_buft(const llama_model::buft_list_t & buft_list, const F & fn) {
+    for (const auto & cur : buft_list) {
+        ggml_backend_dev_t cur_dev = cur.first;
+        ggml_backend_buffer_type_t cur_buft = cur.second;
+        if (buft_supported(cur_buft, cur_dev, fn)) {
+            return cur_buft;
+        }
+    }
+    throw std::runtime_error(format("no suitable buffer type found"));
+}
+
+ggml_backend_buffer_type_t llama_model_select_buft(const llama_model & model, int il) {
+    return select_buft(*model.dev_layer.at(il).buft_list,
+        [&](ggml_context * ctx) {
+            ggml_tensor * cur = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, model.hparams.n_embd);
+            ggml_tensor * layer_dir = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, model.hparams.n_embd);
+            return ggml_add(ctx, cur, layer_dir);
+        });
+}
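
`buft_supported()` probes a buffer type by building the candidate op in a `no_alloc` ggml context, pointing the op's sources at a zero-size buffer of that type, and asking the device via `ggml_backend_dev_supports_op()`; `select_buft()` then returns the first entry in the layer's buffer-type list that passes. A caller-side sketch of the exported `llama_model_select_buft()` follows, mirroring the control-vector loop in `src/llama-adapter.h` above; `ctx_for_buft`, `cvec`, and the final tensor creation follow that call site and are not new API.

// sketch of a caller, following llama_control_vector_init in llama-adapter.h
for (size_t il = 1; il < model.hparams.n_layer; il++) {
    // pick the first buffer type whose device supports an f32 ggml_add
    // over n_embd-sized tensors (the probe op built inside the helper)
    ggml_backend_buffer_type_t buft = llama_model_select_buft(model, il);

    ggml_context * ctx = ctx_for_buft(buft); // helper from llama-adapter.h
    if (!ctx) {
        LLAMA_LOG_ERROR("%s: failed to allocate context for control vector\n", __func__);
        return false;
    }

    // allocate this layer's direction vector in the chosen buffer type
    ggml_tensor * layer_dir = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, model.hparams.n_embd);
    cvec.tensors.push_back(layer_dir);
}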

src/llama-model.h

Lines changed: 1 addition & 39 deletions
@@ -5,8 +5,6 @@
 #include "llama-vocab.h"
 #include "llama-mmap.h"
 
-#include "llama-impl.h"
-
 #include "ggml-cpp.h"
 
 #include <array>
@@ -613,42 +611,6 @@ struct llama_model {
     }
 };
 
-template<typename F>
-static bool buft_supported(ggml_backend_buffer_type_t buft, ggml_backend_dev_t dev, F & fn) {
-    ggml_init_params params = {
-        /*.mem_size   =*/ ggml_tensor_overhead()*8,
-        /*.mem_buffer =*/ NULL,
-        /*.no_alloc   =*/ true,
-    };
-    ggml_context_ptr ctx { ggml_init(params) };
-    if (!ctx) {
-        throw std::runtime_error(format("failed to create ggml context"));
-    }
-
-    ggml_backend_buffer_ptr buf { ggml_backend_buft_alloc_buffer(buft, 0) };
-    ggml_tensor * op_tensor = fn(ctx.get());
-    for (int i = 0; i < GGML_MAX_SRC; i++) {
-        if (op_tensor->src[i] != nullptr) {
-            assert(op_tensor->src[i]->buffer == nullptr);
-            op_tensor->src[i]->buffer = buf.get();
-        }
-    }
-    bool op_supported = ggml_backend_dev_supports_op(dev, op_tensor);
-
-    return op_supported;
-}
-
-template<typename F>
-static ggml_backend_buffer_type_t select_buft(const llama_model::buft_list_t & buft_list, const F & fn) {
-    for (const auto & cur : buft_list) {
-        ggml_backend_dev_t cur_dev = cur.first;
-        ggml_backend_buffer_type_t cur_buft = cur.second;
-        if (buft_supported(cur_buft, cur_dev, fn)) {
-            return cur_buft;
-        }
-    }
-    throw std::runtime_error(format("no suitable buffer type found"));
-}
-
+ggml_backend_buffer_type_t llama_model_select_buft(const llama_model & model, int il);
 
 std::string llama_model_ftype_name(llama_ftype ftype);
