Skip to content

Commit bea6d06

Browse files
authored
add adapter metadata api
1 parent 966d0e0 commit bea6d06

File tree

5 files changed

+87
-20
lines changed

5 files changed

+87
-20
lines changed

common/common.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -993,8 +993,8 @@ struct common_init_result common_init_from_params(common_params & params) {
993993
}
994994

995995
la.ptr = lora.get();
996-
la.task_name = llama_adapter_lora_task_name(la.ptr);
997-
la.prompt_prefix = llama_adapter_lora_prompt_prefix(la.ptr);
996+
llama_adapter_meta_val_str(la.ptr, "adapter.lora.task_name", la.task_name, sizeof(la.task_name));
997+
llama_adapter_meta_val_str(la.ptr, "adapter.lora.prompt_prefix", la.prompt_prefix, sizeof(la.prompt_prefix));
998998
iparams.lora.emplace_back(std::move(lora)); // copy to list of loaded adapters
999999
}
10001000

common/common.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,8 @@ struct common_adapter_lora_info {
3131
std::string path;
3232
float scale;
3333

34-
std::string task_name;
35-
std::string prompt_prefix;
34+
char task_name[64];
35+
char prompt_prefix[256];
3636

3737
struct llama_adapter_lora * ptr;
3838
};

include/llama.h

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -588,11 +588,23 @@ extern "C" {
588588
struct llama_model * model,
589589
const char * path_lora);
590590

591-
// Get the LoRA task name. Returns a blank string if not applicable
592-
LLAMA_API const char * llama_adapter_lora_task_name(struct llama_adapter_lora * adapter);
591+
// Functions to access the adapter's GGUF metadata scalar values
592+
// - The functions return the length of the string on success, or -1 on failure
593+
// - The output string is always null-terminated and cleared on failure
594+
// - When retrieving a string, an extra byte must be allocated to account for the null terminator
595+
// - GGUF array values are not supported by these functions
596+
597+
// Get metadata value as a string by key name
598+
LLAMA_API int32_t llama_adapter_meta_val_str(const struct llama_adapter_lora * adapter, const char * key, char * buf, size_t buf_size);
593599

594-
// Get the required LoRA prompt prefix. Returns a blank string if not applicable
595-
LLAMA_API const char * llama_adapter_lora_prompt_prefix(struct llama_adapter_lora * adapter);
600+
// Get the number of metadata key/value pairs
601+
LLAMA_API int32_t llama_adapter_meta_count(const struct llama_adapter_lora * adapter);
602+
603+
// Get metadata key name by index
604+
LLAMA_API int32_t llama_adapter_meta_key_by_index(const struct llama_adapter_lora * adapter, int32_t i, char * buf, size_t buf_size);
605+
606+
// Get metadata value as a string by index
607+
LLAMA_API int32_t llama_adapter_meta_val_str_by_index(const struct llama_adapter_lora * adapter, int32_t i, char * buf, size_t buf_size);
596608

597609
// Manually free a LoRA adapter
598610
// Note: loaded adapters will be free when the associated model is deleted

src/llama-adapter.cpp

Lines changed: 64 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -163,13 +163,38 @@ static void llama_adapter_lora_init_impl(llama_model & model, const char * path_
163163

164164
// check metadata
165165
{
166+
const gguf_context * gguf_ctx = ctx_gguf.get();
167+
168+
LLAMA_LOG_INFO("%s: Dumping metadata keys/values.\n", __func__);
169+
170+
// get metadata as string
171+
for (int i = 0; i < gguf_get_n_kv(gguf_ctx); i++) {
172+
gguf_type type = gguf_get_kv_type(gguf_ctx, i);
173+
const std::string type_name =
174+
type == GGUF_TYPE_ARRAY
175+
? format("%s[%s,%zu]", gguf_type_name(type), gguf_type_name(gguf_get_arr_type(gguf_ctx, i)), gguf_get_arr_n(gguf_ctx, i))
176+
: gguf_type_name(type);
177+
const char * name = gguf_get_key(gguf_ctx, i);
178+
const std::string value = gguf_kv_to_str(gguf_ctx, i);
179+
180+
if (type != GGUF_TYPE_ARRAY) {
181+
adapter.gguf_kv.emplace(name, value);
182+
}
183+
184+
const size_t MAX_VALUE_LEN = 40;
185+
std::string print_value = value.size() > MAX_VALUE_LEN ? format("%s...", value.substr(0, MAX_VALUE_LEN - 3).c_str()) : value;
186+
replace_all(print_value, "\n", "\\n");
187+
188+
LLAMA_LOG_INFO("%s: - kv %3d: %42s %-16s = %s\n", __func__, i, name, type_name.c_str(), print_value.c_str());
189+
}
190+
166191
auto get_kv_str = [&](const std::string & key) -> std::string {
167-
int id = gguf_find_key(ctx_gguf.get(), key.c_str());
168-
return id < 0 ? "" : std::string(gguf_get_val_str(ctx_gguf.get(), id));
192+
int id = gguf_find_key(gguf_ctx, key.c_str());
193+
return id < 0 ? "" : std::string(gguf_get_val_str(gguf_ctx, id));
169194
};
170195
auto get_kv_f32 = [&](const std::string & key) -> float {
171-
int id = gguf_find_key(ctx_gguf.get(), key.c_str());
172-
return id < 0 ? 0.0f : gguf_get_val_f32(ctx_gguf.get(), id);
196+
int id = gguf_find_key(gguf_ctx, key.c_str());
197+
return id < 0 ? 0.0f : gguf_get_val_f32(gguf_ctx, id);
173198
};
174199
LLM_KV llm_kv = LLM_KV(LLM_ARCH_UNKNOWN);
175200

@@ -190,8 +215,6 @@ static void llama_adapter_lora_init_impl(llama_model & model, const char * path_
190215
}
191216

192217
adapter.alpha = get_kv_f32(llm_kv(LLM_KV_ADAPTER_LORA_ALPHA));
193-
adapter.task_name = get_kv_str(llm_kv(LLM_KV_ADAPTER_LORA_TASK_NAME));
194-
adapter.prompt_prefix = get_kv_str(llm_kv(LLM_KV_ADAPTER_LORA_PROMPT_PREFIX));
195218
}
196219

197220
int n_tensors = gguf_get_n_tensors(ctx_gguf.get());
@@ -385,12 +408,43 @@ llama_adapter_lora * llama_adapter_lora_init(llama_model * model, const char * p
385408
return nullptr;
386409
}
387410

388-
const char * llama_adapter_lora_task_name(llama_adapter_lora * adapter) {
389-
return adapter->task_name.c_str();
411+
int32_t llama_adapter_meta_val_str(const llama_adapter_lora * adapter, const char * key, char * buf, size_t buf_size) {
412+
const auto & it = adapter->gguf_kv.find(key);
413+
if (it == adapter->gguf_kv.end()) {
414+
if (buf_size > 0) {
415+
buf[0] = '\0';
416+
}
417+
return -1;
418+
}
419+
return snprintf(buf, buf_size, "%s", it->second.c_str());
390420
}
391421

392-
const char * llama_adapter_lora_prompt_prefix(llama_adapter_lora * adapter) {
393-
return adapter->prompt_prefix.c_str();
422+
int32_t llama_adapter_meta_count(const llama_adapter_lora * adapter) {
423+
return (int)adapter->gguf_kv.size();
424+
}
425+
426+
int32_t llama_adapter_meta_key_by_index(const llama_adapter_lora * adapter, int i, char * buf, size_t buf_size) {
427+
if (i < 0 || i >= (int)adapter->gguf_kv.size()) {
428+
if (buf_size > 0) {
429+
buf[0] = '\0';
430+
}
431+
return -1;
432+
}
433+
auto it = adapter->gguf_kv.begin();
434+
std::advance(it, i);
435+
return snprintf(buf, buf_size, "%s", it->first.c_str());
436+
}
437+
438+
int32_t llama_adapter_meta_val_str_by_index(const llama_adapter_lora * adapter, int32_t i, char * buf, size_t buf_size) {
439+
if (i < 0 || i >= (int)adapter->gguf_kv.size()) {
440+
if (buf_size > 0) {
441+
buf[0] = '\0';
442+
}
443+
return -1;
444+
}
445+
auto it = adapter->gguf_kv.begin();
446+
std::advance(it, i);
447+
return snprintf(buf, buf_size, "%s", it->second.c_str());
394448
}
395449

396450
void llama_adapter_lora_free(llama_adapter_lora * adapter) {

src/llama-adapter.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,9 @@ struct llama_adapter_lora {
6666
std::vector<ggml_backend_buffer_ptr> bufs;
6767

6868
float alpha;
69-
std::string task_name;
70-
std::string prompt_prefix;
69+
70+
// gguf metadata
71+
std::unordered_map<std::string, std::string> gguf_kv;
7172

7273
llama_adapter_lora() = default;
7374
~llama_adapter_lora() = default;

0 commit comments

Comments
 (0)