add adapter metadata api

CISC · web-flow · commit bea6d0617ee4 · 2025-07-07T07:24:28.000+02:00
diff --git a/common/common.cpp b/common/common.cpp
@@ -993,8 +993,8 @@ struct common_init_result common_init_from_params(common_params & params) {
         }
 
         la.ptr = lora.get();
-        la.task_name = llama_adapter_lora_task_name(la.ptr);
-        la.prompt_prefix = llama_adapter_lora_prompt_prefix(la.ptr);
+        llama_adapter_meta_val_str(la.ptr, "adapter.lora.task_name", la.task_name, sizeof(la.task_name));
+        llama_adapter_meta_val_str(la.ptr, "adapter.lora.prompt_prefix", la.prompt_prefix, sizeof(la.prompt_prefix));
         iparams.lora.emplace_back(std::move(lora)); // copy to list of loaded adapters
     }
 
diff --git a/common/common.h b/common/common.h
@@ -31,8 +31,8 @@ struct common_adapter_lora_info {
     std::string path;
     float scale;
 
-    std::string task_name;
-    std::string prompt_prefix;
+    char task_name[64];
+    char prompt_prefix[256];
 
     struct llama_adapter_lora * ptr;
 };
diff --git a/include/llama.h b/include/llama.h
@@ -588,11 +588,23 @@ extern "C" {
             struct llama_model * model,
             const char * path_lora);
 
-    // Get the LoRA task name. Returns a blank string if not applicable
-    LLAMA_API const char * llama_adapter_lora_task_name(struct llama_adapter_lora * adapter);
+    // Functions to access the adapter's GGUF metadata scalar values
+    // - The string-returning functions return the length of the string on success, or -1 on failure
+    // - The output string is always null-terminated and cleared on failure
+    // - When retrieving a string, an extra byte must be allocated to account for the null terminator
+    // - GGUF array values are not supported by these functions
+
+    // Get metadata value as a string by key name
+    LLAMA_API int32_t llama_adapter_meta_val_str(const struct llama_adapter_lora * adapter, const char * key, char * buf, size_t buf_size);
 
-    // Get the required LoRA prompt prefix. Returns a blank string if not applicable
-    LLAMA_API const char * llama_adapter_lora_prompt_prefix(struct llama_adapter_lora * adapter);
+    // Get the number of metadata key/value pairs
+    LLAMA_API int32_t llama_adapter_meta_count(const struct llama_adapter_lora * adapter);
+
+    // Get metadata key name by index
+    LLAMA_API int32_t llama_adapter_meta_key_by_index(const struct llama_adapter_lora * adapter, int32_t i, char * buf, size_t buf_size);
+
+    // Get metadata value as a string by index
+    LLAMA_API int32_t llama_adapter_meta_val_str_by_index(const struct llama_adapter_lora * adapter, int32_t i, char * buf, size_t buf_size);
 
     // Manually free a LoRA adapter
     // Note: loaded adapters will be free when the associated model is deleted
diff --git a/src/llama-adapter.cpp b/src/llama-adapter.cpp
@@ -163,13 +163,38 @@ static void llama_adapter_lora_init_impl(llama_model & model, const char * path_
 
     // check metadata
     {
+        const gguf_context * gguf_ctx = ctx_gguf.get();
+
+        LLAMA_LOG_INFO("%s: Dumping metadata keys/values.\n", __func__);
+
+        // get metadata as string
+        for (int i = 0; i < gguf_get_n_kv(gguf_ctx); i++) {
+            gguf_type type = gguf_get_kv_type(gguf_ctx, i);
+            const std::string type_name =
+                type == GGUF_TYPE_ARRAY
+                ? format("%s[%s,%zu]", gguf_type_name(type), gguf_type_name(gguf_get_arr_type(gguf_ctx, i)), gguf_get_arr_n(gguf_ctx, i))
+                : gguf_type_name(type);
+            const char * name = gguf_get_key(gguf_ctx, i);
+            const std::string value = gguf_kv_to_str(gguf_ctx, i);
+
+            if (type != GGUF_TYPE_ARRAY) {
+                adapter.gguf_kv.emplace(name, value);
+            }
+
+            const size_t MAX_VALUE_LEN = 40;
+            std::string print_value = value.size() > MAX_VALUE_LEN ? format("%s...", value.substr(0, MAX_VALUE_LEN - 3).c_str()) : value;
+            replace_all(print_value, "\n", "\\n");
+
+            LLAMA_LOG_INFO("%s: - kv %3d: %42s %-16s = %s\n", __func__, i, name, type_name.c_str(), print_value.c_str());
+        }
+
         auto get_kv_str = [&](const std::string & key) -> std::string {
-            int id = gguf_find_key(ctx_gguf.get(), key.c_str());
-            return id < 0 ? "" : std::string(gguf_get_val_str(ctx_gguf.get(), id));
+            int id = gguf_find_key(gguf_ctx, key.c_str());
+            return id < 0 ? "" : std::string(gguf_get_val_str(gguf_ctx, id));
         };
         auto get_kv_f32 = [&](const std::string & key) -> float {
-            int id = gguf_find_key(ctx_gguf.get(), key.c_str());
-            return id < 0 ? 0.0f : gguf_get_val_f32(ctx_gguf.get(), id);
+            int id = gguf_find_key(gguf_ctx, key.c_str());
+            return id < 0 ? 0.0f : gguf_get_val_f32(gguf_ctx, id);
         };
         LLM_KV llm_kv = LLM_KV(LLM_ARCH_UNKNOWN);
 
@@ -190,8 +215,6 @@ static void llama_adapter_lora_init_impl(llama_model & model, const char * path_
         }
 
         adapter.alpha = get_kv_f32(llm_kv(LLM_KV_ADAPTER_LORA_ALPHA));
-        adapter.task_name = get_kv_str(llm_kv(LLM_KV_ADAPTER_LORA_TASK_NAME));
-        adapter.prompt_prefix = get_kv_str(llm_kv(LLM_KV_ADAPTER_LORA_PROMPT_PREFIX));
     }
 
     int n_tensors = gguf_get_n_tensors(ctx_gguf.get());
@@ -385,12 +408,43 @@ llama_adapter_lora * llama_adapter_lora_init(llama_model * model, const char * p
     return nullptr;
 }
 
-const char * llama_adapter_lora_task_name(llama_adapter_lora * adapter) {
-    return adapter->task_name.c_str();
+int32_t llama_adapter_meta_val_str(const llama_adapter_lora * adapter, const char * key, char * buf, size_t buf_size) {
+    const auto entry = adapter->gguf_kv.find(key); // look the key up in the adapter's gguf_kv map
+    if (entry != adapter->gguf_kv.end()) {
+        return snprintf(buf, buf_size, "%s", entry->second.c_str()); // snprintf's result is returned as-is
+    }
+    if (buf_size > 0) {
+        buf[0] = '\0'; // unknown key: leave the caller with an empty string
+    }
+    return -1;
 }
 
-const char * llama_adapter_lora_prompt_prefix(llama_adapter_lora * adapter) {
-    return adapter->prompt_prefix.c_str();
+int32_t llama_adapter_meta_count(const llama_adapter_lora * adapter) {
+    return static_cast<int>(adapter->gguf_kv.size()); // number of entries held in the adapter's gguf_kv map
+}
+
+// Writes the key of the i-th gguf_kv entry into buf. Returns snprintf's result, or -1 (buf cleared) if i is out of range.
+int32_t llama_adapter_meta_key_by_index(const llama_adapter_lora * adapter, int32_t i, char * buf, size_t buf_size) {
+    if (i < 0 || i >= (int)adapter->gguf_kv.size()) {
+        if (buf_size > 0) {
+            buf[0] = '\0'; // out-of-range index: hand back an empty string
+        }
+        return -1;
+    }
+    const auto it = std::next(adapter->gguf_kv.begin(), i); // entry i in the unordered_map's iteration order
+    return snprintf(buf, buf_size, "%s", it->first.c_str());
+}
+
+int32_t llama_adapter_meta_val_str_by_index(const llama_adapter_lora * adapter, int32_t i, char * buf, size_t buf_size) {
+    if (i >= 0 && i < (int)adapter->gguf_kv.size()) {
+        auto pos = adapter->gguf_kv.begin();
+        std::advance(pos, i); // step from begin() to the i-th entry of the map
+        return snprintf(buf, buf_size, "%s", pos->second.c_str());
+    }
+    if (buf_size > 0) {
+        buf[0] = '\0'; // out-of-range index: leave the caller with an empty string
+    }
+    return -1;
 }
 
 void llama_adapter_lora_free(llama_adapter_lora * adapter) {
diff --git a/src/llama-adapter.h b/src/llama-adapter.h
@@ -66,8 +66,9 @@ struct llama_adapter_lora {
     std::vector<ggml_backend_buffer_ptr> bufs;
 
     float alpha;
-    std::string task_name;
-    std::string prompt_prefix;
+
+    // gguf metadata
+    std::unordered_map<std::string, std::string> gguf_kv;
 
     llama_adapter_lora() = default;
     ~llama_adapter_lora() = default;

Original file line number	Diff line number	Diff line change
`@@ -993,8 +993,8 @@ struct common_init_result common_init_from_params(common_params & params) {`
`993`	`993`	`}`
`994`	`994`
`995`	`995`	`la.ptr = lora.get();`
`996`		`- la.task_name = llama_adapter_lora_task_name(la.ptr);`
`997`		`- la.prompt_prefix = llama_adapter_lora_prompt_prefix(la.ptr);`
	`996`	`+ llama_adapter_meta_val_str(la.ptr, "adapter.lora.task_name", la.task_name, sizeof(la.task_name));`
	`997`	`+ llama_adapter_meta_val_str(la.ptr, "adapter.lora.prompt_prefix", la.prompt_prefix, sizeof(la.prompt_prefix));`
`998`	`998`	`iparams.lora.emplace_back(std::move(lora)); // copy to list of loaded adapters`
`999`	`999`	`}`
`1000`	`1000`