4 changes: 2 additions & 2 deletions examples/cvector-generator/cvector-generator.cpp
@@ -422,8 +422,7 @@ int main(int argc, char ** argv) {
     int n_layers = llama_n_layer(model);
     int n_embd = llama_n_embd(model);
     // get model hint param (a.k.a model arch name)
-    char model_hint[128];
-    llama_model_meta_val_str(model, "general.architecture", model_hint, 128);
+    char* model_hint = llama_model_meta_val_str(model, "general.architecture");
 
     // init train_context
     train_context ctx_train(n_embd, n_layers);
@@ -496,6 +495,7 @@ int main(int argc, char ** argv) {
 
     // write output vectors to gguf
     export_gguf(ctx_train.v_final, params.cvector_outfile, model_hint);
+    free(model_hint);
 
     llama_backend_free();
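With the new signature, llama_model_meta_val_str can return NULL when the key is missing, and the diff passes the pointer straight into export_gguf. A minimal defensive variant of this call site, with an empty fallback hint (the fallback is illustrative, not part of the PR), could look like this:

    // Sketch only: guard against a missing "general.architecture" key.
    // The empty-string fallback is an assumption, not behavior from this PR.
    char* model_hint = llama_model_meta_val_str(model, "general.architecture");
    const char* hint = model_hint ? model_hint : "";
    // ...
    export_gguf(ctx_train.v_final, params.cvector_outfile, hint);
    free(model_hint); // free(NULL) is a no-op, so no extra branch is needed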
9 changes: 4 additions & 5 deletions examples/server/server.cpp
@@ -661,13 +661,12 @@ struct server_context {
     }
 
     bool validate_model_chat_template() const {
-        std::vector<char> model_template(2048, 0); // longest known template is about 1200 bytes
         std::string template_key = "tokenizer.chat_template";
-        int32_t res = llama_model_meta_val_str(model, template_key.c_str(), model_template.data(), model_template.size());
-        if (res >= 0) {
+        char* tmpl = llama_model_meta_val_str(model, template_key.c_str());
+        if (tmpl) {
             llama_chat_message chat[] = {{"user", "test"}};
-            std::string tmpl = std::string(model_template.data(), model_template.size());
-            int32_t chat_res = llama_chat_apply_template(model, tmpl.c_str(), chat, 1, true, nullptr, 0);
+            int32_t chat_res = llama_chat_apply_template(model, tmpl, chat, 1, true, nullptr, 0);
+            free(tmpl);
             return chat_res > 0;
         }
         return false;
12 changes: 5 additions & 7 deletions examples/server/utils.hpp
@@ -336,15 +336,13 @@ inline std::string format_chat(const struct llama_model * model, const std::stri
 
 static std::string llama_get_chat_template(const struct llama_model * model) {
     std::string template_key = "tokenizer.chat_template";
-    // call with NULL buffer to get the total size of the string
-    int32_t res = llama_model_meta_val_str(model, template_key.c_str(), NULL, 0);
-    if (res < 0) {
+    char* model_template = llama_model_meta_val_str(model, template_key.c_str());
+    if (model_template == NULL) {
         return "";
-    } else {
-        std::vector<char> model_template(res, 0);
-        llama_model_meta_val_str(model, template_key.c_str(), model_template.data(), model_template.size());
-        return std::string(model_template.data(), model_template.size());
     }
+    std::string rv = model_template;
+    free(model_template);
+    return rv;
 }
 
 //
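Since llama_get_chat_template now copies the C string into a std::string and then frees it, one possible refinement outside this PR is to let a smart pointer own the malloc'd buffer. The helper below is a hypothetical sketch, not part of the change; it keeps the function exception-safe if the string copy ever throws:

    // Hypothetical helper (not in this PR): RAII ownership of the strdup'd value.
    #include <cstdlib>
    #include <memory>
    #include <string>

    static std::string meta_val_or_empty(const struct llama_model * model, const char * key) {
        std::unique_ptr<char, decltype(&std::free)> val(llama_model_meta_val_str(model, key), &std::free);
        return val ? std::string(val.get()) : std::string();
    }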
10 changes: 5 additions & 5 deletions include/llama.h
@@ -449,21 +449,21 @@ extern "C" {
     LLAMA_API float llama_rope_freq_scale_train(const struct llama_model * model);
 
     // Functions to access the model's GGUF metadata scalar values
-    // - The functions return the length of the string on success, or -1 on failure
-    // - The output string is always null-terminated and cleared on failure
+    // - The functions return a copy of the string on success, and NULL on failure
+    // - The returned string must be deallocated
     // - GGUF array values are not supported by these functions
 
     // Get metadata value as a string by key name
-    LLAMA_API int32_t llama_model_meta_val_str(const struct llama_model * model, const char * key, char * buf, size_t buf_size);
+    LLAMA_API char* llama_model_meta_val_str(const struct llama_model * model, const char * key);
 
     // Get the number of metadata key/value pairs
     LLAMA_API int32_t llama_model_meta_count(const struct llama_model * model);
 
     // Get metadata key name by index
-    LLAMA_API int32_t llama_model_meta_key_by_index(const struct llama_model * model, int32_t i, char * buf, size_t buf_size);
+    LLAMA_API char* llama_model_meta_key_by_index(const struct llama_model * model, int32_t i);
 
     // Get metadata value as a string by index
-    LLAMA_API int32_t llama_model_meta_val_str_by_index(const struct llama_model * model, int32_t i, char * buf, size_t buf_size);
+    LLAMA_API char* llama_model_meta_val_str_by_index(const struct llama_model * model, int32_t i);
 
     // Get a string describing the model type
     LLAMA_API int32_t llama_model_desc(const struct llama_model * model, char * buf, size_t buf_size);
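Under the new contract the caller owns the returned buffer and must release it with free(). A minimal sketch of reading a single metadata value (the key and the printf output are only an example, and model is assumed to be an already loaded llama_model):

    // Sketch of the new ownership rule: NULL means the key is missing,
    // otherwise the caller must free() the returned copy.
    char* val = llama_model_meta_val_str(model, "general.name");
    if (val != NULL) {
        printf("model name: %s\n", val);
        free(val);
    }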
34 changes: 13 additions & 21 deletions src/llama.cpp
@@ -20097,43 +20097,34 @@ float llama_rope_freq_scale_train(const struct llama_model * model) {
     return model->hparams.rope_freq_scale_train;
 }
 
-int32_t llama_model_meta_val_str(const struct llama_model * model, const char * key, char * buf, size_t buf_size) {
+char* llama_model_meta_val_str(const struct llama_model * model, const char * key) {
     const auto & it = model->gguf_kv.find(key);
     if (it == model->gguf_kv.end()) {
-        if (buf_size > 0) {
-            buf[0] = '\0';
-        }
-        return -1;
+        return NULL;
     }
-    return snprintf(buf, buf_size, "%s", it->second.c_str());
+    return strdup(it->second.c_str());
 }
 
 int32_t llama_model_meta_count(const struct llama_model * model) {
     return (int)model->gguf_kv.size();
 }
 
-int32_t llama_model_meta_key_by_index(const struct llama_model * model, int i, char * buf, size_t buf_size) {
+char* llama_model_meta_key_by_index(const struct llama_model * model, int i) {
     if (i < 0 || i >= (int)model->gguf_kv.size()) {
-        if (buf_size > 0) {
-            buf[0] = '\0';
-        }
-        return -1;
+        return NULL;
     }
     auto it = model->gguf_kv.begin();
     std::advance(it, i);
-    return snprintf(buf, buf_size, "%s", it->first.c_str());
+    return strdup(it->first.c_str());
 }
 
-int32_t llama_model_meta_val_str_by_index(const struct llama_model * model, int32_t i, char * buf, size_t buf_size) {
+char* llama_model_meta_val_str_by_index(const struct llama_model * model, int32_t i) {
     if (i < 0 || i >= (int)model->gguf_kv.size()) {
-        if (buf_size > 0) {
-            buf[0] = '\0';
-        }
-        return -1;
+        return NULL;
     }
     auto it = model->gguf_kv.begin();
     std::advance(it, i);
-    return snprintf(buf, buf_size, "%s", it->second.c_str());
+    return strdup(it->second.c_str());
 }
 
 int32_t llama_model_desc(const struct llama_model * model, char * buf, size_t buf_size) {
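The index-based accessors follow the same ownership rule, so enumerating all metadata now pairs each call with a free(). A sketch, assuming a loaded model handle:

    // Sketch: dump every GGUF key/value pair with the strdup-based API.
    for (int32_t i = 0; i < llama_model_meta_count(model); i++) {
        char* key = llama_model_meta_key_by_index(model, i);
        char* val = llama_model_meta_val_str_by_index(model, i);
        if (key != NULL && val != NULL) {
            printf("%s = %s\n", key, val);
        }
        free(key); // free(NULL) is safe, so no extra branching is needed
        free(val);
    }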
@@ -22118,12 +22109,13 @@ int32_t llama_chat_apply_template(
         // load template from model
         std::vector<char> model_template(2048, 0); // longest known template is about 1200 bytes
         std::string template_key = "tokenizer.chat_template";
-        int32_t res = llama_model_meta_val_str(model, template_key.c_str(), model_template.data(), model_template.size());
-        if (res < 0) {
+        char* tmpl = llama_model_meta_val_str(model, template_key.c_str());
+        if (tmpl == NULL) {
             // worst case: there is no information about template, we will use chatml by default
             curr_tmpl = "chatml"; // see llama_chat_apply_template_internal
         } else {
-            curr_tmpl = std::string(model_template.data(), model_template.size());
+            curr_tmpl = tmpl;
+            free(tmpl);
         }
     }
 
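The fallback above is reached when llama_chat_apply_template is called with a NULL template, letting the model's embedded template (or the chatml default) drive formatting. A sketch of the usual two-pass call, assuming a loaded model:

    // Sketch: format a prompt with the model's own chat template
    // (tmpl == nullptr); the first call only measures the required size.
    llama_chat_message chat[] = {{"user", "Hello"}};
    int32_t needed = llama_chat_apply_template(model, nullptr, chat, 1, true, nullptr, 0);
    if (needed > 0) {
        std::vector<char> buf(needed);
        llama_chat_apply_template(model, nullptr, chat, 1, true, buf.data(), buf.size());
        std::string prompt(buf.data(), needed);
    }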