8 changes: 8 additions & 0 deletions common/arg.cpp
@@ -2472,6 +2472,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         }
         // we define this arg on both COMMON and EXPORT_LORA, so when showing help message of export-lora, it will be categorized as "example-specific" arg
     ).set_examples({LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_EXPORT_LORA}));
+    add_opt(common_arg(
+        {"--lora-layer-range"}, "START", "END",
+        "layer range to apply the lora(s) to, start and end inclusive",
+        [](common_params & params, const std::string & start, const std::string & end) {
+            params.lora_layer_start = std::stoi(start);
+            params.lora_layer_end = std::stoi(end);
+        }
+    ));
     add_opt(common_arg(
         {"--control-vector"}, "FNAME",
         "add a control vector\nnote: this argument can be repeated to add multiple control vectors",

27 changes: 16 additions & 11 deletions common/common.cpp
@@ -982,18 +982,23 @@ struct common_init_result common_init_from_params(common_params & params) {
     }
 
     // load and optionally apply lora adapters
-    for (auto & la : params.lora_adapters) {
-        llama_adapter_lora_ptr lora;
-        lora.reset(llama_adapter_lora_init(model, la.path.c_str()));
-        if (lora == nullptr) {
-            LOG_ERR("%s: failed to apply lora adapter '%s'\n", __func__, la.path.c_str());
-            llama_free(lctx);
-            llama_model_free(model);
-            return iparams;
-        }
+    if (!params.lora_adapters.empty()) {
+        if (params.lora_layer_start < 0) params.lora_layer_start = 0;
+        if (params.lora_layer_end < 0) params.lora_layer_end = llama_model_n_layer(model);
+
+        for (auto & la : params.lora_adapters) {
+            llama_adapter_lora_ptr lora;
+            lora.reset(llama_adapter_lora_init(model, la.path.c_str(), params.lora_layer_start, params.lora_layer_end));
+            if (lora == nullptr) {
+                LOG_ERR("%s: failed to apply lora adapter '%s'\n", __func__, la.path.c_str());
+                llama_free(lctx);
+                llama_model_free(model);
+                return iparams;
+            }
 
-        la.ptr = lora.get();
-        iparams.lora.emplace_back(std::move(lora)); // copy to list of loaded adapters
+            la.ptr = lora.get();
+            iparams.lora.emplace_back(std::move(lora)); // copy to list of loaded adapters
+        }
     }
 
     if (!params.lora_init_without_apply) {
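The two added guards give the -1 sentinels the meaning "apply to the whole model". A minimal sketch of how they resolve, assuming a hypothetical 32-layer model (the field names and `llama_model_n_layer` are from the patch; the concrete numbers are illustrative):

```cpp
// Sketch only (hypothetical 32-layer model); mirrors the two guards added above.
// Default: --lora-layer-range not given, so both fields stay at -1 and expand to the whole model.
int32_t il_start = params.lora_layer_start < 0 ? 0 : params.lora_layer_start;                        // -1 -> 0
int32_t il_end   = params.lora_layer_end   < 0 ? llama_model_n_layer(model) : params.lora_layer_end; // -1 -> 32
// With --lora-layer-range 16 31, the adapters are restricted to blocks blk.16 .. blk.31, both inclusive.
```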

2 changes: 2 additions & 0 deletions common/common.h
@@ -296,6 +296,8 @@ struct common_params {
     int32_t verbosity = 0;
     int32_t control_vector_layer_start = -1; // layer range for control vector
     int32_t control_vector_layer_end = -1; // layer range for control vector
+    int32_t lora_layer_start = -1; // layer range for lora
+    int32_t lora_layer_end = -1; // layer range for lora
     bool offline = false;
 
     int32_t ppl_stride = 0; // stride for perplexity calculations. If left at 0, the pre-existing approach will be used.

5 changes: 4 additions & 1 deletion include/llama.h
@@ -544,9 +544,12 @@ extern "C" {
     //
 
     // Load a LoRA adapter from file
+    // il_start and il_end are the layer range the lora should apply to (both inclusive)
     LLAMA_API struct llama_adapter_lora * llama_adapter_lora_init(
             struct llama_model * model,
-            const char * path_lora);
+            const char * path_lora,
+            int32_t il_start,
+            int32_t il_end);
 
     // Manually free a LoRA adapter
     // Note: loaded adapters will be free when the associated model is deleted
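For context, a minimal sketch of how a caller might use the extended signature. Only `llama_adapter_lora_init` comes from this diff; the other calls are taken from the existing llama.h API and are assumed unchanged here, and the file paths and layer bounds are placeholders:

```cpp
#include "llama.h"

int main() {
    llama_backend_init();

    llama_model * model = llama_model_load_from_file("model.gguf", llama_model_default_params());
    if (model == nullptr) { return 1; }

    // apply the adapter only to blocks 16..31 (inclusive);
    // passing 0 and llama_model_n_layer(model) would cover the whole model
    llama_adapter_lora * adapter = llama_adapter_lora_init(model, "adapter.gguf", 16, 31);
    if (adapter == nullptr) { llama_model_free(model); return 1; }

    llama_context * ctx = llama_init_from_model(model, llama_context_default_params());
    llama_set_adapter_lora(ctx, adapter, 1.0f); // attach to the context with scale 1.0

    // ... run inference as usual ...

    llama_free(ctx);
    llama_model_free(model); // per the note above, loaded adapters are freed with the model
    llama_backend_free();
    return 0;
}
```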

31 changes: 28 additions & 3 deletions src/llama-adapter.cpp
@@ -145,7 +145,12 @@ llama_adapter_lora_weight * llama_adapter_lora::get_weight(ggml_tensor * w) {
     return nullptr;
 }
 
-static void llama_adapter_lora_init_impl(llama_model & model, const char * path_lora, llama_adapter_lora & adapter) {
+static void llama_adapter_lora_init_impl(
+        llama_model & model,
+        const char * path_lora,
+        llama_adapter_lora & adapter,
+        int32_t il_start,
+        int32_t il_end) {
     LLAMA_LOG_INFO("%s: loading lora adapter from '%s' ...\n", __func__, path_lora);
 
     ggml_context * ctx_init;
Expand Down Expand Up @@ -224,6 +229,22 @@ static void llama_adapter_lora_init_impl(llama_model & model, const char * path_

for (ggml_tensor * cur = ggml_get_first_tensor(ctx.get()); cur; cur = ggml_get_next_tensor(ctx.get(), cur)) {
std::string name(cur->name);

// check if this tensor has a layer number and is outside our range
size_t blk_pos = name.find("blk.");
if (blk_pos != std::string::npos) {
size_t start = blk_pos + 4; // skip "blk."
size_t end = name.find('.', start);
try {
int layer_num = std::stoi(name.substr(start, end - start));
if (layer_num < il_start || layer_num > il_end) {
continue; // skip this tensor
}
} catch (const std::exception & err) {
LLAMA_LOG_ERROR("%s: failed to parse layer number from tensor '%s': %s\n", __func__, name.c_str(), err.what());
}
}

if (str_endswith(name, ".lora_a")) {
replace_all(name, ".lora_a", "");
if (ab_map.find(name) == ab_map.end()) {
Expand Down Expand Up @@ -368,11 +389,15 @@ static void llama_adapter_lora_init_impl(llama_model & model, const char * path_
LLAMA_LOG_INFO("%s: loaded %zu tensors from lora file\n", __func__, adapter.ab_map.size()*2);
}

llama_adapter_lora * llama_adapter_lora_init(llama_model * model, const char * path_lora) {
llama_adapter_lora * llama_adapter_lora_init(
llama_model * model,
const char * path_lora,
int32_t il_start,
int32_t il_end) {
llama_adapter_lora * adapter = new llama_adapter_lora();

try {
llama_adapter_lora_init_impl(*model, path_lora, *adapter);
llama_adapter_lora_init_impl(*model, path_lora, *adapter, il_start, il_end);
return adapter;
} catch (const std::exception & err) {
LLAMA_LOG_ERROR("%s: failed to apply lora adapter: %s\n", __func__, err.what());
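The range check keys off the `blk.<N>.` prefix of each LoRA tensor name (e.g. `blk.12.attn_q.weight.lora_a`). A self-contained sketch of the same rule, written here only for illustration (the helper name is made up, not part of the patch):

```cpp
#include <cstdint>
#include <exception>
#include <string>

// Returns true if the tensor belongs to a "blk.<N>." layer outside the inclusive range
// [il_start, il_end]. Tensors without a layer number (e.g. token embeddings) and names
// whose layer number cannot be parsed are kept, matching the behaviour of the patch above.
static bool lora_tensor_out_of_range(const std::string & name, int32_t il_start, int32_t il_end) {
    const size_t blk_pos = name.find("blk.");
    if (blk_pos == std::string::npos) {
        return false; // not a per-layer tensor
    }
    const size_t start = blk_pos + 4;          // skip "blk."
    const size_t end   = name.find('.', start);
    try {
        const int layer = std::stoi(name.substr(start, end - start));
        return layer < il_start || layer > il_end;
    } catch (const std::exception &) {
        return false; // unparseable layer number: keep the tensor (the patch also logs an error)
    }
}

// lora_tensor_out_of_range("blk.12.attn_q.weight.lora_a", 16, 31) -> true  (skipped)
// lora_tensor_out_of_range("blk.20.attn_q.weight.lora_a", 16, 31) -> false (kept)
```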

1 change: 1 addition & 0 deletions tools/server/README.md
@@ -82,6 +82,7 @@ The project is under active development, and we are [looking for feedback and co
 | `--override-kv KEY=TYPE:VALUE` | advanced option to override model metadata by key. may be specified multiple times.<br/>types: int, float, bool, str. example: --override-kv tokenizer.ggml.add_bos_token=bool:false |
 | `--lora FNAME` | path to LoRA adapter (can be repeated to use multiple adapters) |
 | `--lora-scaled FNAME SCALE` | path to LoRA adapter with user defined scaling (can be repeated to use multiple adapters) |
+| `--lora-layer-range START END` | layer range to apply the LoRA(s) to, start and end inclusive |
 | `--control-vector FNAME` | add a control vector<br/>note: this argument can be repeated to add multiple control vectors |
 | `--control-vector-scaled FNAME SCALE` | add a control vector with user defined scaling SCALE<br/>note: this argument can be repeated to add multiple scaled control vectors |
 | `--control-vector-layer-range START END` | layer range to apply the control vector(s) to, start and end inclusive |
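Usage note (the example command is illustrative, with placeholder file names): combined with `--lora`, the new flag restricts where the adapter is applied, e.g. `llama-server -m model.gguf --lora adapter.gguf --lora-layer-range 16 31` applies the adapter only to layers 16 through 31, inclusive. When the flag is omitted, the adapter is applied to all layers, as before.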