8 changes: 8 additions & 0 deletions common/arg.cpp
@@ -2472,6 +2472,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         }
         // we define this arg on both COMMON and EXPORT_LORA, so when showing help message of export-lora, it will be categorized as "example-specific" arg
     ).set_examples({LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_EXPORT_LORA}));
+    add_opt(common_arg(
+        {"--lora-layer-range"}, "START", "END",
+        "layer range to apply the lora(s) to, start and end inclusive",
+        [](common_params & params, const std::string & start, const std::string & end) {
+            params.lora_layer_start = std::stoi(start);
+            params.lora_layer_end = std::stoi(end);
+        }
+    ));
     add_opt(common_arg(
         {"--control-vector"}, "FNAME",
         "add a control vector\nnote: this argument can be repeated to add multiple control vectors",

27 changes: 16 additions & 11 deletions common/common.cpp
@@ -982,18 +982,23 @@ struct common_init_result common_init_from_params(common_params & params) {
     }
 
     // load and optionally apply lora adapters
-    for (auto & la : params.lora_adapters) {
-        llama_adapter_lora_ptr lora;
-        lora.reset(llama_adapter_lora_init(model, la.path.c_str()));
-        if (lora == nullptr) {
-            LOG_ERR("%s: failed to apply lora adapter '%s'\n", __func__, la.path.c_str());
-            llama_free(lctx);
-            llama_model_free(model);
-            return iparams;
-        }
+    if (!params.lora_adapters.empty()) {
+        if (params.lora_layer_start < 0) params.lora_layer_start = 0;
+        if (params.lora_layer_end < 0) params.lora_layer_end = llama_model_n_layer(model);
+
+        for (auto & la : params.lora_adapters) {
+            llama_adapter_lora_ptr lora;
+            lora.reset(llama_adapter_lora_init(model, la.path.c_str(), params.lora_layer_start, params.lora_layer_end));
+            if (lora == nullptr) {
+                LOG_ERR("%s: failed to apply lora adapter '%s'\n", __func__, la.path.c_str());
+                llama_free(lctx);
+                llama_model_free(model);
+                return iparams;
+            }
 
-        la.ptr = lora.get();
-        iparams.lora.emplace_back(std::move(lora)); // copy to list of loaded adapters
+            la.ptr = lora.get();
+            iparams.lora.emplace_back(std::move(lora)); // copy to list of loaded adapters
+        }
     }
 
     if (!params.lora_init_without_apply) {
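The two added guards give the -1 sentinels the meaning "apply to the whole model". A minimal sketch of how they resolve, assuming a hypothetical 32-layer model (the field names and `llama_model_n_layer` are from the patch; the concrete numbers are illustrative):

```cpp
// Sketch only (hypothetical 32-layer model); mirrors the two guards added above.
// Default: --lora-layer-range not given, so both fields stay at -1 and expand to the whole model.
int32_t il_start = params.lora_layer_start < 0 ? 0 : params.lora_layer_start;                        // -1 -> 0
int32_t il_end   = params.lora_layer_end   < 0 ? llama_model_n_layer(model) : params.lora_layer_end; // -1 -> 32
// With --lora-layer-range 16 31, the adapters are restricted to blocks blk.16 .. blk.31, both inclusive.
```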

2 changes: 2 additions & 0 deletions common/common.h
@@ -296,6 +296,8 @@ struct common_params {
     int32_t verbosity = 0;
     int32_t control_vector_layer_start = -1; // layer range for control vector
     int32_t control_vector_layer_end = -1; // layer range for control vector
+    int32_t lora_layer_start = -1; // layer range for lora
+    int32_t lora_layer_end = -1; // layer range for lora
     bool offline = false;
 
     int32_t ppl_stride = 0; // stride for perplexity calculations. If left at 0, the pre-existing approach will be used.

5 changes: 4 additions & 1 deletion include/llama.h
@@ -544,9 +544,12 @@ extern "C" {
     //
 
     // Load a LoRA adapter from file
+    // il_start and il_end are the layer range the lora should apply to (both inclusive)
     LLAMA_API struct llama_adapter_lora * llama_adapter_lora_init(
             struct llama_model * model,
-            const char * path_lora);
+            const char * path_lora,
+            int32_t il_start,
+            int32_t il_end);
 
     // Manually free a LoRA adapter
     // Note: loaded adapters will be free when the associated model is deleted
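For context, a minimal sketch of how a caller might use the extended signature. Only `llama_adapter_lora_init` comes from this diff; the other calls are taken from the existing llama.h API and are assumed unchanged here, and the file paths and layer bounds are placeholders:

```cpp
#include "llama.h"

int main() {
    llama_backend_init();

    llama_model * model = llama_model_load_from_file("model.gguf", llama_model_default_params());
    if (model == nullptr) { return 1; }

    // apply the adapter only to blocks 16..31 (inclusive);
    // passing 0 and llama_model_n_layer(model) would cover the whole model
    llama_adapter_lora * adapter = llama_adapter_lora_init(model, "adapter.gguf", 16, 31);
    if (adapter == nullptr) { llama_model_free(model); return 1; }

    llama_context * ctx = llama_init_from_model(model, llama_context_default_params());
    llama_set_adapter_lora(ctx, adapter, 1.0f); // attach to the context with scale 1.0

    // ... run inference as usual ...

    llama_free(ctx);
    llama_model_free(model); // per the note above, loaded adapters are freed with the model
    llama_backend_free();
    return 0;
}
```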

31 changes: 28 additions & 3 deletions src/llama-adapter.cpp
@@ -145,7 +145,12 @@ llama_adapter_lora_weight * llama_adapter_lora::get_weight(ggml_tensor * w) {
     return nullptr;
 }
 
-static void llama_adapter_lora_init_impl(llama_model & model, const char * path_lora, llama_adapter_lora & adapter) {
+static void llama_adapter_lora_init_impl(
+        llama_model & model,
+        const char * path_lora,
+        llama_adapter_lora & adapter,
+        int32_t il_start,
+        int32_t il_end) {
     LLAMA_LOG_INFO("%s: loading lora adapter from '%s' ...\n", __func__, path_lora);
 
     ggml_context * ctx_init;
Expand Down Expand Up @@ -224,6 +229,22 @@ static void llama_adapter_lora_init_impl(llama_model & model, const char * path_

for (ggml_tensor * cur = ggml_get_first_tensor(ctx.get()); cur; cur = ggml_get_next_tensor(ctx.get(), cur)) {
std::string name(cur->name);

// check if this tensor has a layer number and is outside our range
size_t blk_pos = name.find("blk.");
if (blk_pos != std::string::npos) {
size_t start = blk_pos + 4; // skip "blk."
size_t end = name.find('.', start);
try {
int layer_num = std::stoi(name.substr(start, end - start));
if (layer_num < il_start || layer_num > il_end) {
continue; // skip this tensor
}
} catch (const std::exception & err) {
LLAMA_LOG_ERROR("%s: failed to parse layer number from tensor '%s': %s\n", __func__, name.c_str(), err.what());
}
}

if (str_endswith(name, ".lora_a")) {
replace_all(name, ".lora_a", "");
if (ab_map.find(name) == ab_map.end()) {
Expand Down Expand Up @@ -368,11 +389,15 @@ static void llama_adapter_lora_init_impl(llama_model & model, const char * path_
LLAMA_LOG_INFO("%s: loaded %zu tensors from lora file\n", __func__, adapter.ab_map.size()*2);
}

llama_adapter_lora * llama_adapter_lora_init(llama_model * model, const char * path_lora) {
llama_adapter_lora * llama_adapter_lora_init(
llama_model * model,
const char * path_lora,
int32_t il_start,
int32_t il_end) {
llama_adapter_lora * adapter = new llama_adapter_lora();

try {
llama_adapter_lora_init_impl(*model, path_lora, *adapter);
llama_adapter_lora_init_impl(*model, path_lora, *adapter, il_start, il_end);
return adapter;
} catch (const std::exception & err) {
LLAMA_LOG_ERROR("%s: failed to apply lora adapter: %s\n", __func__, err.what());
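The range check keys off the `blk.<N>.` prefix of each LoRA tensor name (e.g. `blk.12.attn_q.weight.lora_a`). A self-contained sketch of the same rule, written here only for illustration (the helper name is made up, not part of the patch):

```cpp
#include <cstdint>
#include <exception>
#include <string>

// Returns true if the tensor belongs to a "blk.<N>." layer outside the inclusive range
// [il_start, il_end]. Tensors without a layer number (e.g. token embeddings) and names
// whose layer number cannot be parsed are kept, matching the behaviour of the patch above.
static bool lora_tensor_out_of_range(const std::string & name, int32_t il_start, int32_t il_end) {
    const size_t blk_pos = name.find("blk.");
    if (blk_pos == std::string::npos) {
        return false; // not a per-layer tensor
    }
    const size_t start = blk_pos + 4;          // skip "blk."
    const size_t end   = name.find('.', start);
    try {
        const int layer = std::stoi(name.substr(start, end - start));
        return layer < il_start || layer > il_end;
    } catch (const std::exception &) {
        return false; // unparseable layer number: keep the tensor (the patch also logs an error)
    }
}

// lora_tensor_out_of_range("blk.12.attn_q.weight.lora_a", 16, 31) -> true  (skipped)
// lora_tensor_out_of_range("blk.20.attn_q.weight.lora_a", 16, 31) -> false (kept)
```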

1 change: 1 addition & 0 deletions tools/server/README.md
@@ -82,6 +82,7 @@ The project is under active development, and we are [looking for feedback and co
 | `--override-kv KEY=TYPE:VALUE` | advanced option to override model metadata by key. may be specified multiple times.<br/>types: int, float, bool, str. example: --override-kv tokenizer.ggml.add_bos_token=bool:false |
 | `--lora FNAME` | path to LoRA adapter (can be repeated to use multiple adapters) |
 | `--lora-scaled FNAME SCALE` | path to LoRA adapter with user defined scaling (can be repeated to use multiple adapters) |
+| `--lora-layer-range START END` | layer range to apply the LoRA(s) to, start and end inclusive |
 | `--control-vector FNAME` | add a control vector<br/>note: this argument can be repeated to add multiple control vectors |
 | `--control-vector-scaled FNAME SCALE` | add a control vector with user defined scaling SCALE<br/>note: this argument can be repeated to add multiple scaled control vectors |
 | `--control-vector-layer-range START END` | layer range to apply the control vector(s) to, start and end inclusive |
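Usage note (the example command is illustrative, with placeholder file names): combined with `--lora`, the new flag restricts where the adapter is applied, e.g. `llama-server -m model.gguf --lora adapter.gguf --lora-layer-range 16 31` applies the adapter only to layers 16 through 31, inclusive. When the flag is omitted, the adapter is applied to all layers, as before.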