From b5c3eaa19f7e2a789dcc56fa92874700f0c1cefe Mon Sep 17 00:00:00 2001
From: juk
Date: Mon, 14 Jul 2025 14:48:57 +0100
Subject: [PATCH 1/4] Added `--lora-layer-range` option

---
 common/arg.cpp        |  8 ++++++++
 common/common.cpp     | 27 ++++++++++++++++-----------
 common/common.h       |  2 ++
 include/llama.h       |  5 ++++-
 src/llama-adapter.cpp | 31 ++++++++++++++++++++++++++++---
 5 files changed, 58 insertions(+), 15 deletions(-)

diff --git a/common/arg.cpp b/common/arg.cpp
index 56827a65908be..cc4aab8dd641e 100644
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -2472,6 +2472,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         }
         // we define this arg on both COMMON and EXPORT_LORA, so when showing help message of export-lora, it will be categorized as "example-specific" arg
     ).set_examples({LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_EXPORT_LORA}));
+    add_opt(common_arg(
+        {"--lora-layer-range"}, "START", "END",
+        "layer range to apply the lora(s) to, start and end inclusive",
+        [](common_params & params, const std::string & start, const std::string & end) {
+            params.lora_layer_start = std::stoi(start);
+            params.lora_layer_end = std::stoi(end);
+        }
+    ));
     add_opt(common_arg(
         {"--control-vector"}, "FNAME",
         "add a control vector\nnote: this argument can be repeated to add multiple control vectors",
diff --git a/common/common.cpp b/common/common.cpp
index e4e71ad13fb59..c3613b06eef89 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -982,18 +982,23 @@ struct common_init_result common_init_from_params(common_params & params) {
     }

     // load and optionally apply lora adapters
-    for (auto & la : params.lora_adapters) {
-        llama_adapter_lora_ptr lora;
-        lora.reset(llama_adapter_lora_init(model, la.path.c_str()));
-        if (lora == nullptr) {
-            LOG_ERR("%s: failed to apply lora adapter '%s'\n", __func__, la.path.c_str());
-            llama_free(lctx);
-            llama_model_free(model);
-            return iparams;
-        }
+    if (!params.lora_adapters.empty()) {
+        if (params.lora_layer_start <= 0) params.lora_layer_start = 1;
+        if (params.lora_layer_end <= 0) params.lora_layer_end = llama_model_n_layer(model);
+
+        for (auto & la : params.lora_adapters) {
+            llama_adapter_lora_ptr lora;
+            lora.reset(llama_adapter_lora_init(model, la.path.c_str()));
+            if (lora == nullptr) {
+                LOG_ERR("%s: failed to apply lora adapter '%s'\n", __func__, la.path.c_str());
+                llama_free(lctx);
+                llama_model_free(model);
+                return iparams;
+            }

-        la.ptr = lora.get();
-        iparams.lora.emplace_back(std::move(lora)); // copy to list of loaded adapters
+            la.ptr = lora.get();
+            iparams.lora.emplace_back(std::move(lora)); // copy to list of loaded adapters
+        }
     }

     if (!params.lora_init_without_apply) {
diff --git a/common/common.h b/common/common.h
index a5abe32859fdd..5db658ba3f913 100644
--- a/common/common.h
+++ b/common/common.h
@@ -296,6 +296,8 @@ struct common_params {
     int32_t verbosity                  = 0;
     int32_t control_vector_layer_start = -1; // layer range for control vector
     int32_t control_vector_layer_end   = -1; // layer range for control vector
+    int32_t lora_layer_start           = -1; // layer range for lora
+    int32_t lora_layer_end             = -1; // layer range for lora
     bool    offline                    = false;

     int32_t ppl_stride = 0;     // stride for perplexity calculations. If left at 0, the pre-existing approach will be used.
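The common-side changes above split the feature across three places: the new `--lora-layer-range START END` argument, the `lora_layer_start`/`lora_layer_end` fields (defaulting to -1, i.e. unset), and the fall-back to the model's full depth in `common_init_from_params`. As a quick illustration of the combined behaviour, here is a minimal standalone sketch; the helper name `resolve_lora_layer_range` is invented for the example and is not part of the patch, and the defaulting shown reflects the lower bound of 0 adopted later in this series.

```cpp
// Illustrative sketch only -- not part of the patch.
// Parses START/END as the option handler does, then applies the same
// defaulting as common_init_from_params(): an unset bound (-1) expands
// to the widest possible range, 0 .. n_layer inclusive.
#include <cstdint>
#include <cstdio>
#include <string>
#include <utility>

static std::pair<int32_t, int32_t> resolve_lora_layer_range(
        const std::string & start, const std::string & end, int32_t n_layer) {
    int32_t il_start = std::stoi(start); // throws on non-numeric input, like the option handler
    int32_t il_end   = std::stoi(end);
    if (il_start < 0) il_start = 0;       // LoRAs can apply to layer 0
    if (il_end   < 0) il_end   = n_layer; // default: through the last layer
    return {il_start, il_end};
}

int main() {
    // e.g. "--lora-layer-range 10 20" on a 32-layer model keeps layers 10..20 inclusive
    auto [il_start, il_end] = resolve_lora_layer_range("10", "20", 32);
    std::printf("applying lora to layers %d..%d\n", il_start, il_end);
}
```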
diff --git a/include/llama.h b/include/llama.h
index f73b1ab65fe6f..778db32364145 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -544,9 +544,12 @@ extern "C" {
     //

     // Load a LoRA adapter from file
+    // il_start and il_end are the layer range the lora should apply to (both inclusive)
     LLAMA_API struct llama_adapter_lora * llama_adapter_lora_init(
             struct llama_model * model,
-            const char * path_lora);
+            const char * path_lora,
+            int32_t il_start,
+            int32_t il_end);

     // Manually free a LoRA adapter
     // Note: loaded adapters will be free when the associated model is deleted
diff --git a/src/llama-adapter.cpp b/src/llama-adapter.cpp
index 8d94034aed95d..928e9b219d27d 100644
--- a/src/llama-adapter.cpp
+++ b/src/llama-adapter.cpp
@@ -145,7 +145,12 @@ llama_adapter_lora_weight * llama_adapter_lora::get_weight(ggml_tensor * w) {
     return nullptr;
 }

-static void llama_adapter_lora_init_impl(llama_model & model, const char * path_lora, llama_adapter_lora & adapter) {
+static void llama_adapter_lora_init_impl(
+        llama_model & model,
+        const char * path_lora,
+        llama_adapter_lora & adapter,
+        int32_t il_start,
+        int32_t il_end) {
     LLAMA_LOG_INFO("%s: loading lora adapter from '%s' ...\n", __func__, path_lora);

     ggml_context * ctx_init;
@@ -224,6 +229,22 @@ static void llama_adapter_lora_init_impl(llama_model & model, const char * path_

     for (ggml_tensor * cur = ggml_get_first_tensor(ctx.get()); cur; cur = ggml_get_next_tensor(ctx.get(), cur)) {
         std::string name(cur->name);
+
+        // check if this tensor has a layer number and is outside our range
+        size_t blk_pos = name.find("blk.");
+        if (blk_pos != std::string::npos) {
+            size_t start = blk_pos + 4; // skip "blk."
+            size_t end = name.find('.', start);
+            try {
+                int layer_num = std::stoi(name.substr(start, end - start));
+                if (layer_num < il_start || layer_num > il_end) {
+                    continue; // skip this tensor
+                }
+            } catch (const std::exception & err) {
+                LLAMA_LOG_ERROR("%s: failed to parse layer number from tensor '%s': %s\n", __func__, name.c_str(), err.what());
+            }
+        }
+
         if (str_endswith(name, ".lora_a")) {
             replace_all(name, ".lora_a", "");
             if (ab_map.find(name) == ab_map.end()) {
@@ -368,11 +389,15 @@ static void llama_adapter_lora_init_impl(llama_model & model, const char * path_
     LLAMA_LOG_INFO("%s: loaded %zu tensors from lora file\n", __func__, adapter.ab_map.size()*2);
 }

-llama_adapter_lora * llama_adapter_lora_init(llama_model * model, const char * path_lora) {
+llama_adapter_lora * llama_adapter_lora_init(
+        llama_model * model,
+        const char * path_lora,
+        int32_t il_start,
+        int32_t il_end) {
     llama_adapter_lora * adapter = new llama_adapter_lora();

     try {
-        llama_adapter_lora_init_impl(*model, path_lora, *adapter);
+        llama_adapter_lora_init_impl(*model, path_lora, *adapter, il_start, il_end);
         return adapter;
     } catch (const std::exception & err) {
         LLAMA_LOG_ERROR("%s: failed to apply lora adapter: %s\n", __func__, err.what());

From 25da381e39b3cd9f7cc03f721856a1583edcea72 Mon Sep 17 00:00:00 2001
From: juk
Date: Mon, 14 Jul 2025 14:52:47 +0100
Subject: [PATCH 2/4] Added missing args to `llama_adapter_lora_init` call

---
 common/common.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/common/common.cpp b/common/common.cpp
index c3613b06eef89..b8714310617a5 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -988,7 +988,7 @@ struct common_init_result common_init_from_params(common_params & params) {

         for (auto & la : params.lora_adapters) {
             llama_adapter_lora_ptr lora;
-            lora.reset(llama_adapter_lora_init(model, la.path.c_str()));
+            lora.reset(llama_adapter_lora_init(model, la.path.c_str(), params.lora_layer_start, params.lora_layer_end));
             if (lora == nullptr) {
                 LOG_ERR("%s: failed to apply lora adapter '%s'\n", __func__, la.path.c_str());
                 llama_free(lctx);

From 7828e4f0f73199200f64bb671c76bea1bf2c69b4 Mon Sep 17 00:00:00 2001
From: juk
Date: Mon, 14 Jul 2025 15:27:25 +0100
Subject: [PATCH 3/4] Fixed lower end of range as LoRAs can be applied to layer 0

---
 common/common.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/common/common.cpp b/common/common.cpp
index b8714310617a5..069cdb0434de8 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -983,8 +983,8 @@ struct common_init_result common_init_from_params(common_params & params) {

     // load and optionally apply lora adapters
     if (!params.lora_adapters.empty()) {
-        if (params.lora_layer_start <= 0) params.lora_layer_start = 1;
-        if (params.lora_layer_end <= 0) params.lora_layer_end = llama_model_n_layer(model);
+        if (params.lora_layer_start < 0) params.lora_layer_start = 0;
+        if (params.lora_layer_end < 0) params.lora_layer_end = llama_model_n_layer(model);

         for (auto & la : params.lora_adapters) {
             llama_adapter_lora_ptr lora;

From 71f8b75d9b686a5e4a11f2067c7846c245a158f9 Mon Sep 17 00:00:00 2001
From: juk
Date: Mon, 14 Jul 2025 15:32:58 +0100
Subject: [PATCH 4/4] Updated the `README.md` for `llama-server`

---
 tools/server/README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/server/README.md b/tools/server/README.md
index 6f962664f6774..ee7a52702b6e2 100644
--- a/tools/server/README.md
+++ b/tools/server/README.md
@@ -82,6 +82,7 @@ The project is under active development, and we are [looking for feedback and co
 | `--override-kv KEY=TYPE:VALUE` | advanced option to override model metadata by key. may be specified multiple times.<br/>types: int, float, bool, str. example: --override-kv tokenizer.ggml.add_bos_token=bool:false |
 | `--lora FNAME` | path to LoRA adapter (can be repeated to use multiple adapters) |
 | `--lora-scaled FNAME SCALE` | path to LoRA adapter with user defined scaling (can be repeated to use multiple adapters) |
+| `--lora-layer-range START END` | layer range to apply the LoRA(s) to, start and end inclusive |
 | `--control-vector FNAME` | add a control vector<br/>note: this argument can be repeated to add multiple control vectors |
 | `--control-vector-scaled FNAME SCALE` | add a control vector with user defined scaling SCALE<br/>note: this argument can be repeated to add multiple scaled control vectors |
 | `--control-vector-layer-range START END` | layer range to apply the control vector(s) to, start and end inclusive |
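With this series applied, a typical invocation might look like `llama-server -m model.gguf --lora adapter.gguf --lora-layer-range 0 15` (the file names here are placeholders). The core of the change is the per-tensor filter in `llama-adapter.cpp`, which keys off the `blk.N.` prefix used by GGUF tensor names. A minimal standalone sketch of that filter follows; the helper name `keep_lora_tensor` and the example tensor names are illustrative only and not part of the patch.

```cpp
// Illustrative sketch only -- not part of the patch. Extracts the layer index
// from a tensor name such as "blk.12.attn_q.weight.lora_a" and reports whether
// it falls inside the inclusive range [il_start, il_end]. Tensors without a
// "blk." prefix (e.g. token embeddings) and names whose layer number cannot be
// parsed are kept, matching the behaviour of the patched loader.
#include <cstdio>
#include <string>

static bool keep_lora_tensor(const std::string & name, int il_start, int il_end) {
    size_t blk_pos = name.find("blk.");
    if (blk_pos == std::string::npos) {
        return true; // no layer number in the name
    }
    size_t start = blk_pos + 4;            // skip "blk."
    size_t end   = name.find('.', start);  // end of the numeric part
    try {
        int layer = std::stoi(name.substr(start, end - start));
        return layer >= il_start && layer <= il_end;
    } catch (const std::exception &) {
        return true; // unparsable layer number: log-and-keep in the patch
    }
}

int main() {
    std::printf("%d\n", keep_lora_tensor("blk.12.attn_q.weight.lora_a", 0, 15)); // 1 (kept)
    std::printf("%d\n", keep_lora_tensor("blk.30.ffn_up.weight.lora_b",  0, 15)); // 0 (skipped)
    std::printf("%d\n", keep_lora_tensor("token_embd.weight.lora_a",     0, 15)); // 1 (kept)
}
```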