
Commit b5c3eaa

Added --lora-layer-range option
1 parent 494c589 commit b5c3eaa


5 files changed (+58, -15 lines)


common/arg.cpp

Lines changed: 8 additions & 0 deletions
@@ -2472,6 +2472,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         }
         // we define this arg on both COMMON and EXPORT_LORA, so when showing help message of export-lora, it will be categorized as "example-specific" arg
     ).set_examples({LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_EXPORT_LORA}));
+    add_opt(common_arg(
+        {"--lora-layer-range"}, "START", "END",
+        "layer range to apply the lora(s) to, start and end inclusive",
+        [](common_params & params, const std::string & start, const std::string & end) {
+            params.lora_layer_start = std::stoi(start);
+            params.lora_layer_end = std::stoi(end);
+        }
+    ));
     add_opt(common_arg(
         {"--control-vector"}, "FNAME",
         "add a control vector\nnote: this argument can be repeated to add multiple control vectors",

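The new handler converts START and END straight to integers with std::stoi. The fragment below is a minimal standalone sketch of that conversion, not code from the commit: parse_layer_range() is a hypothetical name, and the START <= END check is extra illustration of validation the option itself does not perform.

// Minimal sketch of the START/END parsing done by the new option handler.
// parse_layer_range() is a hypothetical helper used only for illustration.
#include <cstdint>
#include <stdexcept>
#include <string>
#include <utility>

static std::pair<int32_t, int32_t> parse_layer_range(const std::string & start, const std::string & end) {
    // std::stoi throws std::invalid_argument / std::out_of_range on bad input
    const int32_t s = std::stoi(start);
    const int32_t e = std::stoi(end);
    if (s > e) {
        // not checked by the commit; shown here only as an obvious extension
        throw std::invalid_argument("--lora-layer-range: START must be <= END");
    }
    return {s, e};
}

On the command line the option would sit next to an existing --lora adapter argument, e.g. --lora-layer-range 10 20.
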
common/common.cpp

Lines changed: 16 additions & 11 deletions
@@ -982,18 +982,23 @@ struct common_init_result common_init_from_params(common_params & params) {
     }
 
     // load and optionally apply lora adapters
-    for (auto & la : params.lora_adapters) {
-        llama_adapter_lora_ptr lora;
-        lora.reset(llama_adapter_lora_init(model, la.path.c_str()));
-        if (lora == nullptr) {
-            LOG_ERR("%s: failed to apply lora adapter '%s'\n", __func__, la.path.c_str());
-            llama_free(lctx);
-            llama_model_free(model);
-            return iparams;
-        }
+    if (!params.lora_adapters.empty()) {
+        if (params.lora_layer_start <= 0) params.lora_layer_start = 1;
+        if (params.lora_layer_end <= 0) params.lora_layer_end = llama_model_n_layer(model);
+
+        for (auto & la : params.lora_adapters) {
+            llama_adapter_lora_ptr lora;
+            lora.reset(llama_adapter_lora_init(model, la.path.c_str()));
+            if (lora == nullptr) {
+                LOG_ERR("%s: failed to apply lora adapter '%s'\n", __func__, la.path.c_str());
+                llama_free(lctx);
+                llama_model_free(model);
+                return iparams;
+            }
 
-        la.ptr = lora.get();
-        iparams.lora.emplace_back(std::move(lora)); // copy to list of loaded adapters
+            la.ptr = lora.get();
+            iparams.lora.emplace_back(std::move(lora)); // copy to list of loaded adapters
+        }
     }
 
     if (!params.lora_init_without_apply) {
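As shown above, adapter loading is now wrapped in a non-empty check and the layer range is resolved before the loop: a non-positive (i.e. unset) start falls back to 1, a non-positive end falls back to llama_model_n_layer(model). Below is a condensed sketch of that defaulting rule; resolve_lora_layer_range() is a hypothetical helper used only for illustration, the commit performs the checks inline.

// Condensed sketch of the defaulting shown above; not part of the commit.
#include <cstdint>
#include <utility>

static std::pair<int32_t, int32_t> resolve_lora_layer_range(int32_t start, int32_t end, int32_t n_layer) {
    if (start <= 0) start = 1;       // not set on the command line -> start at the first layer
    if (end   <= 0) end   = n_layer; // not set on the command line -> run through the last layer
    return {start, end};
}

With the -1 defaults declared in common.h, omitting --lora-layer-range therefore resolves to the range 1..llama_model_n_layer(model).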

common/common.h

Lines changed: 2 additions & 0 deletions
@@ -296,6 +296,8 @@ struct common_params {
     int32_t verbosity = 0;
     int32_t control_vector_layer_start = -1; // layer range for control vector
     int32_t control_vector_layer_end = -1; // layer range for control vector
+    int32_t lora_layer_start = -1; // layer range for lora
+    int32_t lora_layer_end = -1; // layer range for lora
     bool offline = false;
 
     int32_t ppl_stride = 0; // stride for perplexity calculations. If left at 0, the pre-existing approach will be used.

include/llama.h

Lines changed: 4 additions & 1 deletion
@@ -544,9 +544,12 @@ extern "C" {
     //
 
     // Load a LoRA adapter from file
+    // il_start and il_end are the layer range the lora should apply to (both inclusive)
     LLAMA_API struct llama_adapter_lora * llama_adapter_lora_init(
                          struct llama_model * model,
-                         const char * path_lora);
+                         const char * path_lora,
+                         int32_t il_start,
+                         int32_t il_end);
 
     // Manually free a LoRA adapter
     // Note: loaded adapters will be free when the associated model is deleted
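For callers of the C API, the layer range is now passed explicitly when the adapter is loaded. Below is a minimal, hypothetical caller sketch; the file names are placeholders, error handling is reduced to early returns, and the 10..20 range is arbitrary.

// Hypothetical caller of the extended API; file names are placeholders.
#include "llama.h"

int main() {
    llama_backend_init();

    llama_model_params mparams = llama_model_default_params();
    llama_model * model = llama_model_load_from_file("model.gguf", mparams);
    if (model == nullptr) {
        llama_backend_free();
        return 1;
    }

    // restrict the adapter to layers 10..20, both inclusive
    llama_adapter_lora * adapter = llama_adapter_lora_init(model, "adapter.gguf", 10, 20);
    if (adapter == nullptr) {
        llama_model_free(model);
        llama_backend_free();
        return 1;
    }

    llama_adapter_lora_free(adapter);
    llama_model_free(model);
    llama_backend_free();
    return 0;
}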

src/llama-adapter.cpp

Lines changed: 28 additions & 3 deletions
@@ -145,7 +145,12 @@ llama_adapter_lora_weight * llama_adapter_lora::get_weight(ggml_tensor * w) {
     return nullptr;
 }
 
-static void llama_adapter_lora_init_impl(llama_model & model, const char * path_lora, llama_adapter_lora & adapter) {
+static void llama_adapter_lora_init_impl(
+        llama_model & model,
+        const char * path_lora,
+        llama_adapter_lora & adapter,
+        int32_t il_start,
+        int32_t il_end) {
     LLAMA_LOG_INFO("%s: loading lora adapter from '%s' ...\n", __func__, path_lora);
 
     ggml_context * ctx_init;
@@ -224,6 +229,22 @@ static void llama_adapter_lora_init_impl(llama_model & model, const char * path_
 
     for (ggml_tensor * cur = ggml_get_first_tensor(ctx.get()); cur; cur = ggml_get_next_tensor(ctx.get(), cur)) {
         std::string name(cur->name);
+
+        // check if this tensor has a layer number and is outside our range
+        size_t blk_pos = name.find("blk.");
+        if (blk_pos != std::string::npos) {
+            size_t start = blk_pos + 4; // skip "blk."
+            size_t end = name.find('.', start);
+            try {
+                int layer_num = std::stoi(name.substr(start, end - start));
+                if (layer_num < il_start || layer_num > il_end) {
+                    continue; // skip this tensor
+                }
+            } catch (const std::exception & err) {
+                LLAMA_LOG_ERROR("%s: failed to parse layer number from tensor '%s': %s\n", __func__, name.c_str(), err.what());
+            }
+        }
+
         if (str_endswith(name, ".lora_a")) {
             replace_all(name, ".lora_a", "");
             if (ab_map.find(name) == ab_map.end()) {
@@ -368,11 +389,15 @@ static void llama_adapter_lora_init_impl(llama_model & model, const char * path_
     LLAMA_LOG_INFO("%s: loaded %zu tensors from lora file\n", __func__, adapter.ab_map.size()*2);
 }
 
-llama_adapter_lora * llama_adapter_lora_init(llama_model * model, const char * path_lora) {
+llama_adapter_lora * llama_adapter_lora_init(
+        llama_model * model,
+        const char * path_lora,
+        int32_t il_start,
+        int32_t il_end) {
     llama_adapter_lora * adapter = new llama_adapter_lora();
 
     try {
-        llama_adapter_lora_init_impl(*model, path_lora, *adapter);
+        llama_adapter_lora_init_impl(*model, path_lora, *adapter, il_start, il_end);
         return adapter;
     } catch (const std::exception & err) {
         LLAMA_LOG_ERROR("%s: failed to apply lora adapter: %s\n", __func__, err.what());
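The filter above keys off the "blk.<n>." prefix that per-layer tensors carry; names without it are never skipped, and a name whose layer number cannot be parsed is logged and kept rather than dropped. The following standalone sketch mirrors that extraction; parse_blk_layer() is an illustrative name, not part of the commit.

// Standalone sketch of the layer-number extraction above.
// Returns -1 when the tensor name carries no layer number.
#include <cstddef>
#include <exception>
#include <string>

static int parse_blk_layer(const std::string & name) {
    const size_t blk_pos = name.find("blk.");
    if (blk_pos == std::string::npos) {
        return -1; // not a per-layer tensor, e.g. "token_embd.weight"
    }
    const size_t start = blk_pos + 4;           // skip "blk."
    const size_t end   = name.find('.', start); // npos is fine: substr clamps the length
    try {
        return std::stoi(name.substr(start, end - start));
    } catch (const std::exception &) {
        return -1; // malformed name: keep the tensor, as the commit does
    }
}

Given a name such as blk.12.attn_q.weight this yields 12, which the loader compares against il_start and il_end before registering the tensor's lora_a/lora_b pair.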
