
Commit 02fd50c

feat: alora migration documentation and nit fixes
Signed-off-by: yashasvi <yashasvi@ibm.com>
1 parent 63760db commit 02fd50c


6 files changed: +70 -53 lines


docs/tuning-techniques.md

Lines changed: 11 additions & 11 deletions
@@ -214,17 +214,17 @@ Activated LoRA (aLoRA) is a new low rank adapter architecture that allows for re
 
 [Github](https://github.com/IBM/activated-lora)
 
-**Usage** Usage is very similar to standard LoRA, with the key difference that an invocation_string must be specified so that the model knows when to turn on i.e "activate" the adapter weights. The model will scan any input strings (during training or at test time) for this invocation_string, and activate the adapter weights 1 token after the start of the sequence. If there are multiple instances of the invocation_string in the same input, it will activate at the last such instance.
+**Usage** Usage is very similar to standard LoRA, with the key difference that an alora_invocation_string must be specified so that the model knows when to turn on, i.e. "activate", the adapter weights. The model will scan any input strings (during training or at test time) for this alora_invocation_string, and activate the adapter weights 1 token after the start of the sequence. If there are multiple instances of the alora_invocation_string in the same input, it will activate at the last such instance.
 
 **Note** Often (not always) aLoRA requires higher rank (r) than LoRA. r=32 can be a good starting point for challenging tasks.
 
-**Installation** The Activated LoRA requirements are an optional install in pyproject.toml (activated-lora)
+**Installation** aLoRA support is provided via the [HF PEFT](https://github.com/huggingface/peft) library, at versions that include this [patch](https://github.com/huggingface/peft/pull/2609).
 
 Set `peft_method` to `"alora"`.
 
-You *must* pass in an invocation_string argument. This invocation_string *must be present* in both training data inputs and the input at test time. A good solution is to set invocation_string = response_template, this will ensure that every training input will have the invocation_string present. We keep these separate arguments for flexibility. It is most robust if the invocation_string begins and ends with special tokens.
+You *must* pass in an alora_invocation_string argument. This alora_invocation_string *must be present* in both the training data inputs and the input at test time. A good solution is to set alora_invocation_string = response_template; this ensures that every training input will have the alora_invocation_string present. We keep these as separate arguments for flexibility. It is most robust if the alora_invocation_string begins and ends with special tokens.
 
-You can additionally pass any arguments from [aLoraConfig](https://github.com/IBM/activated-lora/blob/fms-hf-tuning/alora/config.py#L35), see the LoRA section for examples.
+You can additionally pass any arguments from `LoraConfig`; see the LoRA section for examples.
 
 Example command to run, here using the ([Granite Instruct response template](https://huggingface.co/ibm-granite/granite-3.0-8b-instruct/blob/main/tokenizer_config.json#L188)) as the invocation sequence:
 

@@ -236,9 +236,9 @@ python tuning/sft_trainer.py \
 --output_dir $OUTPUT_PATH \
 --num_train_epochs 40 \
 --per_device_train_batch_size 4 \
----learning_rate 1e-4 \
+--learning_rate 1e-4 \
 --response_template "<|start_of_role|>assistant<|end_of_role|>" \ #this example uses special tokens in the Granite tokenizer, adjust for other models
---invocation_string "<|start_of_role|>assistant<|end_of_role|>" \
+--alora_invocation_string "<|start_of_role|>assistant<|end_of_role|>" \
 --dataset_text_field "output" \
 --peft_method "alora" \
 --r 32 \
@@ -257,7 +257,7 @@ Equally you can pass in a JSON configuration for running tuning. See [build doc]
     "per_device_train_batch_size": 4,
     "learning_rate": 1e-4,
     "response_template": "<|start_of_role|>assistant<|end_of_role|>",
-    "invocation_string": "<|start_of_role|>assistant<|end_of_role|>",
+    "alora_invocation_string": "<|start_of_role|>assistant<|end_of_role|>",
     "dataset_text_field": "output",
     "peft_method": "alora",
     "r": 32,
@@ -306,15 +306,15 @@ class SaveBestModelCallback(TrainerCallback):
 Example inference:
 ```py
 # Load the model
-loaded_model = TunedCausalLM.load(ALORA_MODEL, BASE_MODEL_NAME, use_alora=True)
+loaded_model = TunedCausalLM.load(ALORA_MODEL, BASE_MODEL_NAME)
 
 # Retrieve the invocation string from the model config
-invocation_string = loaded_model.peft_model.peft_config[
+alora_invocation_string = loaded_model.peft_model.peft_config[
     loaded_model.peft_model.active_adapter
-].invocation_string
+].alora_invocation_string
 
 # In this case, we have the invocation string at the end of the input
-input_string = "Simply put, the theory of relativity states that \n" + invocation_string
+input_string = "Simply put, the theory of relativity states that \n" + alora_invocation_string
 
 # Run inference on the text
 output_inference = loaded_model.run(
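
To make the "activate at the last such instance" rule above concrete, here is a small illustrative sketch of locating the last occurrence of the invocation token ids in a tokenized input. This is not the PEFT implementation; the model name is a placeholder and the exact activation boundary is approximate.

```py
# Illustrative only: shows the "activate at the last instance" rule described above.
# Assumes a tokenizer compatible with the adapter; the model name is a placeholder.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("ibm-granite/granite-3.0-8b-instruct")

alora_invocation_string = "<|start_of_role|>assistant<|end_of_role|>"
invocation_tokens = tokenizer.encode(alora_invocation_string, add_special_tokens=False)

prompt = "Simply put, the theory of relativity states that \n" + alora_invocation_string
input_ids = tokenizer.encode(prompt, add_special_tokens=False)

# Find the start index of the last occurrence of the invocation token ids.
last_start = max(
    (
        i
        for i in range(len(input_ids) - len(invocation_tokens) + 1)
        if input_ids[i : i + len(invocation_tokens)] == invocation_tokens
    ),
    default=None,
)

# The adapter weights would apply from roughly this position onward;
# earlier positions are processed with the base model weights only.
print(last_start)
```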

tests/test_sft_trainer.py

Lines changed: 1 addition & 6 deletions
@@ -97,7 +97,7 @@
     load_and_validate_data_config,
 )
 from tuning.data.data_handlers import DataHandler, DataHandlerType
-from tuning.utils.import_utils import is_alora_available, is_fms_accelerate_available
+from tuning.utils.import_utils import is_fms_accelerate_available
 
 MODEL_NAME = MAYKEYE_TINY_LLAMA_CACHED
 
@@ -153,7 +153,6 @@
 
 if hasattr(HFLoraConfig, "alora_invocation_tokens"):
     PEFT_ALORA_ARGS = peft_config.LoraConfig(r=8, lora_alpha=32, lora_dropout=0.05)
-    PEFT_ALORA_ARGS.alora_invocation_tokens = [42]
 else:
     PEFT_ALORA_ARGS = None
 
@@ -745,10 +744,6 @@ def test_run_causallm_lora_and_inference(request, target_modules, expected):
     assert "Simply put, the theory of relativity states that" in output_inference
 
 
-@pytest.mark.skipif(
-    not is_alora_available(),
-    reason="Only runs if alora is installed",
-)
 @pytest.mark.parametrize(
     "target_modules,expected",
     target_modules_val_map,
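
With `is_alora_available()` gone, the `hasattr(HFLoraConfig, "alora_invocation_tokens")` check above is the remaining feature gate. One way a test could still be skipped on PEFT versions without aLoRA support, sketched here as an assumption rather than what the suite actually does, is to key off the `PEFT_ALORA_ARGS is None` sentinel:

```py
# Illustrative sketch only: reuse the PEFT_ALORA_ARGS sentinel defined above to
# skip aLoRA tests when the installed peft has no `alora_invocation_tokens` field.
# The test name and assertion are hypothetical.
import pytest

@pytest.mark.skipif(
    PEFT_ALORA_ARGS is None,
    reason="Installed peft version does not support Activated LoRA",
)
def test_alora_config_has_expected_rank():
    assert PEFT_ALORA_ARGS.r == 8
```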

tuning/config/peft_config.py

Lines changed: 26 additions & 0 deletions
@@ -55,6 +55,32 @@ class LoraConfig(HFLoraConfig):
     lora_alpha: int = 32
     lora_dropout: float = 0.05
 
+    # Activated LoRA fields (optional)
+    alora_invocation_string: Optional[str] = field(
+        default=None,
+        metadata={
+            "help": (
+                "Human readable invocation string for aLoRA. If set, the training code "
+                "will tokenize this with the model tokenizer and persist the resulting "
+                "token ids in `alora_invocation_tokens` so the adapter can be activated "
+                "at inference time. This field is optional; users may instead set the "
+                "`alora_invocation_tokens` directly (list of ints)."
+            )
+        },
+    )
+
+    alora_invocation_tokens: Optional[List[int]] = field(
+        default=None,
+        metadata={
+            "help": (
+                "Token ids for the aLoRA invocation sequence. If provided, these will be "
+                "used directly and will take precedence over `alora_invocation_string`. "
+                "If not provided but `alora_invocation_string` is, the training flow will "
+                "tokenize the string and populate this field before training continues."
+            )
+        },
+    )
+
     # HACK: The following list of arguments listed below
     # is a fix which reduces the field annotation from
     # Optional[List[str], str] type to Optional[List[str]] type
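
Taken together, the two fields let users supply either a human-readable string or raw token ids. Below is a minimal sketch of the precedence the help strings describe (explicit token ids win, otherwise the string is tokenized); the function name and the pass-in tokenizer are illustrative, not part of the library API.

```py
# Minimal sketch of the precedence documented in the help strings above:
# explicit `alora_invocation_tokens` take precedence; otherwise
# `alora_invocation_string` is tokenized. Names are illustrative.
from typing import List, Optional


def resolve_invocation_tokens(
    alora_invocation_string: Optional[str],
    alora_invocation_tokens: Optional[List[int]],
    tokenizer,
) -> List[int]:
    if alora_invocation_tokens:
        # Token ids provided directly: use them as-is.
        return alora_invocation_tokens
    if alora_invocation_string:
        # Fall back to tokenizing the human-readable string.
        tokens = tokenizer.encode(alora_invocation_string, add_special_tokens=False)
        if not tokens:
            raise ValueError("`alora_invocation_string` produced no tokens")
        return tokens
    raise ValueError(
        "Either `alora_invocation_string` or `alora_invocation_tokens` is required"
    )
```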

tuning/sft_trainer.py

Lines changed: 28 additions & 24 deletions
@@ -363,18 +363,24 @@ def train(
     additional_metrics["model_load_time"] = time.time() - model_load_time
 
     # Convert legacy aLoRA string → token IDs (PEFT-native aLoRA)
-    if peft_config is not None and hasattr(peft_config, "alora_invocation_string"):
-        inv_str = getattr(peft_config, "alora_invocation_string")
-        if not inv_str:
-            raise ValueError(
-                "`--invocation_string` is required when using --peft_method alora."
-            )
-        alora_tokens = tokenizer.encode(inv_str, add_special_tokens=False)
-        if not alora_tokens:
-            raise ValueError(
-                "`--invocation_string` produced no tokens; check your tokenizer/template."
-            )
-        setattr(peft_config, "alora_invocation_tokens", alora_tokens)
+    if peft_config is not None:
+        inv_str = getattr(peft_config, "alora_invocation_string", None)
+        has_string = isinstance(inv_str, str) and inv_str.strip() != ""
+        has_tokens = hasattr(peft_config, "alora_invocation_tokens") and bool(
+            getattr(peft_config, "alora_invocation_tokens")
+        )
+
+        if has_string and not has_tokens:
+            alora_tokens = tokenizer.encode(inv_str, add_special_tokens=False)
+            if not alora_tokens:
+                raise ValueError(
+                    "`alora_invocation_string` produced no tokens; check your tokenizer/template."
+                )
+            setattr(peft_config, "alora_invocation_tokens", alora_tokens)
+
+        elif not has_tokens:
+            # Only raise if neither tokens nor string is present
+            raise ValueError("`alora_invocation_string` is required when using aLoRA.")
 
     peft_config = get_hf_peft_config(
         task_type,
@@ -587,13 +593,13 @@ def get_parser():
         help='Pass a json string representing K:V pairs to be associated\
             to the tuning run in the tracker. e.g. \'{"gpu":"A100-80G"}\'',
     )
-    parser.add_argument(
-        "--invocation_string",
-        type=str,
-        default=None,
-        help="Pass a invocation string that will be used to activate the aLoRA.\
-            This needs to be present in each training data row.",
-    )
+    # parser.add_argument(
+    #     "--alora_invocation_string",
+    #     type=str,
+    #     default=None,
+    #     help="Pass an invocation string that will be used to activate the aLoRA.\
+    #         This needs to be present in each training data row.",
+    # )
     return parser
 
 
@@ -651,7 +657,7 @@ def parse_arguments(parser, json_config=None):
         peft_method = json_config.get("peft_method")
         exp_metadata = json_config.get("exp_metadata")
         quantization_method = json_config.get("quantization_method")
-        invocation_string = json_config.get("invocation_string")
+        # alora_invocation_string = json_config.get("alora_invocation_string")
     else:
         (
             model_args,
@@ -673,13 +679,11 @@
         peft_method = additional.peft_method
         exp_metadata = additional.exp_metadata
         quantization_method = additional.quantization_method
-        invocation_string = additional.invocation_string
+        # alora_invocation_string = additional.alora_invocation_string
 
     if peft_method == peft_config.PEFT_METHOD.ALORA.value:
-        if invocation_string is None:
-            raise ValueError("invocation_string is required for aLoRA usage")
         tune_config = lora_config
-        setattr(tune_config, "alora_invocation_string", invocation_string)
+        # setattr(tune_config, "alora_invocation_string", alora_invocation_string)
     elif peft_method == peft_config.PEFT_METHOD.LORA.value:
         tune_config = lora_config
     elif peft_method == peft_config.PEFT_METHOD.PT.value:
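
Since the dedicated `--invocation_string` argparse flag is commented out here, the value presumably reaches training through the `LoraConfig` dataclass fields instead (the docs' CLI example still passes `--alora_invocation_string`). A rough sketch of that pattern with `HfArgumentParser`, using a simplified stand-in dataclass rather than the real `tuning.config.peft_config.LoraConfig`:

```py
# Rough sketch (assumption): dataclass fields parsed by HfArgumentParser surface as
# CLI flags, which would keep `--alora_invocation_string` usable after the explicit
# argparse argument was removed. MiniLoraConfig is a simplified stand-in.
from dataclasses import dataclass, field
from typing import Optional

from transformers import HfArgumentParser


@dataclass
class MiniLoraConfig:
    r: int = 8
    lora_alpha: int = 32
    alora_invocation_string: Optional[str] = field(default=None)


parser = HfArgumentParser(MiniLoraConfig)
(cfg,) = parser.parse_args_into_dataclasses(
    ["--r", "32", "--alora_invocation_string", "<|start_of_role|>assistant<|end_of_role|>"]
)
print(cfg.r, cfg.alora_invocation_string)
```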

tuning/utils/config_utils.py

Lines changed: 4 additions & 1 deletion
@@ -61,7 +61,10 @@ def create_tuning_config(peft_method, **kwargs):
         "pt",
         "None",
     ], f"peft config {peft_method} not defined in peft.py"
-    if peft_method in ("alora", "lora"):
+    if peft_method in (
+        peft_config.PEFT_METHOD.ALORA.value,
+        peft_config.PEFT_METHOD.LORA.value,
+    ):
         tune_config = peft_config.LoraConfig()
         update_config(tune_config, **kwargs)
     elif peft_method == "pt":

tuning/utils/import_utils.py

Lines changed: 0 additions & 11 deletions
@@ -32,14 +32,3 @@ def is_fms_accelerate_available(
         if not _is_package_available(n):
             return False
     return True
-
-
-def is_alora_available() -> bool:
-    try:
-        # Third Party
-        from peft import LoraConfig  # pylint: disable=import-outside-toplevel
-
-        # Check if LoraConfig has the new Activated LoRA field
-        return hasattr(LoraConfig, "alora_invocation_tokens")
-    except ImportError:
-        return False
