Skip to content

Commit 7ba58e4

Browse files
quic-mamta and mamtsing authored
[QEff Finetune] : fix task_type variable in configs (quic#514)
1. fix task_type variable in configs 2. enabled passing peft_config yaml/json file from command line. 3. updated run_ft_model.py --------- Signed-off-by: Mamta Singh <mamtsing@qti.qualcomm.com> Co-authored-by: Mamta Singh <mamtsing@qti.qualcomm.com>
1 parent a78e983 commit 7ba58e4

File tree

7 files changed

+105
-71
lines changed

7 files changed

+105
-71
lines changed

QEfficient/cloud/finetune.py

Lines changed: 9 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
import logging
99
import random
1010
import warnings
11-
from typing import Any, Dict, Optional, Union
11+
from typing import Any, Optional, Union
1212

1313
import numpy as np
1414
import torch
@@ -27,6 +27,7 @@
2727
update_config,
2828
)
2929
from QEfficient.finetune.utils.dataset_utils import get_dataloader
30+
from QEfficient.finetune.utils.helper import Task_Mode
3031
from QEfficient.finetune.utils.logging_utils import logger
3132
from QEfficient.finetune.utils.parser import get_finetune_parser
3233
from QEfficient.finetune.utils.train_utils import (
@@ -90,14 +91,13 @@ def setup_seeds(seed: int) -> None:
9091

9192

9293
def load_model_and_tokenizer(
93-
train_config: TrainConfig, dataset_config: Any, peft_config_file: str, **kwargs
94+
train_config: TrainConfig, dataset_config: Any, **kwargs
9495
) -> tuple[AutoModelForCausalLM, AutoTokenizer]:
9596
"""Load the pre-trained model and tokenizer from Hugging Face.
9697
9798
Args:
9899
config (TrainConfig): Training configuration object containing model and tokenizer names.
99100
dataset_config (Any): A dataclass object representing dataset configuration.
100-
peft_config_file (str): Path to PEFT config file used for PEFT finetuning.
101101
kwargs: Additional arguments to override PEFT config.
102102
103103
Returns:
@@ -113,7 +113,7 @@ def load_model_and_tokenizer(
113113
"""
114114
logger.log_rank_zero(f"Loading HuggingFace model for {train_config.model_name}")
115115
pretrained_model_path = hf_download(train_config.model_name)
116-
if train_config.task_type == "seq_classification":
116+
if train_config.task_mode == Task_Mode.SEQ_CLASSIFICATION:
117117
model = AutoModelForSequenceClassification.from_pretrained(
118118
pretrained_model_path,
119119
num_labels=dataset_config.num_labels,
@@ -166,21 +166,17 @@ def load_model_and_tokenizer(
166166
"Given model doesn't support gradient checkpointing. Please disable it and run it.", RuntimeError
167167
)
168168

169-
model = apply_peft(model, train_config, peft_config_file, **kwargs)
169+
model = apply_peft(model, train_config, **kwargs)
170170

171171
return model, tokenizer
172172

173173

174-
def apply_peft(
175-
model: AutoModel, train_config: TrainConfig, peft_config_file: Dict, **kwargs
176-
) -> Union[AutoModel, PeftModel]:
174+
def apply_peft(model: AutoModel, train_config: TrainConfig, **kwargs) -> Union[AutoModel, PeftModel]:
177175
"""Apply Parameter-Efficient Fine-Tuning (PEFT) to the model if enabled.
178176
179177
Args:
180178
model (AutoModel): Huggingface model.
181179
train_config (TrainConfig): Training configuration object.
182-
peft_config_file (str, optional): Path to YAML/JSON file containing
183-
PEFT (LoRA) config. Defaults to None.
184180
kwargs: Additional arguments to override PEFT config params.
185181
186182
Returns:
@@ -197,7 +193,7 @@ def apply_peft(
197193
peft_config = model.peft_config
198194
# Generate the peft config and start fine-tuning from original model
199195
else:
200-
peft_config = generate_peft_config(train_config, peft_config_file, **kwargs)
196+
peft_config = generate_peft_config(train_config, **kwargs)
201197
model = get_peft_model(model, peft_config)
202198
print_trainable_parameters(model)
203199

@@ -254,12 +250,11 @@ def setup_dataloaders(
254250
return train_dataloader, eval_dataloader, longest_seq_length
255251

256252

257-
def main(peft_config_file: str = None, **kwargs) -> None:
253+
def main(**kwargs) -> None:
258254
"""
259255
Fine-tune a model on QAIC hardware with configurable training and LoRA parameters.
260256
261257
Args:
262-
peft_config_file (str, optional): Path to YAML/JSON file containing PEFT (LoRA) config. Defaults to None.
263258
kwargs: Additional arguments to override TrainConfig.
264259
265260
Example:
@@ -286,7 +281,7 @@ def main(peft_config_file: str = None, **kwargs) -> None:
286281

287282
setup_distributed_training(train_config)
288283
setup_seeds(train_config.seed)
289-
model, tokenizer = load_model_and_tokenizer(train_config, dataset_config, peft_config_file, **kwargs)
284+
model, tokenizer = load_model_and_tokenizer(train_config, dataset_config, **kwargs)
290285

291286
# Create DataLoaders for the training and validation dataset
292287
train_dataloader, eval_dataloader, longest_seq_length = setup_dataloaders(train_config, dataset_config, tokenizer)
@@ -295,7 +290,6 @@ def main(peft_config_file: str = None, **kwargs) -> None:
295290
f"passed context length is {train_config.context_length} and overall model's context length is "
296291
f"{model.config.max_position_embeddings}"
297292
)
298-
299293
model.to(train_config.device)
300294
optimizer = optim.AdamW(model.parameters(), lr=train_config.lr, weight_decay=train_config.weight_decay)
301295
scheduler = StepLR(optimizer, step_size=1, gamma=train_config.gamma)

QEfficient/finetune/configs/training.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
import logging
99
from dataclasses import dataclass
1010

11+
from QEfficient.finetune.utils.helper import Batching_Strategy, Device, Peft_Method, Task_Mode
12+
1113

1214
# Configuration Classes
1315
@dataclass
@@ -35,10 +37,11 @@ class TrainConfig:
3537
gamma (float): Learning rate decay factor (default: 0.85).
3638
seed (int): Random seed for reproducibility (default: 42).
3739
dataset (str): Dataset name for training (default: "alpaca_dataset").
38-
task_type (str): Type of task for which the finetuning is to be done. Options: "generation" and "seq_classification". (default: "generation")
40+
task_mode (str): Mode of task for which the finetuning is to be done. Options: "generation" and "seq_classification". (default: "generation")
3941
use_peft (bool): Whether to use PEFT (default: True).
4042
peft_method (str): Parameter-efficient fine-tuning method (default: "lora").
41-
from_peft_checkpoint (str): Path to PEFT checkpoint (default: "").
43+
peft_config_file (str): Path to YAML/JSON file containing PEFT (LoRA) config. (default: None)
44+
from_peft_checkpoint (str): Path to PEFT checkpoint (default: None).
4245
output_dir (str): Directory to save outputs (default: "training_results").
4346
save_model (bool): Save the trained model (default: True).
4447
save_metrics (bool): Save training metrics (default: True).
@@ -49,8 +52,9 @@ class TrainConfig:
4952
convergence_loss (float): Loss threshold for convergence (default: 1e-4).
5053
use_profiler (bool): Enable profiling (default: False).
5154
enable_ddp (bool): Enable distributed data parallel (default: False).
52-
dump_root_dir (str): Directory for mismatch dumps (default: "mismatches/step_").
5355
opByOpVerifier (bool): Enable operation-by-operation verification (default: False).
56+
dump_logs (bool): Whether to dump logs (default: True).
57+
log_level (str): logging level (default: logging.INFO)
5458
"""
5559

5660
model_name: str = "meta-llama/Llama-3.2-1B"
@@ -66,22 +70,23 @@ class TrainConfig:
6670
num_epochs: int = 1
6771
max_train_step: int = 0
6872
max_eval_step: int = 0
69-
device: str = "qaic"
73+
device: str = Device.QAIC.value
7074
num_workers_dataloader: int = 1
7175
lr: float = 3e-4
7276
weight_decay: float = 0.0
7377
gamma: float = 0.85 # multiplicatively decay the learning rate by gamma after each epoch
7478
seed: int = 42
7579
dataset: str = "alpaca_dataset"
76-
task_type: str = "generation" # "generation" / "seq_classification"
80+
task_mode: str = Task_Mode.GENERATION.value # "generation" / "seq_classification"
7781
use_peft: bool = True # use parameter efficient finetuning
78-
peft_method: str = "lora"
79-
from_peft_checkpoint: str = "" # if not empty and peft_method='lora', will load the peft checkpoint and resume the fine-tuning on that checkpoint
82+
peft_method: str = Peft_Method.LORA.value
83+
peft_config_file: str = None
84+
from_peft_checkpoint: str = None # if not empty and peft_method='lora', will load the peft checkpoint and resume the fine-tuning on that checkpoint
8085
output_dir: str = "training_results"
8186
save_model: bool = True
8287
save_metrics: bool = True # saves training metrics to a json file for later plotting
8388
intermediate_step_save: int = 1000
84-
batching_strategy: str = "packing"
89+
batching_strategy: str = Batching_Strategy.PADDING.value
8590
enable_ddp: bool = False
8691
enable_sorting_for_ddp: bool = True
8792
convergence_counter: int = 5 # its value should be >= 1, stop fine tuning when loss <= convergence_loss (defined below) for #convergence_counter steps

QEfficient/finetune/utils/config_utils.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from QEfficient.finetune.configs.peft_config import LoraConfig
1919
from QEfficient.finetune.configs.training import TrainConfig
2020
from QEfficient.finetune.dataset.dataset_config import DATASET_PREPROC
21+
from QEfficient.finetune.utils.helper import Peft_Method
2122
from QEfficient.finetune.utils.logging_utils import logger
2223

2324

@@ -52,25 +53,24 @@ def update_config(config, **kwargs):
5253
logger.debug(f"Unknown parameter '{k}' for config type '{config_type}'")
5354

5455

55-
def generate_peft_config(train_config: TrainConfig, peft_config_file: str = None, **kwargs) -> Any:
56+
def generate_peft_config(train_config: TrainConfig, **kwargs) -> Any:
5657
"""Generate a PEFT-compatible configuration from a custom config based on peft_method.
5758
5859
Args:
5960
train_config (TrainConfig): Training configuration with peft_method.
60-
custom_config: Custom configuration object (e.g., LoraConfig).
6161
6262
Returns:
6363
Any: A PEFT-specific configuration object (e.g., PeftLoraConfig).
6464
6565
Raises:
6666
RuntimeError: If the peft_method is not supported.
6767
"""
68-
if peft_config_file:
69-
peft_config_data = load_config_file(peft_config_file)
70-
validate_config(peft_config_data, config_type="lora")
68+
if train_config.peft_config_file:
69+
peft_config_data = load_config_file(train_config.peft_config_file)
70+
validate_config(peft_config_data, config_type=Peft_Method.LORA)
7171
peft_config = PeftLoraConfig(**peft_config_data)
7272
else:
73-
config_map = {"lora": (LoraConfig, PeftLoraConfig)}
73+
config_map = {Peft_Method.LORA: (LoraConfig, PeftLoraConfig)}
7474
if train_config.peft_method not in config_map:
7575
logger.raise_error(f"Peft config not found: {train_config.peft_method}", RuntimeError)
7676

@@ -105,7 +105,7 @@ def generate_dataset_config(dataset_name: str) -> Any:
105105
return dataset_config
106106

107107

108-
def validate_config(config_data: Dict[str, Any], config_type: str = "lora") -> None:
108+
def validate_config(config_data: Dict[str, Any], config_type: str = Peft_Method.LORA) -> None:
109109
"""Validate the provided YAML/JSON configuration for required fields and types.
110110
111111
Args:
@@ -120,7 +120,7 @@ def validate_config(config_data: Dict[str, Any], config_type: str = "lora") -> N
120120
- Validates required fields for LoraConfig: r, lora_alpha, target_modules.
121121
- Ensures types match expected values (int, float, list, etc.).
122122
"""
123-
if config_type.lower() != "lora":
123+
if config_type.lower() != Peft_Method.LORA:
124124
logger.raise_error(f"Unsupported config_type: {config_type}. Only 'lora' is supported.", ValueError)
125125

126126
required_fields = {

QEfficient/finetune/utils/helper.py

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
# -----------------------------------------------------------------------------
77
import os
88
from contextlib import nullcontext
9+
from enum import Enum
910

1011
import torch
1112

@@ -15,10 +16,28 @@
1516
print(f"Warning: {e}. Moving ahead without these qaic modules.")
1617

1718

18-
TASK_TYPE = ["generation", "seq_classification"]
19-
PEFT_METHOD = ["lora"]
20-
DEVICE = ["qaic", "cpu", "cuda"]
21-
BATCHING_STRATEGY = ["padding", "packing"]
19+
class Batching_Strategy(str, Enum):
20+
PADDING = "padding"
21+
PACKING = "packing"
22+
23+
24+
class Device(str, Enum):
25+
QAIC = "qaic"
26+
CPU = "cpu"
27+
CUDA = "cuda"
28+
29+
30+
class Peft_Method(str, Enum):
31+
LORA = "lora"
32+
33+
34+
class Task_Mode(str, Enum):
35+
GENERATION = "generation"
36+
SEQ_CLASSIFICATION = "seq_classification"
37+
38+
39+
def enum_names(enum_cls):
40+
return [member.value for member in enum_cls]
2241

2342

2443
def is_rank_zero():

QEfficient/finetune/utils/parser.py

Lines changed: 34 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,10 @@
66
# -----------------------------------------------------------------------------
77

88
import argparse
9+
import logging
910

1011
from QEfficient.finetune.dataset.dataset_config import DATASET_PREPROC
11-
from QEfficient.finetune.utils.helper import BATCHING_STRATEGY, DEVICE, PEFT_METHOD, TASK_TYPE
12+
from QEfficient.finetune.utils.helper import Batching_Strategy, Device, Peft_Method, Task_Mode, enum_names
1213

1314

1415
def str2bool(v):
@@ -110,7 +111,14 @@ def get_finetune_parser():
110111
default=0,
111112
help="Maximum evaluation steps, unlimited if 0",
112113
)
113-
parser.add_argument("--device", required=False, type=str, default="qaic", choices=DEVICE, help="Device to train on")
114+
parser.add_argument(
115+
"--device",
116+
required=False,
117+
type=str,
118+
default=Device.QAIC.value,
119+
choices=enum_names(Device),
120+
help="Device to train on",
121+
)
114122
parser.add_argument(
115123
"--num_workers_dataloader",
116124
"--num-workers-dataloader",
@@ -140,12 +148,12 @@ def get_finetune_parser():
140148
help="Dataset name to be used for finetuning (default: %(default)s)",
141149
)
142150
parser.add_argument(
143-
"--task_type",
144-
"--task-type",
151+
"--task_mode",
152+
"--task-mode",
145153
required=False,
146154
type=str,
147-
default="generation",
148-
choices=TASK_TYPE,
155+
default=Task_Mode.GENERATION.value,
156+
choices=enum_names(Task_Mode),
149157
help="Task used for finetuning. Use 'generation' for decoder based models and 'seq_classification' for encoder based models.",
150158
)
151159
parser.add_argument(
@@ -162,8 +170,8 @@ def get_finetune_parser():
162170
"--peft-method",
163171
required=False,
164172
type=str,
165-
default="lora",
166-
choices=PEFT_METHOD,
173+
default=Peft_Method.LORA.value,
174+
choices=enum_names(Peft_Method),
167175
help="Parameter efficient finetuning technique to be used. Currently only 'lora' is supported.",
168176
)
169177
parser.add_argument(
@@ -213,8 +221,8 @@ def get_finetune_parser():
213221
"--batching-strategy",
214222
required=False,
215223
type=str,
216-
default="padding",
217-
choices=BATCHING_STRATEGY,
224+
default=Batching_Strategy.PADDING.value,
225+
choices=enum_names(Batching_Strategy),
218226
help="Strategy for making batches of data points. Packing groups data points into batches by minimizing unnecessary empty spaces. Padding adds extra values (often zeros) to batch sequences so they align in size. Currently only padding is supported which is by default.",
219227
)
220228
parser.add_argument(
@@ -261,7 +269,22 @@ def get_finetune_parser():
261269
# This is for debugging purpose only.
262270
# Enables operation-by-operation verification w.r.t reference device(cpu).
263271
# It is a context manager interface that captures and verifies each operator against reference device.
264-
# In case results of test & reference do not match under given tolerances, a standalone unittest is generated at dump_root_dir.
272+
# In case results of test & reference do not match under given tolerances, a standalone unittest is generated at output_dir/mismatches.
273+
)
274+
parser.add_argument(
275+
"--log_level",
276+
"--log-level",
277+
required=False,
278+
type=str,
279+
default=logging.INFO,
280+
help="logging level",
281+
)
282+
parser.add_argument(
283+
"--peft_config_file",
284+
"--peft-config-file",
285+
type=str,
286+
default=None,
287+
help="Path to YAML/JSON file containing PEFT (LoRA) config.",
265288
)
266289

267290
return parser

0 commit comments

Comments (0)