Add --lora-layer-indices option to apply LoRA only to selected model layers

J-SUPHA · J-SUPHA · commit 301d0b269920 · 2026-02-18T10:52:20.000-05:00
diff --git a/example_trainer/README.md b/example_trainer/README.md
@@ -570,6 +570,48 @@ python -m example_trainer.vllm_api_server  # NOT direct vllm commands
 | `--lora-alpha` | 32 | LoRA alpha scaling factor |
 | `--lora-dropout` | 0.05 | LoRA dropout probability |
 | `--lora-target-modules` | None | Module names to apply LoRA (`None` falls back to `q_proj v_proj`) |
+| `--lora-layer-indices` | None | Optional layer filter; indices are zero-based and ranges are inclusive (examples: `20-31`, `0-3,28-31`) |
+
+### LoRA Layer Index Guide (by Architecture)
+
+Valid values for `--lora-layer-indices` are model-dependent: indices are zero-based, and different models expose different numbers of transformer blocks, so a range that is valid for one model may be out of bounds for another.
+
+| Architecture family | Common config fields | Typical layer list path | Notes |
+|---------------------|----------------------|-------------------------|-------|
+| LLaMA / Llama-2 / Llama-3 / Mistral | `num_hidden_layers` | `model.layers` | Most common causal-LM layout |
+| Qwen / Qwen2 / Qwen2.5 / Qwen3 | `num_hidden_layers` | `model.layers` | Similar layer indexing to LLaMA |
+| GPT-2 / GPT-J style | `n_layer` or mapped to `num_hidden_layers` | `transformer.h` | PEFT may use `h` pattern internally |
+| Falcon | `num_hidden_layers` | `transformer.h` | Uses `h` block list in model module tree |
+
+#### Reliable way to check for any model
+
+Always query the model config before choosing indices:
+
+```bash
+python - <<'PY'
+from transformers import AutoConfig
+
+model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
+cfg = AutoConfig.from_pretrained(model_id)
+num_layers = getattr(cfg, "num_hidden_layers", None)
+if num_layers is None:
+    num_layers = getattr(cfg, "n_layer", None)
+
+print(f"model={model_id}")
+print(f"num_hidden_layers={num_layers}")
+if num_layers is not None:
+    print(f"valid index range: 0-{num_layers-1}")
+PY
+```
+
+#### Practical presets
+
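+If your model has `N` layers, compute the expressions in braces and pass the resulting numbers (for example, with `N = 32` the top 25% is `24-31`):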
+
+- All layers: omit `--lora-layer-indices`
+- Top 25%: `--lora-layer-indices {int(0.75*N)}-{N-1}`
+- Top 50%: `--lora-layer-indices {int(0.5*N)}-{N-1}`
+- Last 12 layers: `--lora-layer-indices {N-12}-{N-1}` (if `N >= 12`)
 
 ### vLLM Arguments
 
diff --git a/example_trainer/cli.py b/example_trainer/cli.py
@@ -6,6 +6,7 @@
 """
 
 import argparse
+from typing import List, Optional
 
 import torch
 
@@ -16,6 +17,53 @@
 # =============================================================================
 
 
+def _parse_lora_layer_indices(value: str) -> Optional[List[int]]:
+    """Parse a ``--lora-layer-indices`` string into a sorted, de-duplicated list.
+
+    Accepts comma-separated integers and inclusive ``start-end`` ranges, e.g.
+    "20-31", "0,1,2,28,29,30,31" or "0-3,28-31"; empty items are skipped.
+
+    Returns ``None`` when ``value`` is ``None``, blank, or yields no indices.
+    Raises ``argparse.ArgumentTypeError`` for non-integer items, start > end, or negative indices.
+    """
+    if value is None:
+        return None
+
+    raw = value.strip()
+    if not raw:
+        return None
+
+    indices: List[int] = []
+    parts = [part.strip() for part in raw.split(",") if part.strip()]  # drop empty items
+
+    try:
+        for part in parts:
+            if "-" in part:
+                start_s, end_s = part.split("-", 1)  # split on the first "-" only
+                start = int(start_s.strip())
+                end = int(end_s.strip())
+                if start > end:
+                    raise argparse.ArgumentTypeError(
+                        f"Invalid range '{part}': start must be <= end"
+                    )
+                indices.extend(range(start, end + 1))  # end is inclusive
+            else:
+                indices.append(int(part))
+    except ValueError as e:  # int() rejected a non-numeric or empty token
+        raise argparse.ArgumentTypeError(
+            f"Invalid --lora-layer-indices value '{value}': {e}"
+        ) from e
+
+    if not indices:
+        return None
+    if any(idx < 0 for idx in indices):
+        raise argparse.ArgumentTypeError(
+            f"Invalid --lora-layer-indices '{value}': indices must be >= 0"
+        )
+
+    return sorted(set(indices))  # de-duplicate, then order ascending
+
+
 def add_model_args(parser: argparse.ArgumentParser) -> None:
     """Add model-related arguments."""
     group = parser.add_argument_group("Model")
@@ -225,6 +273,15 @@ def add_lora_args(parser: argparse.ArgumentParser) -> None:
         default=None,
         help="Module names to apply LoRA to (default: q_proj v_proj)",
     )
+    group.add_argument(
+        "--lora-layer-indices",
+        type=_parse_lora_layer_indices,
+        default=None,
+        help=(
+            "Optional layer indices to apply LoRA to, e.g. '20-31' or "
+            "'0-3,28-31'. If omitted, applies to all matching layers."
+        ),
+    )
 
 
 def add_distributed_args(parser: argparse.ArgumentParser) -> None:
@@ -373,6 +430,7 @@ def config_from_args(args: argparse.Namespace) -> TrainingConfig:
         lora_alpha=getattr(args, "lora_alpha", 32),
         lora_dropout=getattr(args, "lora_dropout", 0.05),
         lora_target_modules=getattr(args, "lora_target_modules", None),
+        lora_layer_indices=getattr(args, "lora_layer_indices", None),
         vllm_config_path=getattr(args, "vllm_config_path", None),
         debug_loading=getattr(args, "debug_loading", False),
         benchmark=getattr(args, "benchmark", False),
diff --git a/example_trainer/config.py b/example_trainer/config.py
@@ -154,6 +154,13 @@ class TrainingConfig(BaseModel):
             "If None, defaults to ['q_proj', 'v_proj'] for most models."
         ),
     )
+    lora_layer_indices: Optional[List[int]] = Field(
+        None,
+        description=(
+            "Optional list of transformer layer indices to apply LoRA to. "
+            "If None, applies LoRA to all matching layers."
+        ),
+    )
 
     # === Single-Copy Mode Configuration ===
     single_copy: bool = Field(
diff --git a/example_trainer/model.py b/example_trainer/model.py
@@ -200,18 +200,41 @@ def _load_model_with_lora(config: TrainingConfig) -> torch.nn.Module:
     target_modules = config.lora_target_modules
     if target_modules is None:
         target_modules = ["q_proj", "v_proj"]
+    layer_indices = config.lora_layer_indices
+
+    if layer_indices is not None:
+        num_hidden_layers = getattr(base_model.config, "num_hidden_layers", None)
+        if num_hidden_layers is None:
+            raise RuntimeError(
+                "Model config does not expose num_hidden_layers; cannot validate "
+                "--lora-layer-indices for this architecture."
+            )
+        invalid = [idx for idx in layer_indices if idx >= num_hidden_layers]
+        if invalid:
+            raise ValueError(
+                f"Invalid --lora-layer-indices {invalid} for model with "
+                f"{num_hidden_layers} layers (valid range: 0-{num_hidden_layers - 1})"
+            )
 
     print(f"Applying LoRA: r={config.lora_r}, alpha={config.lora_alpha}")
     print(f"Target modules: {target_modules}")
+    if layer_indices is not None:
+        print(
+            f"Applying LoRA only to layers: {layer_indices} "
+            f"(total {len(layer_indices)})"
+        )
 
-    lora_config = LoraConfig(
+    lora_kwargs = dict(
         task_type=TaskType.CAUSAL_LM,
         r=config.lora_r,
         lora_alpha=config.lora_alpha,
         lora_dropout=config.lora_dropout,
         target_modules=target_modules,
         bias="none",
     )
+    if layer_indices is not None:
+        lora_kwargs["layers_to_transform"] = layer_indices
+    lora_config = LoraConfig(**lora_kwargs)
 
     model = get_peft_model(base_model, lora_config)
     model.print_trainable_parameters()