Lora scorer (#170)

riapush · github-actions[bot] · Samoed · web-flow · commit 3f80b5289b13 · 2025-04-24T17:07:07.000+03:00
* added lora scorer * fix ruff * Update __init__.py * updated after mr #165 * Update pyproject.toml * fixed requested changes * fixed ruff failing * fixed remarks * Update optimizer_config.schema.json * added test * ruff fix * convert labels to float * Update autointent/modules/scoring/_lora/lora.py Co-authored-by: Roman Solomatin <samoed.roman@gmail.com> * Update autointent/modules/scoring/_lora/lora.py Co-authored-by: Roman Solomatin <samoed.roman@gmail.com> * change model_config name, added trust_remote_code * Update lora.py * inherited lora from bert * fix ruff * fix search space * Update lora.py * Update lora.py * added dump check * Update test_lora.py * Update test_lora.py * added docstring * fix ruff * Update test_lora.py * Update test_lora.py --------- Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> Co-authored-by: Roman Solomatin <samoed.roman@gmail.com>
diff --git a/.gitignore b/.gitignore
@@ -179,3 +179,4 @@ tests_logs
 tests/logs
 runs/
 vector_db*
+/wandb
diff --git a/autointent/modules/__init__.py b/autointent/modules/__init__.py
@@ -13,6 +13,7 @@
 from .embedding import LogregAimedEmbedding, RetrievalAimedEmbedding
 from .regex import SimpleRegex
 from .scoring import (
+    BERTLoRAScorer,
     BertScorer,
     DescriptionScorer,
     DNNCScorer,
@@ -46,6 +47,7 @@ def _create_modules_dict(modules: list[type[T]]) -> dict[str, type[T]]:
         SklearnScorer,
         MLKnnScorer,
         BertScorer,
+        BERTLoRAScorer
     ]
 )
 
diff --git a/autointent/modules/scoring/__init__.py b/autointent/modules/scoring/__init__.py
@@ -3,10 +3,12 @@
 from ._dnnc import DNNCScorer
 from ._knn import KNNScorer, RerankScorer
 from ._linear import LinearScorer
+from ._lora import BERTLoRAScorer
 from ._mlknn import MLKnnScorer
 from ._sklearn import SklearnScorer
 
 __all__ = [
+    "BERTLoRAScorer",
     "BertScorer",
     "DNNCScorer",
     "DescriptionScorer",
diff --git a/autointent/modules/scoring/_bert.py b/autointent/modules/scoring/_bert.py
@@ -72,30 +72,33 @@ def from_context(
     def get_embedder_config(self) -> dict[str, Any]:
         return self.classification_model_config.model_dump()
 
-    def fit(
-        self,
-        utterances: list[str],
-        labels: ListOfLabels,
-    ) -> None:
-        if hasattr(self, "_model"):
-            self.clear_cache()
-        self._validate_task(labels)
-
-        model_name = self.classification_model_config.model_name
-        self._tokenizer = AutoTokenizer.from_pretrained(model_name)
-
+    def __initialize_model(self) -> None:
+        """Create ``self._model`` for the configured checkpoint.
+
+        Loads ``AutoModelForSequenceClassification`` from
+        ``self.classification_model_config.model_name`` with ``self._n_classes`` output labels and a
+        problem type chosen from ``self._multilabel``.
+        """
+        # NOTE(review): the leading double underscore triggers Python's private-name mangling, so the
+        # ``self.__initialize_model()`` call in ``fit`` always resolves to the implementation defined in
+        # this class. A subclass that defines its own ``__initialize_model`` does NOT override it;
+        # consider a single-underscore name if this is meant to be an overridable hook.
+        # Identity mappings: class indices are used as label names.
         label2id = {i: i for i in range(self._n_classes)}
         id2label = {i: i for i in range(self._n_classes)}
 
         self._model = AutoModelForSequenceClassification.from_pretrained(
-            model_name,
+            self.classification_model_config.model_name,
             trust_remote_code=self.classification_model_config.trust_remote_code,
             num_labels=self._n_classes,
             label2id=label2id,
             id2label=id2label,
             problem_type="multi_label_classification" if self._multilabel else "single_label_classification",
         )
 
+
+    def fit(
+        self,
+        utterances: list[str],
+        labels: ListOfLabels,
+    ) -> None:
+        if hasattr(self, "_model"):
+            self.clear_cache()
+        self._validate_task(labels)
+
+        self._tokenizer = AutoTokenizer.from_pretrained(self.classification_model_config.model_name)
+
+        self.__initialize_model()
+
         use_cpu = self.classification_model_config.device == "cpu"
 
         def tokenize_function(examples: dict[str, Any]) -> dict[str, Any]:
diff --git a/autointent/modules/scoring/_lora/__init__.py b/autointent/modules/scoring/_lora/__init__.py
@@ -0,0 +1,3 @@
+from .lora import BERTLoRAScorer
+
+__all__ = ["BERTLoRAScorer"]
diff --git a/autointent/modules/scoring/_lora/lora.py b/autointent/modules/scoring/_lora/lora.py
@@ -0,0 +1,117 @@
+"""BERTLoRAScorer class for transformer-based classification with LoRA."""
+
+from typing import Any
+
+from peft import LoraConfig, get_peft_model
+from transformers import AutoModelForSequenceClassification
+
+from autointent import Context
+from autointent._callbacks import REPORTERS_NAMES
+from autointent.configs import HFModelConfig
+from autointent.modules.scoring._bert import BertScorer
+
+
+class BERTLoRAScorer(BertScorer):
+    """BERTLoRAScorer class for transformer-based classification with LoRA (Low-Rank Adaptation).
+
+    Args:
+        classification_model_config: Config of the base transformer model (HFModelConfig, str, or dict)
+        num_train_epochs: Number of training epochs (default: 3)
+        batch_size: Batch size for training (default: 8)
+        learning_rate: Learning rate for training (default: 5e-5)
+        seed: Random seed for reproducibility (default: 0)
+        report_to: Reporting tool for training logs
+        **lora_kwargs: Arguments for `LoraConfig <https://huggingface.co/docs/peft/package_reference/lora#peft.LoraConfig>`_
+
+    Example:
+    --------
+    .. testcode::
+
+        from autointent.modules import BERTLoRAScorer
+
+        # Initialize scorer with LoRA configuration
+        scorer = BERTLoRAScorer(
+            classification_model_config="bert-base-uncased",
+            num_train_epochs=3,
+            batch_size=8,
+            learning_rate=5e-5,
+            seed=42,
+            r=8,  # LoRA rank
+            lora_alpha=16,  # LoRA alpha
+        )
+
+        # Training data
+        utterances = ["This is great!", "I didn't like it", "Awesome product", "Poor quality"]
+        labels = [1, 0, 1, 0]  # Binary classification
+
+        # Fit the model
+        scorer.fit(utterances, labels)
+
+        # Make predictions
+        test_utterances = ["Good product", "Not worth it"]
+        probabilities = scorer.predict(test_utterances)
+        print(probabilities)
+
+    .. testoutput::
+
+        [[0.89 0.11]
+        [0.23 0.77]]
+    """
+
+    name = "lora"
+    supports_multiclass = True
+    supports_multilabel = True
+    _model: Any
+    _tokenizer: Any
+
+    def __init__(
+        self,
+        classification_model_config: HFModelConfig | str | dict[str, Any] | None = None,
+        num_train_epochs: int = 3,
+        batch_size: int = 8,
+        learning_rate: float = 5e-5,
+        seed: int = 0,
+        report_to: REPORTERS_NAMES | None = None,  # type: ignore[valid-type]
+        **lora_kwargs: dict[str, Any],
+    ) -> None:
+        super().__init__(
+            classification_model_config=classification_model_config,
+            num_train_epochs=num_train_epochs,
+            batch_size=batch_size,
+            learning_rate=learning_rate,
+            seed=seed,
+            report_to=report_to,
+            )
+        self._lora_config = LoraConfig(**lora_kwargs) # type: ignore[arg-type]
+
+    @classmethod
+    def from_context(
+        cls,
+        context: Context,
+        classification_model_config: HFModelConfig | str | dict[str, Any] | None = None,
+        num_train_epochs: int = 3,
+        batch_size: int = 8,
+        learning_rate: float = 5e-5,
+        seed: int = 0,
+        **lora_kwargs: dict[str, Any],
+    ) -> "BERTLoRAScorer":
+        if classification_model_config is None:
+            classification_model_config = context.resolve_embedder()
+        return cls(
+            classification_model_config=classification_model_config,
+            num_train_epochs=num_train_epochs,
+            batch_size=batch_size,
+            learning_rate=learning_rate,
+            seed=seed,
+            report_to=context.logging_config.report_to,
+            **lora_kwargs,
+        )
+
+    def __initialize_model(self) -> None:
+        self._model = AutoModelForSequenceClassification.from_pretrained(
+            self.classification_model_config.model_name,
+            num_labels=self._n_classes,
+            problem_type="multi_label_classification" if self._multilabel else "single_label_classification",
+            trust_remote_code=self.classification_model_config.trust_remote_code,
+            )
+        self._model = get_peft_model(self._model, self._lora_config)
diff --git a/pyproject.toml b/pyproject.toml
@@ -45,6 +45,7 @@ dependencies = [
     "xxhash (>=3.5.0,<4.0.0)",
     "python-dotenv (>=1.0.1,<2.0.0)",
     "transformers[torch] (>=4.49.0,<5.0.0)",
+    "peft (>= 0.10.0, <1.0.0)",
     "codecarbon (==2.6)",
 ]
 
diff --git a/tests/assets/configs/multiclass.yaml b/tests/assets/configs/multiclass.yaml
@@ -35,6 +35,14 @@
       batch_size: [8, 16]
       learning_rate: [5.0e-5]
       seed: [0]
+    - module_name: lora
+      classification_model_config:
+        - model_name: avsolatorio/GIST-small-Embedding-v0
+      num_train_epochs: [1]
+      batch_size: [8, 16]
+      learning_rate: [5.0e-5]
+      seed: [0]
+      lora_alpha: [16]
 - node_type: decision
   target_metric: decision_accuracy
   search_space:
diff --git a/tests/assets/configs/multilabel.yaml b/tests/assets/configs/multilabel.yaml
@@ -31,6 +31,14 @@
       batch_size: [8]
       learning_rate: [5.0e-5]
       seed: [0]
+    - module_name: lora
+      classification_model_config:
+        - model_name: avsolatorio/GIST-small-Embedding-v0
+      num_train_epochs: [1]
+      batch_size: [8]
+      learning_rate: [5.0e-5]
+      seed: [0]
+      lora_alpha: [16]
 - node_type: decision
   target_metric: decision_accuracy
   search_space:
diff --git a/tests/modules/scoring/test_lora.py b/tests/modules/scoring/test_lora.py
@@ -0,0 +1,124 @@
+import shutil
+import tempfile
+from pathlib import Path
+
+import numpy as np
+import pytest
+
+from autointent.context.data_handler import DataHandler
+from autointent.modules import BERTLoRAScorer
+
+
+def test_lora_scorer_dump_load(dataset):
+    """Test that BERTLoRAScorer can be saved and loaded while preserving predictions."""
+    data_handler = DataHandler(dataset)
+
+    # Create and train scorer
+    scorer_original = BERTLoRAScorer(
+        classification_model_config="prajjwal1/bert-tiny",
+        num_train_epochs=1,
+        batch_size=8
+        )
+    scorer_original.fit(data_handler.train_utterances(0), data_handler.train_labels(0))
+
+    # Test data
+    test_data = [
+        "why is there a hold on my account",
+        "why is my bank account frozen",
+    ]
+
+    # Get predictions before saving
+    predictions_before = scorer_original.predict(test_data)
+
+    # Create temp directory and save model
+    temp_dir_path = Path(tempfile.mkdtemp(prefix="lora_scorer_test_"))
+    try:
+        # Save the model
+        scorer_original.dump(str(temp_dir_path))
+
+        # Create a new scorer and load saved model
+        scorer_loaded = BERTLoRAScorer(
+            classification_model_config="prajjwal1/bert-tiny",
+            num_train_epochs=1,
+            batch_size=8
+            )
+        scorer_loaded.load(str(temp_dir_path))
+
+        # Verify model and tokenizer are loaded
+        assert hasattr(scorer_loaded, "_model")
+        assert scorer_loaded._model is not None
+        assert hasattr(scorer_loaded, "_tokenizer")
+        assert scorer_loaded._tokenizer is not None
+
+        # Get predictions after loading
+        predictions_after = scorer_loaded.predict(test_data)
+
+        # Verify predictions match
+        assert predictions_before.shape == predictions_after.shape
+        np.testing.assert_allclose(predictions_before, predictions_after, atol=1e-6)
+
+    finally:
+        # Clean up
+        shutil.rmtree(temp_dir_path, ignore_errors=True)  # workaround for windows permission error
+
+
+def test_lora_prediction(dataset):
+    """Test that the lora model can fit and make predictions."""
+    data_handler = DataHandler(dataset)
+
+    scorer = BERTLoRAScorer(classification_model_config="prajjwal1/bert-tiny", num_train_epochs=1, batch_size=8)
+
+    scorer.fit(data_handler.train_utterances(0), data_handler.train_labels(0))
+
+    test_data = [
+        "why is there a hold on my american saving bank account",
+        "i am not sure why my account is blocked",
+        "why is there a hold on my capital one checking account",
+        "i think my account is blocked but i do not know the reason",
+        "can you tell me why is my bank account frozen",
+    ]
+
+    predictions = scorer.predict(test_data)
+
+    # Verify prediction shape
+    assert predictions.shape[0] == len(test_data)
+    assert predictions.shape[1] == len(set(data_handler.train_labels(0)))
+
+    # Verify predictions are probabilities
+    assert 0.0 <= np.min(predictions) <= np.max(predictions) <= 1.0
+
+    # Verify probabilities sum to 1 for multiclass
+    if not scorer._multilabel:
+        for pred_row in predictions:
+            np.testing.assert_almost_equal(np.sum(pred_row), 1.0, decimal=5)
+
+    # Test metadata function if available
+    if hasattr(scorer, "predict_with_metadata"):
+        predictions, metadata = scorer.predict_with_metadata(test_data)
+        assert len(predictions) == len(test_data)
+        assert metadata is None
+
+
+def test_lora_cache_clearing(dataset):
+    """Test that the lora model properly handles cache clearing."""
+    data_handler = DataHandler(dataset)
+
+    scorer = BERTLoRAScorer(classification_model_config="prajjwal1/bert-tiny", num_train_epochs=1, batch_size=8)
+
+    scorer.fit(data_handler.train_utterances(0), data_handler.train_labels(0))
+
+    test_data = ["test text"]
+
+    # Should work before clearing cache
+    scorer.predict(test_data)
+
+    # Clear the cache
+    scorer.clear_cache()
+
+    # Verify model and tokenizer are removed
+    assert not hasattr(scorer, "_model") or scorer._model is None
+    assert not hasattr(scorer, "_tokenizer") or scorer._tokenizer is None
+
+    # Should raise exception after clearing cache
+    with pytest.raises(RuntimeError):
+        scorer.predict(test_data)

Original file line number	Diff line number	Diff line change
`@@ -13,6 +13,7 @@`
`13`	`13`	`from .embedding import LogregAimedEmbedding, RetrievalAimedEmbedding`
`14`	`14`	`from .regex import SimpleRegex`
`15`	`15`	`from .scoring import (`
	`16`	`+ BERTLoRAScorer,`
`16`	`17`	`BertScorer,`
`17`	`18`	`DescriptionScorer,`
`18`	`19`	`DNNCScorer,`
`@@ -46,6 +47,7 @@ def _create_modules_dict(modules: list[type[T]]) -> dict[str, type[T]]:`
`46`	`47`	`SklearnScorer,`
`47`	`48`	`MLKnnScorer,`
`48`	`49`	`BertScorer,`
	`50`	`+ BERTLoRAScorer`
`49`	`51`	`]`
`50`	`52`	`)`
`51`	`53`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+from .lora import BERTLoRAScorer`
	`2`	`+`
	`3`	`+__all__ = ["BERTLoRAScorer"]`
Original file line number	Diff line number	Diff line change
`@@ -45,6 +45,7 @@ dependencies = [`
`45`	`45`	`"xxhash (>=3.5.0,<4.0.0)",`
`46`	`46`	`"python-dotenv (>=1.0.1,<2.0.0)",`
`47`	`47`	`"transformers[torch] (>=4.49.0,<5.0.0)",`
	`48`	`+ "peft (>= 0.10.0, <1.0.0)",`
`48`	`49`	`"codecarbon (==2.6)",`
`49`	`50`	`]`
`50`	`51`