
Commit 7dfb8f8

added test
1 parent 9235df9 commit 7dfb8f8

File tree (5 files changed: +74 additions, -2 deletions)

  autointent/modules/__init__.py
  autointent/modules/scoring/_lora/lora.py
  tests/assets/configs/multiclass.yaml
  tests/assets/configs/multilabel.yaml
  tests/modules/scoring/test_lora.py

autointent/modules/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -13,6 +13,7 @@
 from .embedding import LogregAimedEmbedding, RetrievalAimedEmbedding
 from .regex import SimpleRegex
 from .scoring import (
+    BERTLoRAScorer,
     BertScorer,
     DescriptionScorer,
     DNNCScorer,
@@ -46,6 +47,7 @@ def _create_modules_dict(modules: list[type[T]]) -> dict[str, type[T]]:
         SklearnScorer,
         MLKnnScorer,
         BertScorer,
+        BERTLoRAScorer
     ]
 )

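With this change the scorer is exported at package level and registered alongside the other scoring modules, so it can be imported directly, as the new test below does. A minimal usage sketch, mirroring the test's constructor arguments:

    from autointent.modules import BERTLoRAScorer

    scorer = BERTLoRAScorer(model_config="prajjwal1/bert-tiny", num_train_epochs=1, batch_size=8)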

autointent/modules/scoring/_lora/lora.py

Lines changed: 2 additions & 2 deletions
@@ -93,7 +93,7 @@ def fit(
         )
         self._model = get_peft_model(self._model, self._lora_config)

-        device = torch.device(self.model_config.device)
+        device = torch.device(self.model_config.device if self.model_config.device else 'cpu')
         self._model = self._model.to(device)

         use_cpu = self.model_config.device == "cpu"
@@ -137,7 +137,7 @@ def predict(self, utterances: list[str]) -> npt.NDArray[Any]:
             msg = "Model is not trained. Call fit() first."
             raise RuntimeError(msg)

-        device = torch.device(self.model_config.device)
+        device = torch.device(self.model_config.device if self.model_config.device else 'cpu')
         self._model = self._model.to(device)

         all_predictions = []
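
Both hunks apply the same guard in fit() and predict(): fall back to CPU when model_config.device is unset, presumably so an empty device setting is never handed to torch.device. Pulled out as a standalone helper for illustration (resolve_device is hypothetical, not part of the codebase):

    import torch

    def resolve_device(configured: str | None) -> torch.device:
        # Illustrative helper: mirrors the fallback added in lora.py and
        # defaults to CPU when no device is configured.
        return torch.device(configured if configured else "cpu")

    print(resolve_device(None))    # device(type='cpu')
    print(resolve_device("cuda"))  # device(type='cuda')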

tests/assets/configs/multiclass.yaml

Lines changed: 7 additions & 0 deletions
@@ -35,6 +35,13 @@
       batch_size: [8, 16]
       learning_rate: [5.0e-5]
       seed: [0]
+    - module_name: lora
+      model_config:
+        - model_name: avsolatorio/GIST-small-Embedding-v0
+      num_train_epochs: [1]
+      batch_size: [8, 16]
+      learning_rate: [5.0e-5]
+      seed: [0]
 - node_type: decision
   target_metric: decision_accuracy
   search_space:

tests/assets/configs/multilabel.yaml

Lines changed: 7 additions & 0 deletions
@@ -31,6 +31,13 @@
       batch_size: [8]
       learning_rate: [5.0e-5]
       seed: [0]
+    - module_name: lora
+      model_config:
+        - model_name: avsolatorio/GIST-small-Embedding-v0
+      num_train_epochs: [1]
+      batch_size: [8]
+      learning_rate: [5.0e-5]
+      seed: [0]
 - node_type: decision
   target_metric: decision_accuracy
   search_space:
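
Both configs gain the same lora entry in the scoring search space; only batch_size differs (two candidates in multiclass, one in multilabel). Assuming the optimizer enumerates every combination of the listed values (an assumption about autointent's search, not shown in this diff), the multiclass entry yields two candidate trials:

    from itertools import product

    # Candidate values copied from the multiclass.yaml entry above
    # (grid enumeration here is an assumption, used only to count combinations).
    grid = {
        "num_train_epochs": [1],
        "batch_size": [8, 16],
        "learning_rate": [5.0e-5],
        "seed": [0],
    }

    trials = [dict(zip(grid, combo)) for combo in product(*grid.values())]
    print(len(trials))  # 2 -- one trial per batch_size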

tests/modules/scoring/test_lora.py

Lines changed: 56 additions & 0 deletions
@@ -0,0 +1,56 @@
+import numpy as np
+import pytest
+
+from autointent.context.data_handler import DataHandler
+from autointent.modules import BERTLoRAScorer
+
+
+def test_lora_prediction(dataset):
+    """Test that the transformer model can fit and make predictions."""
+    data_handler = DataHandler(dataset)
+
+    scorer = BERTLoRAScorer(model_config="prajjwal1/bert-tiny", num_train_epochs=1, batch_size=8)
+    scorer.fit(data_handler.train_utterances(0), data_handler.train_labels(0))
+
+    test_data = [
+        "why is there a hold on my american saving bank account",
+        "i am not sure why my account is blocked",
+        "why is there a hold on my capital one checking account",
+        "i think my account is blocked but i do not know the reason",
+        "can you tell me why is my bank account frozen",
+    ]
+
+    predictions = scorer.predict(test_data)
+
+    assert predictions.shape[0] == len(test_data)
+    assert predictions.shape[1] == len(set(data_handler.train_labels(0)))
+
+    assert 0.0 <= np.min(predictions) <= np.max(predictions) <= 1.0
+
+    if not scorer._multilabel:
+        for pred_row in predictions:
+            np.testing.assert_almost_equal(np.sum(pred_row), 1.0, decimal=5)
+
+    if hasattr(scorer, "predict_with_metadata"):
+        predictions, metadata = scorer.predict_with_metadata(test_data)
+        assert len(predictions) == len(test_data)
+        assert metadata is None
+
+
+def test_bert_cache_clearing(dataset):
+    """Test that the transformer model properly handles cache clearing."""
+    data_handler = DataHandler(dataset)
+
+    scorer = BERTLoRAScorer(model_config="prajjwal1/bert-tiny", num_train_epochs=1, batch_size=8)
+    scorer.fit(data_handler.train_utterances(0), data_handler.train_labels(0))
+
+    test_data = ["test text"]
+
+    scorer.predict(test_data)
+    scorer.clear_cache()
+
+    assert not hasattr(scorer, "_model") or scorer._model is None
+    assert not hasattr(scorer, "_tokenizer") or scorer._tokenizer is None
+
+    with pytest.raises(RuntimeError):
+        scorer.predict(test_data)
