PTuningScorer (#178)

nikiduki · github-actions[bot] · voorhs · web-flow · commit 54098c0c5458 · 2025-04-25T12:48:59.000+03:00
* Initial commit of PTuningScorer module * Added peft (>=0.10.0, <0.15.0) in dependencies * Implement fit/predict PTuningScorer * Added PTuningScorer in __init__ file * Update optimizer_config.schema.json * Minor fixes * PGH00 * Refactor clear_cache in fit method * Refactor typing ignore + remove unnecessary * Fix fit method status check * Added test for PTuningScorer * Fix mypy typing * Update and fix peft version dependencies * Fix mypy typing * Added test in multiclass.yaml, multilabel.yaml * Update docstrings * Fix mypy typing * Added trust_remote_code * make proper rst reference * Added test for dump load * feat: added crossencoder (#181) * feat: added crossencoder * refactor * feat: added arg similarity * Update optimizer_config.schema.json * feat: added tests * feat: added errors * fix: scoring test * fix: description vectors error * fix: description vectors error * fix: lint * fix: test * add node validators (#177) * add node validators * add comments * Update optimizer_config.schema.json * rename bert model * lint * fixes * fix test --------- Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> Co-authored-by: voorhs <ilya_alekseev_2016@list.ru> * fix: unit tests * feat: added test for description * feat: delete encoder_type from the class args * feat: update assets * feat: update assets * fix: fixed test * Update optimizer_config.schema.json --------- Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> Co-authored-by: Roman Solomatin <samoed.roman@gmail.com> Co-authored-by: voorhs <ilya_alekseev_2016@list.ru> * Added fixed seed to test reproduction * Pull LoraScorer and Bert Refactor * Refactor PTuningScorer * Refactor test for ptuning * Fix typing * Fix multilabel multiclass tests * Fix typing --------- Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> Co-authored-by: voorhs <ilya_alekseev_2016@list.ru> Co-authored-by: Darinochka 
<39233990+Darinochka@users.noreply.github.com> Co-authored-by: Roman Solomatin <samoed.roman@gmail.com>
diff --git a/autointent/_callbacks/tensorboard.py b/autointent/_callbacks/tensorboard.py
@@ -16,7 +16,7 @@ def __init__(self) -> None:
         Raises an ImportError if neither are installed.
         """
         try:
-            from torch.utils.tensorboard import SummaryWriter  # type: ignore[attr-defined]
+            from torch.utils.tensorboard import SummaryWriter
 
             self.writer = SummaryWriter
         except ImportError:
diff --git a/autointent/modules/__init__.py b/autointent/modules/__init__.py
@@ -20,6 +20,7 @@
     KNNScorer,
     LinearScorer,
     MLKnnScorer,
+    PTuningScorer,
     RerankScorer,
     SklearnScorer,
 )
@@ -47,7 +48,8 @@ def _create_modules_dict(modules: list[type[T]]) -> dict[str, type[T]]:
         SklearnScorer,
         MLKnnScorer,
         BertScorer,
-        BERTLoRAScorer
+        BERTLoRAScorer,
+        PTuningScorer,
     ]
 )
 
diff --git a/autointent/modules/scoring/__init__.py b/autointent/modules/scoring/__init__.py
@@ -5,6 +5,7 @@
 from ._linear import LinearScorer
 from ._lora import BERTLoRAScorer
 from ._mlknn import MLKnnScorer
+from ._ptuning import PTuningScorer
 from ._sklearn import SklearnScorer
 
 __all__ = [
@@ -15,6 +16,7 @@
     "KNNScorer",
     "LinearScorer",
     "MLKnnScorer",
+    "PTuningScorer",
     "RerankScorer",
     "SklearnScorer",
 ]
diff --git a/autointent/modules/scoring/_bert.py b/autointent/modules/scoring/_bert.py
@@ -85,7 +85,6 @@ def __initialize_model(self) -> None:
             problem_type="multi_label_classification" if self._multilabel else "single_label_classification",
         )
 
-
     def fit(
         self,
         utterances: list[str],
diff --git a/autointent/modules/scoring/_lora/lora.py b/autointent/modules/scoring/_lora/lora.py
@@ -81,8 +81,8 @@ def __init__(
             learning_rate=learning_rate,
             seed=seed,
             report_to=report_to,
-            )
-        self._lora_config = LoraConfig(**lora_kwargs) # type: ignore[arg-type]
+        )
+        self._lora_config = LoraConfig(**lora_kwargs)  # type: ignore[arg-type]
 
     @classmethod
     def from_context(
@@ -113,5 +113,5 @@ def __initialize_model(self) -> None:
             num_labels=self._n_classes,
             problem_type="multi_label_classification" if self._multilabel else "single_label_classification",
             trust_remote_code=self.classification_model_config.trust_remote_code,
-            )
+        )
         self._model = get_peft_model(self._model, self._lora_config)
diff --git a/autointent/modules/scoring/_ptuning/__init__.py b/autointent/modules/scoring/_ptuning/__init__.py
@@ -0,0 +1,3 @@
+from .ptuning import PTuningScorer
+
+__all__ = ["PTuningScorer"]
diff --git a/autointent/modules/scoring/_ptuning/ptuning.py b/autointent/modules/scoring/_ptuning/ptuning.py
@@ -0,0 +1,130 @@
+"""PTuningScorer class for ptuning-based classification."""
+
+from typing import Any
+
+import torch
+from peft import PromptEncoderConfig, get_peft_model
+from transformers import (
+    AutoModelForSequenceClassification,
+)
+
+from autointent import Context
+from autointent._callbacks import REPORTERS_NAMES
+from autointent.configs import HFModelConfig
+from autointent.modules.scoring._bert import BertScorer
+
+
+class PTuningScorer(BertScorer):
+    """PEFT P-tuning scorer.
+
+    Args:
+        classification_model_config: Config of the base transformer model (HFModelConfig, str, or dict)
+        num_train_epochs: Number of training epochs
+        batch_size: Batch size for training
+        learning_rate: Learning rate for training
+        seed: Random seed for reproducibility
+        report_to: Reporting tool for training logs
+        **ptuning_kwargs: Arguments for `PromptEncoderConfig <https://huggingface.co/docs/peft/package_reference/p_tuning#peft.PromptEncoderConfig>`_
+
+    Example:
+    --------
+    .. testcode::
+
+        from autointent.modules import PTuningScorer
+        scorer = PTuningScorer(
+            classification_model_config="prajjwal1/bert-tiny",
+            num_train_epochs=3,
+            batch_size=8,
+            task_type="SEQ_CLS",
+            num_virtual_tokens=10,
+            seed=42
+        )
+        utterances = ["hello", "goodbye", "allo", "sayonara"]
+        labels = [0, 1, 0, 1]
+        scorer.fit(utterances, labels)
+        test_utterances = ["hi", "bye"]
+        probabilities = scorer.predict(test_utterances)
+        print(probabilities)
+
+    .. testoutput::
+
+        [[0.49925193 0.50074804]
+        [0.4944601  0.5055399 ]]
+
+    """
+
+    name = "ptuning"
+    supports_multiclass = True
+    supports_multilabel = True
+    _model: Any
+    _tokenizer: Any
+
+    def __init__(
+        self,
+        classification_model_config: HFModelConfig | str | dict[str, Any] | None = None,
+        num_train_epochs: int = 3,
+        batch_size: int = 8,
+        learning_rate: float = 5e-5,
+        seed: int = 0,
+        report_to: REPORTERS_NAMES | None = None,  # type: ignore[valid-type]
+        **ptuning_kwargs: dict[str, Any],
+    ) -> None:
+        super().__init__(
+            classification_model_config=classification_model_config,
+            num_train_epochs=num_train_epochs,
+            batch_size=batch_size,
+            learning_rate=learning_rate,
+            seed=seed,
+            report_to=report_to,
+        )
+        self._ptuning_config = PromptEncoderConfig(**ptuning_kwargs)  # type: ignore[arg-type]
+        torch.manual_seed(seed)
+
+    @classmethod
+    def from_context(
+        cls,
+        context: Context,
+        classification_model_config: HFModelConfig | str | dict[str, Any] | None = None,
+        num_train_epochs: int = 3,
+        batch_size: int = 8,
+        learning_rate: float = 5e-5,
+        seed: int = 0,
+        **ptuning_kwargs: dict[str, Any],
+    ) -> "PTuningScorer":
+        """Create a PTuningScorer instance using a Context object.
+
+        Args:
+            context: Context containing configurations and utilities
+            classification_model_config: Config of the base model, or None to use the best embedder
+            num_train_epochs: Number of training epochs
+            batch_size: Batch size for training
+            learning_rate: Learning rate for training
+            seed: Random seed for reproducibility
+            **ptuning_kwargs: Arguments for PromptEncoderConfig
+        """
+        if classification_model_config is None:
+            classification_model_config = context.resolve_embedder()
+
+        report_to = context.logging_config.report_to
+
+        return cls(
+            classification_model_config=classification_model_config,
+            num_train_epochs=num_train_epochs,
+            batch_size=batch_size,
+            learning_rate=learning_rate,
+            seed=seed,
+            report_to=report_to,
+            **ptuning_kwargs,
+        )
+
+    def _initialize_model(self) -> None:
+        """Initialize the model with P-tuning configuration."""
+        model_name = self.classification_model_config.model_name
+        self._model = AutoModelForSequenceClassification.from_pretrained(
+            model_name,
+            num_labels=self._n_classes,
+            problem_type="multi_label_classification" if self._multilabel else "single_label_classification",
+            trust_remote_code=self.classification_model_config.trust_remote_code,
+            return_dict=True,
+        )
+        self._model = get_peft_model(self._model, self._ptuning_config)
diff --git a/pyproject.toml b/pyproject.toml
@@ -45,7 +45,7 @@ dependencies = [
     "xxhash (>=3.5.0,<4.0.0)",
     "python-dotenv (>=1.0.1,<2.0.0)",
     "transformers[torch] (>=4.49.0,<5.0.0)",
-    "peft (>= 0.10.0, <1.0.0)",
+    "peft (>= 0.10.0, !=0.15.0, !=0.15.1, <1.0.0)",
     "codecarbon (==2.6)",
 ]
 
diff --git a/tests/assets/configs/multiclass.yaml b/tests/assets/configs/multiclass.yaml
@@ -43,6 +43,12 @@
       learning_rate: [5.0e-5]
       seed: [0]
       lora_alpha: [16]
+    - module_name: ptuning
+      classification_model_config: ["prajjwal1/bert-tiny"]
+      num_train_epochs: [1]
+      batch_size: [8, 16]
+      task_type: ["SEQ_CLS"]
+      num_virtual_tokens: [10, 20]
 - node_type: decision
   target_metric: decision_accuracy
   search_space:
diff --git a/tests/assets/configs/multilabel.yaml b/tests/assets/configs/multilabel.yaml
@@ -31,6 +31,12 @@
       batch_size: [8]
       learning_rate: [5.0e-5]
       seed: [0]
+    - module_name: ptuning
+      classification_model_config: ["prajjwal1/bert-tiny"]
+      num_train_epochs: [1]
+      batch_size: [8]
+      task_type: ["SEQ_CLS"]
+      num_virtual_tokens: [10, 20]
     - module_name: lora
       classification_model_config:
         - model_name: avsolatorio/GIST-small-Embedding-v0
diff --git a/tests/modules/scoring/test_lora.py b/tests/modules/scoring/test_lora.py
@@ -15,10 +15,8 @@ def test_lora_scorer_dump_load(dataset):
 
     # Create and train scorer
     scorer_original = BERTLoRAScorer(
-        classification_model_config="prajjwal1/bert-tiny",
-        num_train_epochs=1,
-        batch_size=8
-        )
+        classification_model_config="prajjwal1/bert-tiny", num_train_epochs=1, batch_size=8
+    )
     scorer_original.fit(data_handler.train_utterances(0), data_handler.train_labels(0))
 
     # Test data
@@ -38,10 +36,8 @@ def test_lora_scorer_dump_load(dataset):
 
         # Create a new scorer and load saved model
         scorer_loaded = BERTLoRAScorer(
-            classification_model_config="prajjwal1/bert-tiny",
-            num_train_epochs=1,
-            batch_size=8
-            )
+            classification_model_config="prajjwal1/bert-tiny", num_train_epochs=1, batch_size=8
+        )
         scorer_loaded.load(str(temp_dir_path))
 
         # Verify model and tokenizer are loaded
diff --git a/tests/modules/scoring/test_ptuning.py b/tests/modules/scoring/test_ptuning.py
@@ -0,0 +1,124 @@
+import shutil
+import tempfile
+from pathlib import Path
+
+import numpy as np
+import pytest
+
+from autointent.context.data_handler import DataHandler
+from autointent.modules import PTuningScorer
+
+
+def test_ptuning_scorer_dump_load(dataset):
+    """Test that PTuningScorer can be saved and loaded while preserving predictions."""
+    data_handler = DataHandler(dataset)
+
+    scorer_original = PTuningScorer(
+        classification_model_config="prajjwal1/bert-tiny",
+        num_train_epochs=1,
+        batch_size=8,
+        task_type="SEQ_CLS",
+        num_virtual_tokens=10,
+        seed=42,
+    )
+    scorer_original.fit(data_handler.train_utterances(0), data_handler.train_labels(0))
+
+    test_data = [
+        "why is there a hold on my account",
+        "why is my bank account frozen",
+    ]
+
+    predictions_before = scorer_original.predict(test_data)
+
+    temp_dir_path = Path(tempfile.mkdtemp(prefix="ptuning_scorer_test_"))
+    try:
+        scorer_original.dump(str(temp_dir_path))
+
+        scorer_loaded = PTuningScorer(
+            classification_model_config="prajjwal1/bert-tiny",
+            num_train_epochs=1,
+            batch_size=8,
+            task_type="SEQ_CLS",
+            num_virtual_tokens=10,
+            seed=42,
+        )
+        scorer_loaded.load(str(temp_dir_path))
+
+        assert hasattr(scorer_loaded, "_model")
+        assert scorer_loaded._model is not None
+        assert hasattr(scorer_loaded, "_tokenizer")
+        assert scorer_loaded._tokenizer is not None
+
+        predictions_after = scorer_loaded.predict(test_data)
+
+        assert predictions_before.shape == predictions_after.shape
+        np.testing.assert_allclose(predictions_before, predictions_after, atol=1e-6)
+
+    finally:
+        shutil.rmtree(temp_dir_path, ignore_errors=True)  # workaround for windows permission error
+
+
+def test_ptuning_prediction(dataset):
+    """Test that the transformer model can fit and make predictions."""
+    data_handler = DataHandler(dataset)
+
+    scorer = PTuningScorer(
+        classification_model_config="prajjwal1/bert-tiny",
+        num_train_epochs=1,
+        batch_size=8,
+        task_type="SEQ_CLS",
+        num_virtual_tokens=10,
+        seed=42,
+    )
+
+    scorer.fit(data_handler.train_utterances(0), data_handler.train_labels(0))
+
+    test_data = [
+        "why is there a hold on my american saving bank account",
+        "i am nost sure why my account is blocked",
+        "why is there a hold on my capital one checking account",
+        "i think my account is blocked but i do not know the reason",
+        "can you tell me why is my bank account frozen",
+    ]
+
+    predictions = scorer.predict(test_data)
+
+    assert predictions.shape[0] == len(test_data)
+    assert predictions.shape[1] == len(set(data_handler.train_labels(0)))
+
+    assert 0.0 <= np.min(predictions) <= np.max(predictions) <= 1.0
+
+    if not scorer._multilabel:
+        for pred_row in predictions:
+            np.testing.assert_almost_equal(np.sum(pred_row), 1.0, decimal=5)
+
+    if hasattr(scorer, "predict_with_metadata"):
+        predictions, metadata = scorer.predict_with_metadata(test_data)
+        assert len(predictions) == len(test_data)
+        assert metadata is None
+
+
+def test_ptuning_cache_clearing(dataset):
+    """Test that the transformer model properly handles cache clearing."""
+    data_handler = DataHandler(dataset)
+
+    scorer = PTuningScorer(
+        classification_model_config="prajjwal1/bert-tiny",
+        num_train_epochs=1,
+        batch_size=8,
+        task_type="SEQ_CLS",
+        num_virtual_tokens=20,
+        seed=42,
+    )
+
+    scorer.fit(data_handler.train_utterances(0), data_handler.train_labels(0))
+
+    test_data = ["test text"]
+    scorer.predict(test_data)
+    scorer.clear_cache()
+
+    assert not hasattr(scorer, "_model") or scorer._model is None
+    assert not hasattr(scorer, "_tokenizer") or scorer._tokenizer is None
+
+    with pytest.raises(RuntimeError):
+        scorer.predict(test_data)

Original file line number	Diff line number	Diff line change
`@@ -20,6 +20,7 @@`
`20`	`20`	`KNNScorer,`
`21`	`21`	`LinearScorer,`
`22`	`22`	`MLKnnScorer,`
	`23`	`+ PTuningScorer,`
`23`	`24`	`RerankScorer,`
`24`	`25`	`SklearnScorer,`
`25`	`26`	`)`
`@@ -47,7 +48,8 @@ def _create_modules_dict(modules: list[type[T]]) -> dict[str, type[T]]:`
`47`	`48`	`SklearnScorer,`
`48`	`49`	`MLKnnScorer,`
`49`	`50`	`BertScorer,`
`50`		`- BERTLoRAScorer`
	`51`	`+ BERTLoRAScorer,`
	`52`	`+ PTuningScorer,`
`51`	`53`	`]`
`52`	`54`	`)`
`53`	`55`
Original file line number	Diff line number	Diff line change
`@@ -85,7 +85,6 @@ def __initialize_model(self) -> None:`
`85`	`85`	`problem_type="multi_label_classification" if self._multilabel else "single_label_classification",`
`86`	`86`	`)`
`87`	`87`
`88`		`-`
`89`	`88`	`def fit(`
`90`	`89`	`self,`
`91`	`90`	`utterances: list[str],`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+from .ptuning import PTuningScorer`
	`2`	`+`
	`3`	`+__all__ = ["PTuningScorer"]`
Original file line number	Diff line number	Diff line change
`@@ -45,7 +45,7 @@ dependencies = [`
`45`	`45`	`"xxhash (>=3.5.0,<4.0.0)",`
`46`	`46`	`"python-dotenv (>=1.0.1,<2.0.0)",`
`47`	`47`	`"transformers[torch] (>=4.49.0,<5.0.0)",`
`48`		`- "peft (>= 0.10.0, <1.0.0)",`
	`48`	`+ "peft (>= 0.10.0, !=0.15.0, !=0.15.1, <1.0.0)",`
`49`	`49`	`"codecarbon (==2.6)",`
`50`	`50`	`]`
`51`	`51`