add pipeline tests for each scoring module

voorhs · voorhs · commit f60eae7ecc4f · 2025-11-19T17:29:23.000+03:00
diff --git a/tests/modules/scoring/test_bert.py b/tests/modules/scoring/test_bert.py
@@ -5,6 +5,7 @@
 import numpy as np
 import pytest
 
+from autointent import Pipeline
 from autointent.context.data_handler import DataHandler
 from autointent.modules import BertScorer
 
@@ -115,3 +116,26 @@ def test_bert_cache_clearing(dataset):
     # Should raise exception after clearing cache
     with pytest.raises(RuntimeError):
         scorer.predict(test_data)
+
+
+def test_bert_in_pipeline(dataset):
+    """Fit a `bert` + argmax pipeline and check that one utterance yields one prediction."""
+    search_space = [
+        {
+            "node_type": "scoring",
+            "search_space": [
+                {
+                    "module_name": "bert",
+                    "classification_model_config": [{"model_name": "prajjwal1/bert-tiny"}],
+                    "num_train_epochs": [1],
+                    "batch_size": [8],
+                }
+            ],
+        },
+        {"node_type": "decision", "search_space": [{"module_name": "argmax"}]},
+    ]
+
+    pipeline = Pipeline.from_search_space(search_space)
+    pipeline.fit(dataset)
+    predictions = pipeline.predict(["test utterance"])
+    assert len(predictions) == 1
diff --git a/tests/modules/scoring/test_catboost.py b/tests/modules/scoring/test_catboost.py
@@ -5,6 +5,7 @@
 import numpy as np
 import pytest
 
+from autointent import Pipeline
 from autointent.context.data_handler import DataHandler
 from autointent.modules import CatBoostScorer
 from tests.conftest import get_test_embedder_config
@@ -146,3 +147,26 @@ def test_catboost_cache_clearing(dataset):
     scorer.clear_cache()
     with pytest.raises(RuntimeError):
         scorer.predict(test_data)
+
+
+def test_catboost_in_pipeline(dataset):
+    """Fit a `catboost` + argmax pipeline and check that one utterance yields one prediction."""
+    search_space = [
+        {
+            "node_type": "scoring",
+            "search_space": [
+                {
+                    "module_name": "catboost",
+                    "iterations": [50],
+                    "learning_rate": [0.05],
+                    "features_type": ["embedding"],
+                }
+            ],
+        },
+        {"node_type": "decision", "search_space": [{"module_name": "argmax"}]},
+    ]
+
+    pipeline = Pipeline.from_search_space(search_space)
+    pipeline.fit(dataset)
+    predictions = pipeline.predict(["test utterance"])
+    assert len(predictions) == 1
diff --git a/tests/modules/scoring/test_cnn.py b/tests/modules/scoring/test_cnn.py
@@ -5,6 +5,7 @@
 import numpy as np
 import pytest
 
+from autointent import Pipeline
 from autointent.configs import VocabConfig
 from autointent.context.data_handler import DataHandler
 from autointent.modules.scoring import CNNScorer
@@ -120,3 +121,25 @@ def test_cnn_scorer_dump_load(dataset):
     finally:
         # Clean up
         shutil.rmtree(temp_dir_path, ignore_errors=True)  # workaround for windows permission error
+
+
+def test_cnn_in_pipeline(dataset):
+    """Fit a `cnn` + argmax pipeline and check that one utterance yields one prediction."""
+    search_space = [
+        {
+            "node_type": "scoring",
+            "search_space": [
+                {
+                    "module_name": "cnn",
+                    "embed_dim": [8],
+                    "num_train_epochs": [1],
+                }
+            ],
+        },
+        {"node_type": "decision", "search_space": [{"module_name": "argmax"}]},
+    ]
+
+    pipeline = Pipeline.from_search_space(search_space)
+    pipeline.fit(dataset)
+    predictions = pipeline.predict(["test utterance"])
+    assert len(predictions) == 1
diff --git a/tests/modules/scoring/test_description_bi.py b/tests/modules/scoring/test_description_bi.py
@@ -3,6 +3,7 @@
 import numpy as np
 import pytest
 
+from autointent import Pipeline
 from autointent.context.data_handler import DataHandler
 from autointent.modules import BiEncoderDescriptionScorer
 
@@ -56,3 +57,25 @@ def test_description_scorer(dataset, expected_prediction, multilabel):
         new_scorer = BiEncoderDescriptionScorer.load(temp_dir)
         new_predictions = new_scorer.predict(test_utterances)
         np.testing.assert_almost_equal(predictions, new_predictions, decimal=5)
+
+
+def test_description_bi_in_pipeline(dataset):
+    """Fit a `description_bi` + argmax pipeline and check that one utterance yields one prediction."""
+    search_space = [
+        {
+            "node_type": "scoring",
+            "search_space": [
+                {
+                    "module_name": "description_bi",
+                    "embedder_config": [{"model_name": "sergeyzh/rubert-tiny-turbo"}],
+                    "temperature": [0.3],
+                }
+            ],
+        },
+        {"node_type": "decision", "search_space": [{"module_name": "argmax"}]},
+    ]
+
+    pipeline = Pipeline.from_search_space(search_space)
+    pipeline.fit(dataset)
+    predictions = pipeline.predict(["test utterance"])
+    assert len(predictions) == 1
diff --git a/tests/modules/scoring/test_description_cross.py b/tests/modules/scoring/test_description_cross.py
@@ -3,6 +3,7 @@
 import numpy as np
 import pytest
 
+from autointent import Pipeline
 from autointent.context.data_handler import DataHandler
 from autointent.modules import CrossEncoderDescriptionScorer
 
@@ -64,3 +65,25 @@ def test_description_scorer_cross_encoder(dataset, expected_prediction, multilab
         np.testing.assert_almost_equal(predictions, loaded_predictions, decimal=5)
 
         new_scorer.clear_cache()
+
+
+def test_description_cross_in_pipeline(dataset):
+    """Fit a `description_cross` + argmax pipeline and check that one utterance yields one prediction."""
+    search_space = [
+        {
+            "node_type": "scoring",
+            "search_space": [
+                {
+                    "module_name": "description_cross",
+                    "cross_encoder_config": [{"model_name": "cross-encoder/ms-marco-MiniLM-L6-v2"}],
+                    "temperature": [0.3],
+                }
+            ],
+        },
+        {"node_type": "decision", "search_space": [{"module_name": "argmax"}]},
+    ]
+
+    pipeline = Pipeline.from_search_space(search_space)
+    pipeline.fit(dataset)
+    predictions = pipeline.predict(["test utterance"])
+    assert len(predictions) == 1
diff --git a/tests/modules/scoring/test_description_llm.py b/tests/modules/scoring/test_description_llm.py
@@ -4,6 +4,7 @@
 import numpy as np
 import pytest
 
+from autointent import Pipeline
 from autointent.context.data_handler import DataHandler
 from autointent.modules import LLMDescriptionScorer
 
@@ -50,3 +51,28 @@ def test_description_scorer_llm(dataset, multilabel):
         new_scorer = LLMDescriptionScorer.load(temp_dir)
         new_predictions = new_scorer.predict(test_utterances)
         np.testing.assert_almost_equal(predictions, new_predictions, decimal=5)
+
+
+@pytest.mark.skipif(
+    not os.getenv("OPENAI_API_KEY") or not os.getenv("OPENAI_MODEL_NAME"),
+    reason="OPENAI_API_KEY and OPENAI_MODEL_NAME environment variables are required for this test",
+)
+def test_llm_description_in_pipeline(dataset):
+    """Fit a `description_llm` + argmax pipeline and check that one utterance yields one prediction."""
+    search_space = [
+        {
+            "node_type": "scoring",
+            "search_space": [
+                {
+                    "module_name": "description_llm",
+                    "temperature": [0.3],
+                }
+            ],
+        },
+        {"node_type": "decision", "search_space": [{"module_name": "argmax"}]},
+    ]
+
+    pipeline = Pipeline.from_search_space(search_space)
+    pipeline.fit(dataset)
+    predictions = pipeline.predict(["test utterance"])
+    assert len(predictions) == 1
diff --git a/tests/modules/scoring/test_dnnc.py b/tests/modules/scoring/test_dnnc.py
@@ -3,6 +3,7 @@
 import numpy as np
 import pytest
 
+from autointent import Pipeline
 from autointent.context.data_handler import DataHandler
 from autointent.modules import DNNCScorer
 
@@ -41,3 +42,25 @@ def test_base_dnnc(dataset, train_head, pred_score):
         new_scorer = DNNCScorer.load(temp_dir)
         new_predictions = new_scorer.predict(test_data)
         np.testing.assert_almost_equal(predictions, new_predictions, decimal=5)
+
+
+def test_dnnc_in_pipeline(dataset):
+    """Fit a `dnnc` + argmax pipeline and check that one utterance yields one prediction."""
+    search_space = [
+        {
+            "node_type": "scoring",
+            "search_space": [
+                {
+                    "module_name": "dnnc",
+                    "cross_encoder_config": [{"model_name": "cross-encoder/ms-marco-MiniLM-L6-v2"}],
+                    "k": [3],
+                }
+            ],
+        },
+        {"node_type": "decision", "search_space": [{"module_name": "argmax"}]},
+    ]
+
+    pipeline = Pipeline.from_search_space(search_space)
+    pipeline.fit(dataset)
+    predictions = pipeline.predict(["test utterance"])
+    assert len(predictions) == 1
diff --git a/tests/modules/scoring/test_gcn_scorer.py b/tests/modules/scoring/test_gcn_scorer.py
@@ -2,7 +2,7 @@
 import pytest
 import torch
 
-from autointent import Dataset
+from autointent import Dataset, Pipeline
 from autointent.modules.scoring import GCNScorer
 from tests.conftest import get_test_embedder_config
 
@@ -91,3 +91,25 @@ def test_gcn_scorer_dump_load(tmp_path, multilabel_dataset):
     loaded_predictions = loaded_scorer.predict(test_utterances)
 
     np.testing.assert_allclose(original_predictions, loaded_predictions, atol=1e-6)
+
+
+def test_gcn_in_pipeline(dataset):
+    """Fit a `gcn` + threshold pipeline on multilabel data and check that one utterance yields one prediction."""
+    search_space = [
+        {
+            "node_type": "scoring",
+            "search_space": [
+                {
+                    "module_name": "gcn",
+                    "num_train_epochs": [1],
+                    "batch_size": [8],
+                }
+            ],
+        },
+        {"node_type": "decision", "search_space": [{"module_name": "threshold", "thresh": [0.5]}]},
+    ]
+
+    pipeline = Pipeline.from_search_space(search_space)
+    pipeline.fit(dataset.to_multilabel())
+    predictions = pipeline.predict(["test utterance"])
+    assert len(predictions) == 1
diff --git a/tests/modules/scoring/test_knn.py b/tests/modules/scoring/test_knn.py
@@ -2,6 +2,7 @@
 
 import numpy as np
 
+from autointent import Pipeline
 from autointent.context.data_handler import DataHandler
 from autointent.modules import KNNScorer
 from tests.conftest import get_test_embedder_config
@@ -45,3 +46,25 @@ def test_base_knn(dataset):
         new_scorer = KNNScorer.load(temp_dir)
         new_predictions = new_scorer.predict(test_data)
         assert np.allclose(predictions, new_predictions)
+
+
+def test_knn_in_pipeline(dataset):
+    """Fit a `knn` + argmax pipeline and check that one utterance yields one prediction."""
+    search_space = [
+        {
+            "node_type": "scoring",
+            "search_space": [
+                {
+                    "module_name": "knn",
+                    "k": [3],
+                    "weights": ["distance"],
+                }
+            ],
+        },
+        {"node_type": "decision", "search_space": [{"module_name": "argmax"}]},
+    ]
+
+    pipeline = Pipeline.from_search_space(search_space)
+    pipeline.fit(dataset)
+    predictions = pipeline.predict(["test utterance"])
+    assert len(predictions) == 1
diff --git a/tests/modules/scoring/test_linear.py b/tests/modules/scoring/test_linear.py
@@ -2,6 +2,7 @@
 
 import numpy as np
 
+from autointent import Pipeline
 from autointent.context.data_handler import DataHandler
 from autointent.modules import LinearScorer
 from tests.conftest import get_test_embedder_config
@@ -45,3 +46,23 @@ def test_base_linear(dataset):
         new_scorer = LinearScorer.load(temp_dir)
         new_predictions = new_scorer.predict(test_data)
         np.testing.assert_almost_equal(predictions, new_predictions, decimal=5)
+
+
+def test_linear_in_pipeline(dataset):
+    """Fit a `linear` + argmax pipeline and check that one utterance yields one prediction."""
+    search_space = [
+        {
+            "node_type": "scoring",
+            "search_space": [
+                {
+                    "module_name": "linear",
+                }
+            ],
+        },
+        {"node_type": "decision", "search_space": [{"module_name": "argmax"}]},
+    ]
+
+    pipeline = Pipeline.from_search_space(search_space)
+    pipeline.fit(dataset)
+    predictions = pipeline.predict(["test utterance"])
+    assert len(predictions) == 1
diff --git a/tests/modules/scoring/test_lora.py b/tests/modules/scoring/test_lora.py
@@ -5,6 +5,7 @@
 import numpy as np
 import pytest
 
+from autointent import Pipeline
 from autointent.context.data_handler import DataHandler
 from autointent.modules import BERTLoRAScorer
 
@@ -117,3 +118,26 @@ def test_lora_cache_clearing(dataset):
     # Should raise exception after clearing cache
     with pytest.raises(RuntimeError):
         scorer.predict(test_data)
+
+
+def test_lora_in_pipeline(dataset):
+    """Fit a `lora` + argmax pipeline and check that one utterance yields one prediction."""
+    search_space = [
+        {
+            "node_type": "scoring",
+            "search_space": [
+                {
+                    "module_name": "lora",
+                    "classification_model_config": [{"model_name": "prajjwal1/bert-tiny"}],
+                    "num_train_epochs": [1],
+                    "batch_size": [8],
+                }
+            ],
+        },
+        {"node_type": "decision", "search_space": [{"module_name": "argmax"}]},
+    ]
+
+    pipeline = Pipeline.from_search_space(search_space)
+    pipeline.fit(dataset)
+    predictions = pipeline.predict(["test utterance"])
+    assert len(predictions) == 1
diff --git a/tests/modules/scoring/test_mlknn.py b/tests/modules/scoring/test_mlknn.py
diff --git a/tests/modules/scoring/test_ptuning.py b/tests/modules/scoring/test_ptuning.py
diff --git a/tests/modules/scoring/test_rerank_scorer.py b/tests/modules/scoring/test_rerank_scorer.py
diff --git a/tests/modules/scoring/test_rnn.py b/tests/modules/scoring/test_rnn.py
diff --git a/tests/modules/scoring/test_sklearn.py b/tests/modules/scoring/test_sklearn.py