Add test classes for the validation suggester and the Tuebingen model suggester.

grace-sng7 · grace-sng7 · commit f6b7ef334bca · 2025-03-29T20:54:05.000-05:00
Signed-off-by: Grace Sng <grace.sng75@gmail.com>
diff --git a/pywhyllm/suggesters/tuebingen_model_suggester.py b/pywhyllm/suggesters/tuebingen_model_suggester.py
@@ -17,7 +17,7 @@ class Strategy(Enum):
 
 
 class TuebingenModelSuggester(ModelSuggester):
-    def __init__(self, llm):
+    def __init__(self, llm=None):
         super().__init__(llm)
 
     def suggest_description(
diff --git a/pywhyllm/suggesters/validation_suggester.py b/pywhyllm/suggesters/validation_suggester.py
diff --git a/pywhyllm/tests/model_suggester/data_providers/tuebingen_model_suggester_data_provider.py b/pywhyllm/tests/model_suggester/data_providers/tuebingen_model_suggester_data_provider.py
@@ -0,0 +1,25 @@
+# Inputs passed to the TuebingenModelSuggester methods under test
+variable = "water"
+variable_a = "water intake"
+description_a = "the amount of water a person drinks per day"
+variable_b = "hydration level"
+description_b = "the level of hydration in the body"
+domain = "biology"
+
+# Canned LLM outputs that the tests return from the mocked llm's __getitem__
+test_suggest_description_expected_response = "<description>Water is a transparent, tasteless, odorless, nearly colorless liquid that is essential for all life forms and covers approximately 71% of Earth's surface, also existing in solid (ice) and gas (vapor) states.</description>"
+test_suggest_onesided_relationship_expected_response = "<answer>A</answer>"
+test_suggest_relationship_expected_response = "<answer>Yes</answer> <reference>Popkin, Barry M., Kristen E. D\'Anci, and Irwin H. Rosenberg. \"Water, hydration and health.\" Nutrition reviews 68.8 (2010): 439-458.</reference>"
+# Expected values the tests compare the suggester results against
+test_suggest_description_expected_result = [
+    "Water is a transparent, tasteless, odorless, nearly colorless liquid that is essential for all life forms and covers approximately 71% of Earth's surface, also existing in solid (ice) and gas (vapor) states."]
+test_suggest_onesided_relationship_expected_result = 1
+test__build_description_program_expected_result = {
+    'system': 'You are a helpful assistant for writing concise and peer-reviewed descriptions. Your goal \n            is to provide factual and succinct description of the given concept.',
+    'user': " Describe the concept of water.\n                    In one sentence, provide a factual and succinct description of water\n                        Let's think step-by-step to make sure that we have a proper and clear description. Then provide \n                        your final answer within the tags, <description></description>."}
+test_suggest_relationship_expected_result = (1,
+                                             [
+                                                 'Popkin, Barry M., Kristen E. D\'Anci, and Irwin H. Rosenberg. "Water, hydration and health." Nutrition reviews 68.8 (2010): 439-458.'])
+test__build_relationship_program_expected_result = {
+    'system': 'You are a helpful assistant on causal reasoning and biology. Your goal is to answer \n            questions about cause and effect in a factual and concise way.',
+    'user': "can changing water intake change hydration level? Answer Yes or No.At each step, each expert include a reference to a research paper that supports \n                    their argument. They will provide a one sentence summary of the paper and how it supports their argument. \n                        Then they will answer whether a change in water intake changes hydration level. Answer Yes or No.\n                        When consensus is reached, thinking carefully and factually, explain the council's answer. Provide \n                        the answer within the tags, <answer>Yes/No</answer>, and the most influential reference within \n                        the tags <reference>Author, Title, Year of publication</reference>.\n                        \n\n\n----------------\n\n\n<answer>Yes</answer>\n<reference>Author, Title, Year of \n                        publication</reference>\n\n\n----------------\n\n\n<answer>No</answer> {~/user}"}
diff --git a/pywhyllm/tests/model_suggester/data_providers/validation_suggester_data_provider.py b/pywhyllm/tests/model_suggester/data_providers/validation_suggester_data_provider.py
@@ -0,0 +1,56 @@
+# Inputs: candidate variables and the domain expertise handed to ValidationSuggester
+test_vars = ["smoking", "lung cancer", "exercise habits", "air pollution exposure"]
+domain_expertises = ['Epidemiology']
+
+# Canned LLM outputs; the list-valued ones feed MagicMock(side_effect=...), one item per mocked lookup
+test_latent_confounders_expected_response = "<confounding_factor>socio-economic status</confounding_factor> <confounding_factor>mental health</confounding_factor>"
+test_negative_controls_expected_response = "<negative_control>exercise habits</negative_control>"
+test_parent_critique_expected_response = "None"
+test_children_critique_expected_response = "<influenced_factor>lung cancer</influenced_factor>"
+test_pairwise_critique_expected_response = "The answer is <answer>A</answer>"
+test_critique_graph_parent_expected_response = ["None",
+                                                "<influencing_factor>smoking</influencing_factor> <influencing_factor>air pollution exposure</influencing_factor>",
+                                                "<influencing_factor>air pollution exposure</influencing_factor>",
+                                                "None"]
+test_critique_graph_children_expected_response = ["<influenced_factor>lung cancer</influenced_factor>",
+                                                  "<influenced_factor>exercise habits</influenced_factor>",
+                                                  "<influenced_factor>lung cancer</influenced_factor>",
+                                                  "<influenced_factor>lung cancer</influenced_factor> <influenced_factor>exercise habits</influenced_factor>"]
+test_critique_graph_pairwise_expected_response = ["<answer>A</answer>", "<answer>A</answer>", "<answer>C</answer>",
+                                                  "<answer>B</answer>", "<answer>B</answer>", "<answer>B</answer>"]
+
+# Expected return values the tests compare the ValidationSuggester results against
+test_latent_confounders_expected_results = ({'mental health': 1, 'socio-economic status': 1},
+                                            ['socio-economic status', 'mental health'])
+test_negative_controls_expected_results = ({'exercise habits': 1}, ['exercise habits'])
+test_parent_critique_expected_results = []
+test_children_critique_expected_results = ['lung cancer']
+test_pairwise_critique_expected_results = ('smoking', 'lung cancer')
+test_critique_graph_parent_expected_results = ({('air pollution exposure', 'exercise habits'): 1,
+                                                ('air pollution exposure', 'lung cancer'): 1,
+                                                ('air pollution exposure', 'smoking'): 1,
+                                                ('smoking', 'lung cancer'): 1},
+                                               {('air pollution exposure', 'exercise habits'): 1,
+                                                ('air pollution exposure', 'lung cancer'): 1,
+                                                ('smoking', 'lung cancer'): 1})
+test_critique_graph_children_expected_results = ({('air pollution exposure', 'smoking'): 1,
+                                                  ('exercise habits', 'air pollution exposure'): 1,
+                                                  ('exercise habits', 'smoking'): 1,
+                                                  ('lung cancer', 'air pollution exposure'): 1,
+                                                  ('lung cancer', 'exercise habits'): 1,
+                                                  ('lung cancer', 'smoking'): 1},
+                                                 {('exercise habits', 'air pollution exposure'): 1,
+                                                  ('exercise habits', 'lung cancer'): 1,
+                                                  ('lung cancer', 'air pollution exposure'): 1,
+                                                  ('lung cancer', 'exercise habits'): 1,
+                                                  ('lung cancer', 'smoking'): 1})
+test_critique_graph_pairwise_expected_results = ({('air pollution exposure', 'exercise habits'): 1,
+                                                  ('exercise habits', 'lung cancer'): 1,
+                                                  ('smoking', 'air pollution exposure'): 1,
+                                                  ('smoking', 'exercise habits'): 1,
+                                                  ('smoking', 'lung cancer'): 1},
+                                                 {('smoking', 'lung cancer'): 1,
+                                                  ('smoking', 'exercise habits'): 1,
+                                                  ('exercise habits', 'lung cancer'): 1,
+                                                  ('air pollution exposure', 'lung cancer'): 1,
+                                                  ('air pollution exposure', 'exercise habits'): 1})
diff --git a/pywhyllm/tests/model_suggester/test_tuebingen_model_suggester.py b/pywhyllm/tests/model_suggester/test_tuebingen_model_suggester.py
@@ -0,0 +1,56 @@
+import unittest
+from unittest.mock import MagicMock
+from guidance.models._openai import OpenAI
+
+from pywhyllm.suggesters.tuebingen_model_suggester import TuebingenModelSuggester, Strategy
+from pywhyllm.tests.model_suggester.data_providers.tuebingen_model_suggester_data_provider import *
+
+
+class TestTuebingenModelSuggester(unittest.TestCase):  # exercises TuebingenModelSuggester against a mocked llm
+    def test_suggest_description(self):
+        modeler = TuebingenModelSuggester()
+        mock_llm = MagicMock(spec=OpenAI)
+        modeler.llm = mock_llm
+        # Chaining (`llm + prompt`) returns the same mock; indexing it returns the canned LLM text.
+        mock_llm.__add__ = MagicMock(return_value=mock_llm)
+        mock_llm.__getitem__ = MagicMock(return_value=test_suggest_description_expected_response)
+        result = modeler.suggest_description(variable)
+        self.assertEqual(result, test_suggest_description_expected_result)
+
+    def test_suggest_onesided_relationship(self):
+        modeler = TuebingenModelSuggester()
+        mock_llm = MagicMock(spec=OpenAI)
+        modeler.llm = mock_llm
+        # Same mock wiring: `+` keeps the chain on the mock, indexing yields the canned answer tag.
+        mock_llm.__add__ = MagicMock(return_value=mock_llm)
+        mock_llm.__getitem__ = MagicMock(return_value=test_suggest_onesided_relationship_expected_response)
+        result = modeler.suggest_onesided_relationship(variable_a, description_a, variable_b, description_b)
+        self.assertEqual(result, test_suggest_onesided_relationship_expected_result)
+
+    def test__build_description_program(self):
+        modeler = TuebingenModelSuggester()
+        mock_llm = MagicMock(spec=OpenAI)
+        modeler.llm = mock_llm
+        # No canned response is configured here; presumably the builder only assembles prompt text (confirm).
+        result = modeler._build_description_program(variable)
+        self.assertEqual(result, test__build_description_program_expected_result)
+
+    def test_suggest_relationship(self):
+        modeler = TuebingenModelSuggester()
+        mock_llm = MagicMock(spec=OpenAI)
+        modeler.llm = mock_llm
+        # Canned response carries both an <answer> and a <reference> tag, matching ask_reference=True below.
+        mock_llm.__add__ = MagicMock(return_value=mock_llm)
+        mock_llm.__getitem__ = MagicMock(return_value=test_suggest_relationship_expected_response)
+        result = modeler.suggest_relationship(variable_a, variable_b, description_a, description_b, domain,
+                                              strategy=Strategy.ToT_Single, ask_reference=True)
+        self.assertEqual(result, test_suggest_relationship_expected_result)
+
+    def test__build_relationship_program(self):
+        modeler = TuebingenModelSuggester()
+        mock_llm = MagicMock(spec=OpenAI)
+        modeler.llm = mock_llm
+        # No canned response is configured here; the returned system/user prompt dict is compared verbatim.
+        result = modeler._build_relationship_program(variable_a, description_a, variable_b, description_b, domain,
+                                                     use_description=False, ask_reference=True)
+        self.assertEqual(result, test__build_relationship_program_expected_result)
diff --git a/pywhyllm/tests/model_suggester/test_validation_suggester.py b/pywhyllm/tests/model_suggester/test_validation_suggester.py
@@ -0,0 +1,101 @@
+import unittest
+from typing import Dict
+from unittest.mock import MagicMock
+from guidance.models._openai import OpenAI
+
+from pywhyllm.suggesters.validation_suggester import ValidationSuggester
+from pywhyllm.tests.model_suggester.data_providers.validation_suggester_data_provider import *
+from pywhyllm.tests.model_suggester.data_providers.model_suggester_data_provider import *
+from pywhyllm.helpers import RelationshipStrategy
+
+
+class TestValidationSuggester(unittest.TestCase):  # exercises ValidationSuggester against a mocked llm
+    def test_request_latent_confounders_expected_response(self):
+        modeler = ValidationSuggester()
+        mock_llm = MagicMock(spec=OpenAI)
+        modeler.llm = mock_llm
+        # Chaining (`llm + prompt`) returns the same mock; indexing it returns the canned LLM text.
+        mock_llm.__add__ = MagicMock(return_value=mock_llm)
+
+        mock_llm.__getitem__ = MagicMock(return_value=test_latent_confounders_expected_response)
+
+        latent_confounders_counter: Dict[str, int] = {}
+        result = modeler.request_latent_confounders(test_vars[0], test_vars[1], latent_confounders_counter,
+                                                    domain_expertises[0])
+
+        self.assertEqual(result, test_latent_confounders_expected_results)
+
+    def test_request_negative_controls_expected_response(self):
+        modeler = ValidationSuggester()
+        mock_llm = MagicMock(spec=OpenAI)
+        modeler.llm = mock_llm
+        # Same mock wiring: `+` keeps the chain on the mock, indexing yields the canned response.
+        mock_llm.__add__ = MagicMock(return_value=mock_llm)
+
+        mock_llm.__getitem__ = MagicMock(return_value=test_negative_controls_expected_response)
+
+        negative_controls_counter: Dict[str, int] = {}
+        result = modeler.request_negative_controls(test_vars[0], test_vars[1], test_vars, negative_controls_counter,
+                                                   domain_expertises[0])
+
+        self.assertEqual(result, test_negative_controls_expected_results)
+
+    def test_request_parent_critique_expected_response(self):
+        modeler = ValidationSuggester()
+        mock_llm = MagicMock(spec=OpenAI)
+        modeler.llm = mock_llm
+        # The canned response is the plain string "None" (no tags); the expected result is an empty list.
+        mock_llm.__add__ = MagicMock(return_value=mock_llm)
+
+        mock_llm.__getitem__ = MagicMock(return_value=test_parent_critique_expected_response)
+
+        result = modeler.request_parent_critique(test_vars[0], test_vars, domain_expertises[0])
+
+        self.assertEqual(result, test_parent_critique_expected_results)
+
+    def test_request_children_critique_expected_response(self):
+        modeler = ValidationSuggester()
+        mock_llm = MagicMock(spec=OpenAI)
+        modeler.llm = mock_llm
+        # Same mock wiring: `+` keeps the chain on the mock, indexing yields the canned response.
+        mock_llm.__add__ = MagicMock(return_value=mock_llm)
+
+        mock_llm.__getitem__ = MagicMock(return_value=test_children_critique_expected_response)
+
+        result = modeler.request_children_critique(test_vars[0], test_vars, domain_expertises[0])
+
+        self.assertEqual(result, test_children_critique_expected_results)
+
+    def test_pairwise_critique_expected_response(self):
+        modeler = ValidationSuggester()
+        mock_llm = MagicMock(spec=OpenAI)
+        modeler.llm = mock_llm
+        # Canned response wraps the <answer> tag in extra prose ("The answer is ...").
+        mock_llm.__add__ = MagicMock(return_value=mock_llm)
+        mock_llm.__getitem__ = MagicMock(return_value=test_pairwise_critique_expected_response)
+        result = modeler.request_pairwise_critique(domain_expertises[0], test_vars[0], test_vars[1])
+        self.assertEqual(result, test_pairwise_critique_expected_results)
+
+    def test_critique_graph(self):
+        modeler = ValidationSuggester()
+        mock_llm = MagicMock(spec=OpenAI)
+        modeler.llm = mock_llm
+        # NOTE(review): test_suggest_relationships_*_expected_results presumably come from model_suggester_data_provider - confirm.
+        mock_llm.__add__ = MagicMock(return_value=mock_llm)
+        # Parent strategy: side_effect hands out one canned response per mocked lookup, in list order.
+        mock_llm.__getitem__ = MagicMock(side_effect=test_critique_graph_parent_expected_response)
+        result = modeler.critique_graph(test_vars, test_suggest_relationships_parent_expected_results,
+                                        domain_expertises, RelationshipStrategy.Parent)
+
+        self.assertEqual(result, test_critique_graph_parent_expected_results)
+        # Child strategy: a fresh mock restarts the canned-response sequence.
+        mock_llm.__getitem__ = MagicMock(side_effect=test_critique_graph_children_expected_response)
+        result = modeler.critique_graph(test_vars, test_suggest_relationships_child_expected_results,
+                                        domain_expertises, RelationshipStrategy.Child)
+
+        self.assertEqual(result, test_critique_graph_children_expected_results)
+        # Pairwise strategy: again a fresh canned-response sequence.
+        mock_llm.__getitem__ = MagicMock(side_effect=test_critique_graph_pairwise_expected_response)
+        result = modeler.critique_graph(test_vars, test_suggest_relationships_pairwise_expected_results,
+                                        domain_expertises, RelationshipStrategy.Pairwise)
+        self.assertEqual(result, test_critique_graph_pairwise_expected_results)