Add test classes for the validation suggester and the Tuebingen model suggester.

grace-sng7 · grace-sng7 · commit 0a620f78be81 · 2025-03-30T23:44:46.000-05:00
Signed-off-by: Grace Sng <grace.sng75@gmail.com>
diff --git a/pywhyllm/suggesters/tuebingen_model_suggester.py b/pywhyllm/suggesters/tuebingen_model_suggester.py
@@ -21,7 +21,7 @@ def __init__(self, llm=None):
         super().__init__(llm)
 
     def suggest_description(
-            self, variable, context=None, ask_reference=False
+            self, variable, ask_reference=False
     ):
         generate_description = self._build_description_program(variable)
 
@@ -255,11 +255,11 @@ def _build_relationship_program(
                         the answer within the tags, <answer>Yes/No</answer>, and the most influential reference within 
                         the tags <reference>Author, Title, Year of publication</reference>.
                         \n\n\n----------------\n\n\n<answer>Yes</answer>\n<reference>Author, Title, Year of 
-                        publication</reference>\n\n\n----------------\n\n\n<answer>No</answer> {{~/user}}"""
+                        publication</reference>\n\n\n----------------\n\n\n<answer>No</answer>"""
                 else:
                     query["user"] += """When consensus is reached, thinking carefully and factually, explain the council's answer. 
                     Provide the answer within the tags, <answer>Yes/No</answer>.
-                        \n\n\n----------------\n\n\n<answer>Yes</answer>\n\n\n----------------\n\n\n<answer>No</answer> {{~/user}}"""
+                        \n\n\n----------------\n\n\n<answer>Yes</answer>\n\n\n----------------\n\n\n<answer>No</answer>"""
 
             elif use_strategy == Strategy.CoT:
                 if use_description:
diff --git a/pywhyllm/suggesters/validation_suggester.py b/pywhyllm/suggesters/validation_suggester.py
@@ -65,7 +65,7 @@ def request_negative_controls(
             factors_list: list(),
             negative_controls_counter: list(),
             domain_expertise: str,
-            analysis_context: list = CONTEXT
+            analysis_context = CONTEXT
     ):
         negative_controls_list: List[str] = list()
 
diff --git a/pywhyllm/tests/model_suggester/data_providers/tuebingen_model_suggester_data_provider.py b/pywhyllm/tests/model_suggester/data_providers/tuebingen_model_suggester_data_provider.py
@@ -8,18 +8,55 @@
 
 # MOCK_RESPONSES
 test_suggest_description_expected_response = "<description>Water is a transparent, tasteless, odorless, nearly colorless liquid that is essential for all life forms and covers approximately 71% of Earth's surface, also existing in solid (ice) and gas (vapor) states.</description>"
-test_suggest_onesided_relationship_expected_response = "<answer>A</answer>"
-test_suggest_relationship_expected_response = "<answer>Yes</answer> <reference>Popkin, Barry M., Kristen E. D\'Anci, and Irwin H. Rosenberg. \"Water, hydration and health.\" Nutrition reviews 68.8 (2010): 439-458.</reference>"
+test_suggest_onesided_relationship_a_cause_b_expected_response = "<answer>A</answer>"
+test_suggest_onesided_relationship_a_not_cause_b_expected_response = "<answer>B</answer>"
+test_suggest_relationship_a_cause_b_expected_response = "<answer>Yes</answer> <reference>Popkin, Barry M., Kristen E. D\'Anci, and Irwin H. Rosenberg. \"Water, hydration and health.\" Nutrition reviews 68.8 (2010): 439-458.</reference>"
+test_suggest_relationship_a_not_cause_b_expected_response = "<answer>No</answer> <reference>Popkin, Barry M., Kristen E. D\'Anci, and Irwin H. Rosenberg. \"Water, hydration and health.\" Nutrition reviews 68.8 (2010): 439-458.</reference>"
+
 # ASSERTIONS
-test_suggest_description_expected_result = [
-    "Water is a transparent, tasteless, odorless, nearly colorless liquid that is essential for all life forms and covers approximately 71% of Earth's surface, also existing in solid (ice) and gas (vapor) states."]
-test_suggest_onesided_relationship_expected_result = 1
-test__build_description_program_expected_result = {
+test_suggest_description_expected_result = ([
+                                                "Water is a transparent, tasteless, odorless, nearly colorless liquid that is essential for all life forms and covers approximately 71% of Earth's surface, also existing in solid (ice) and gas (vapor) states."],
+                                            [])
+test_suggest_onesided_relationship_a_cause_b_expected_result = 1
+test_suggest_onesided_relationship_a_not_cause_b_expected_result = 0
+test__build_description_program_no_context_no_reference_expected_result = {
     'system': 'You are a helpful assistant for writing concise and peer-reviewed descriptions. Your goal \n            is to provide factual and succinct description of the given concept.',
     'user': " Describe the concept of water.\n                    In one sentence, provide a factual and succinct description of water\n                        Let's think step-by-step to make sure that we have a proper and clear description. Then provide \n                        your final answer within the tags, <description></description>."}
-test_suggest_relationship_expected_result = (1,
-                                             [
-                                                 'Popkin, Barry M., Kristen E. D\'Anci, and Irwin H. Rosenberg. "Water, hydration and health." Nutrition reviews 68.8 (2010): 439-458.'])
+test__build_description_program_no_context_with_reference_expected_result = {
+    'system': 'You are a helpful assistant for writing concise and peer-reviewed descriptions. Your goal \n            is to provide factual and succinct description of the given concept.',
+    'user': ' Describe the concept of water.\n                    In one sentence, provide a factual and succinct description of water"\n                        Then provide two research papers that support your description.\n                        Let\'s think step-by-step to make sure that we have a proper and clear description. Then provide \n                        your final answer within the tags, <description></description>, and each research paper within the \n                        tags <paper></paper>.'}
+test__build_description_program_with_context_with_reference_expected_result = {
+    'system': 'You are a helpful assistant for writing concise and peer-reviewed descriptions. Your goal is \n            to provide factual and succinct descriptions related to the given concept and context.',
+    'user': "Using this context about the particular variable, describe the concept of water.\n            In one sentence, provide a factual and succinct description of waterThen provide two research papers that support your description.\n                Let's think step-by-step to make sure that we have a proper and clear description. Then provide your final \n                answer within the tags, <description></description>, and each research paper within the tags <reference></reference>."}
+test__build_description_program_with_context_no_reference_expected_result = {
+    'system': 'You are a helpful assistant for writing concise and peer-reviewed descriptions. Your goal is \n            to provide factual and succinct descriptions related to the given concept and context.',
+    'user': "Using this context about the particular variable, describe the concept of water.\n            In one sentence, provide a factual and succinct description of water\n                    Let's think step-by-step to make sure that we have a proper and clear description. Then provide your final \n                    answer within the tags, <description></description>."}
+test_suggest_relationship_a_cause_b_expected_result = (1,
+                                                       [
+                                                           'Popkin, Barry M., Kristen E. D\'Anci, and Irwin H. Rosenberg. "Water, hydration and health." Nutrition reviews 68.8 (2010): 439-458.'])
+test_suggest_relationship_a_not_cause_b_expected_result = (0,
+                                                           [
+                                                               'Popkin, Barry M., Kristen E. D\'Anci, and Irwin H. Rosenberg. "Water, hydration and health." Nutrition reviews 68.8 (2010): 439-458.'])
 test__build_relationship_program_expected_result = {
-    'system': 'You are a helpful assistant on causal reasoning and biology. Your goal is to answer \n            questions about cause and effect in a factual and concise way.',
-    'user': "can changing water intake change hydration level? Answer Yes or No.At each step, each expert include a reference to a research paper that supports \n                    their argument. They will provide a one sentence summary of the paper and how it supports their argument. \n                        Then they will answer whether a change in water intake changes hydration level. Answer Yes or No.\n                        When consensus is reached, thinking carefully and factually, explain the council's answer. Provide \n                        the answer within the tags, <answer>Yes/No</answer>, and the most influential reference within \n                        the tags <reference>Author, Title, Year of publication</reference>.\n                        \n\n\n----------------\n\n\n<answer>Yes</answer>\n<reference>Author, Title, Year of \n                        publication</reference>\n\n\n----------------\n\n\n<answer>No</answer> {~/user}"}
+    'system': 'You are a helpful assistant on causal reasoning and biology. Your '
+              'goal is to answer \n'
+              '            questions about cause and effect in a factual and '
+              'concise way.',
+    'user': 'can changing water intake change hydration level? Answer Yes or '
+            'No.When consensus is reached, thinking carefully and factually, '
+            "explain the council's answer. \n"
+            '                    Provide the answer within the tags, '
+            '<answer>Yes/No</answer>.\n'
+            '                        \n'
+            '\n'
+            '\n'
+            '----------------\n'
+            '\n'
+            '\n'
+            '<answer>Yes</answer>\n'
+            '\n'
+            '\n'
+            '----------------\n'
+            '\n'
+            '\n'
+            '<answer>No</answer>'}
diff --git a/pywhyllm/tests/model_suggester/data_providers/validation_suggester_data_provider.py b/pywhyllm/tests/model_suggester/data_providers/validation_suggester_data_provider.py
@@ -20,9 +20,14 @@
                                                   "<answer>B</answer>", "<answer>B</answer>", "<answer>B</answer>"]
 
 # ASSERTIONS
-test_latent_confounders_expected_results = ({'mental health': 1, 'socio-economic status': 1},
-                                            ['socio-economic status', 'mental health'])
-test_negative_controls_expected_results = ({'exercise habits': 1}, ['exercise habits'])
+test_suggest_latent_confounders_expected_results = ({'mental health': 1, 'socio-economic status': 1},
+                                                    [{'mental health': 1, 'socio-economic status': 1},
+                                                     ['socio-economic status', 'mental health']])
+test_request_latent_confounders_expected_results = ({'mental health': 1, 'socio-economic status': 1},
+                                                    ['socio-economic status', 'mental health'])
+test_suggest_negative_controls_expected_results = (
+{'exercise habits': 1}, [{'exercise habits': 1}, ['exercise habits']])
+test_request_negative_controls_expected_results = ({'exercise habits': 1}, ['exercise habits'])
 test_parent_critique_expected_results = []
 test_children_critique_expected_results = ['lung cancer']
 test_pairwise_critique_expected_results = ('smoking', 'lung cancer')
diff --git a/pywhyllm/tests/model_suggester/test_identification_suggester.py b/pywhyllm/tests/model_suggester/test_identification_suggester.py
@@ -3,13 +3,20 @@
 from guidance.models._openai import OpenAI
 
 from pywhyllm.suggesters.identification_suggester import IdentificationSuggester
+from pywhyllm.suggesters.model_suggester import ModelSuggester
 from pywhyllm.tests.model_suggester.data_providers.model_suggester_data_provider import *
 from pywhyllm.tests.model_suggester.data_providers.identification_suggester_data_provider import *
-from pywhyllm.tests.model_suggester.test_model_suggester import TestModelSuggester
 
 class TestIdentificationSuggester(unittest.TestCase):
     def test_suggest_backdoor(self):
-        return TestModelSuggester().test_suggest_confounders()
+        modeler = IdentificationSuggester()
+        mock_llm = MagicMock(spec=OpenAI)
+        modeler.llm = mock_llm
+        mock_model_suggester = MagicMock(spec=ModelSuggester)
+        modeler.model_suggester = mock_model_suggester
+        mock_model_suggester.suggest_confounders = MagicMock(return_value=test_suggest_confounders_expected_results)
+        result = modeler.suggest_backdoor(test_vars[0], test_vars[1], test_vars, test_domain_expertises_expected_result)
+        assert result == test_suggest_confounders_expected_results
 
     def test_suggest_mediators(self):
         modeler = IdentificationSuggester()
diff --git a/pywhyllm/tests/model_suggester/test_tuebingen_model_suggester.py b/pywhyllm/tests/model_suggester/test_tuebingen_model_suggester.py
@@ -14,7 +14,7 @@ def test_suggest_description(self):
 
         mock_llm.__add__ = MagicMock(return_value=mock_llm)
         mock_llm.__getitem__ = MagicMock(return_value=test_suggest_description_expected_response)
-        result = modeler.suggest_description(variable)
+        result = modeler.suggest_description(variable, True)
         assert result == test_suggest_description_expected_result
 
     def test_suggest_onesided_relationship(self):
@@ -23,34 +23,55 @@ def test_suggest_onesided_relationship(self):
         modeler.llm = mock_llm
 
         mock_llm.__add__ = MagicMock(return_value=mock_llm)
-        mock_llm.__getitem__ = MagicMock(return_value=test_suggest_onesided_relationship_expected_response)
+        #Given the two variables and their descriptions, variable a causes variable b
+        mock_llm.__getitem__ = MagicMock(return_value=test_suggest_onesided_relationship_a_cause_b_expected_response)
         result = modeler.suggest_onesided_relationship(variable_a, description_a, variable_b, description_b)
-        assert result == test_suggest_onesided_relationship_expected_result
+        assert result == test_suggest_onesided_relationship_a_cause_b_expected_result
+
+        #Given the two variables and their descriptions, variable a does not cause variable b
+        mock_llm.__getitem__ = MagicMock(return_value=test_suggest_onesided_relationship_a_not_cause_b_expected_response)
+        result = modeler.suggest_onesided_relationship(variable_a, description_a, variable_b, description_b)
+        assert result == test_suggest_onesided_relationship_a_not_cause_b_expected_result
 
     def test__build_description_program(self):
         modeler = TuebingenModelSuggester()
         mock_llm = MagicMock(spec=OpenAI)
         modeler.llm = mock_llm
-
-        result = modeler._build_description_program(variable)
-        assert result == test__build_description_program_expected_result
+        #Test no context, no reference
+        result = modeler._build_description_program(variable, False, False)
+        assert result == test__build_description_program_no_context_no_reference_expected_result
+        #Test no context, with reference
+        result = modeler._build_description_program(variable, False, True)
+        assert result == test__build_description_program_no_context_with_reference_expected_result
+        #Test with context, no reference
+        result = modeler._build_description_program(variable, True, False)
+        assert result == test__build_description_program_with_context_no_reference_expected_result
+        #Test with context, with reference
+        result = modeler._build_description_program(variable, True, True)
+        assert result == test__build_description_program_with_context_with_reference_expected_result
 
     def test_suggest_relationship(self):
         modeler = TuebingenModelSuggester()
         mock_llm = MagicMock(spec=OpenAI)
         modeler.llm = mock_llm
 
         mock_llm.__add__ = MagicMock(return_value=mock_llm)
-        mock_llm.__getitem__ = MagicMock(return_value=test_suggest_relationship_expected_response)
+        #Given the two variables and their descriptions, variable a causes variable b
+        mock_llm.__getitem__ = MagicMock(return_value=test_suggest_relationship_a_cause_b_expected_response)
+        result = modeler.suggest_relationship(variable_a, variable_b, description_a, description_b, domain,
+                                              strategy=Strategy.ToT_Single, ask_reference=True)
+        assert result == test_suggest_relationship_a_cause_b_expected_result
+        #Given the two variables and their descriptions, variable a does not cause variable b
+        mock_llm.__getitem__ = MagicMock(return_value=test_suggest_relationship_a_not_cause_b_expected_response)
         result = modeler.suggest_relationship(variable_a, variable_b, description_a, description_b, domain,
                                               strategy=Strategy.ToT_Single, ask_reference=True)
-        assert result == test_suggest_relationship_expected_result
+        assert result == test_suggest_relationship_a_not_cause_b_expected_result
 
     def test__build_relationship_program(self):
         modeler = TuebingenModelSuggester()
         mock_llm = MagicMock(spec=OpenAI)
         modeler.llm = mock_llm
 
         result = modeler._build_relationship_program(variable_a, description_a, variable_b, description_b, domain,
-                                                     use_description=False, ask_reference=True)
+                                                     use_description=False, ask_reference=False)
         assert result == test__build_relationship_program_expected_result
diff --git a/pywhyllm/tests/model_suggester/test_validation_suggester.py b/pywhyllm/tests/model_suggester/test_validation_suggester.py