Skip to content

Commit f6b7ef3

Browse files
committed
Test classes for validation suggester and tuebingen model suggester.
Signed-off-by: Grace Sng <[email protected]>
1 parent c149fb1 commit f6b7ef3

File tree

6 files changed

+349
-206
lines changed

6 files changed

+349
-206
lines changed

pywhyllm/suggesters/tuebingen_model_suggester.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ class Strategy(Enum):
1717

1818

1919
class TuebingenModelSuggester(ModelSuggester):
20-
def __init__(self, llm):
20+
def __init__(self, llm=None):
2121
super().__init__(llm)
2222

2323
def suggest_description(

pywhyllm/suggesters/validation_suggester.py

Lines changed: 110 additions & 205 deletions
Large diffs are not rendered by default.
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# TESTS: input arguments that the TestTuebingenModelSuggester tests pass to the suggester methods under test.
2+
variable = "water"
3+
variable_a = "water intake"
4+
description_a = "the amount of water a person drinks per day"
5+
variable_b = "hydration level"
6+
description_b = "the level of hydration in the body"
7+
domain = "biology"
8+
9+
# MOCK_RESPONSES: canned strings that the tests install as the return value of the mocked LLM's __getitem__.
10+
test_suggest_description_expected_response = "<description>Water is a transparent, tasteless, odorless, nearly colorless liquid that is essential for all life forms and covers approximately 71% of Earth's surface, also existing in solid (ice) and gas (vapor) states.</description>"
11+
test_suggest_onesided_relationship_expected_response = "<answer>A</answer>"
12+
test_suggest_relationship_expected_response = "<answer>Yes</answer> <reference>Popkin, Barry M., Kristen E. D\'Anci, and Irwin H. Rosenberg. \"Water, hydration and health.\" Nutrition reviews 68.8 (2010): 439-458.</reference>"
13+
# ASSERTIONS: expected values that the tests compare against what the suggester methods return.
14+
test_suggest_description_expected_result = [
15+
"Water is a transparent, tasteless, odorless, nearly colorless liquid that is essential for all life forms and covers approximately 71% of Earth's surface, also existing in solid (ice) and gas (vapor) states."]
16+
test_suggest_onesided_relationship_expected_result = 1
17+
test__build_description_program_expected_result = {
18+
'system': 'You are a helpful assistant for writing concise and peer-reviewed descriptions. Your goal \n is to provide factual and succinct description of the given concept.',
19+
'user': " Describe the concept of water.\n In one sentence, provide a factual and succinct description of water\n Let's think step-by-step to make sure that we have a proper and clear description. Then provide \n your final answer within the tags, <description></description>."}
20+
test_suggest_relationship_expected_result = (1,
21+
[
22+
'Popkin, Barry M., Kristen E. D\'Anci, and Irwin H. Rosenberg. "Water, hydration and health." Nutrition reviews 68.8 (2010): 439-458.'])
23+
test__build_relationship_program_expected_result = {
24+
'system': 'You are a helpful assistant on causal reasoning and biology. Your goal is to answer \n questions about cause and effect in a factual and concise way.',
25+
'user': "can changing water intake change hydration level? Answer Yes or No.At each step, each expert include a reference to a research paper that supports \n their argument. They will provide a one sentence summary of the paper and how it supports their argument. \n Then they will answer whether a change in water intake changes hydration level. Answer Yes or No.\n When consensus is reached, thinking carefully and factually, explain the council's answer. Provide \n the answer within the tags, <answer>Yes/No</answer>, and the most influential reference within \n the tags <reference>Author, Title, Year of publication</reference>.\n \n\n\n----------------\n\n\n<answer>Yes</answer>\n<reference>Author, Title, Year of \n publication</reference>\n\n\n----------------\n\n\n<answer>No</answer> {~/user}"}
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
# TESTS: variable names and domain expertise that the TestValidationSuggester tests pass to the suggester methods under test.
2+
test_vars = ["smoking", "lung cancer", "exercise habits", "air pollution exposure"]
3+
domain_expertises = ['Epidemiology']
4+
5+
# MOCK RESPONSES: canned strings installed on the mocked LLM's __getitem__ (single strings as return_value, lists as side_effect).
6+
test_latent_confounders_expected_response = "<confounding_factor>socio-economic status</confounding_factor> <confounding_factor>mental health</confounding_factor>"
7+
test_negative_controls_expected_response = "<negative_control>exercise habits</negative_control>"
8+
test_parent_critique_expected_response = "None"
9+
test_children_critique_expected_response = "<influenced_factor>lung cancer</influenced_factor>"
10+
test_pairwise_critique_expected_response = "The answer is <answer>A</answer>"
11+
test_critique_graph_parent_expected_response = ["None",
12+
"<influencing_factor>smoking</influencing_factor> <influencing_factor>air pollution exposure</influencing_factor>",
13+
"<influencing_factor>air pollution exposure</influencing_factor>",
14+
"None"]
15+
test_critique_graph_children_expected_response = ["<influenced_factor>lung cancer</influenced_factor>",
16+
"<influenced_factor>exercise habits</influenced_factor>",
17+
"<influenced_factor>lung cancer</influenced_factor>",
18+
"<influenced_factor>lung cancer</influenced_factor> <influenced_factor>exercise habits</influenced_factor>"]
19+
test_critique_graph_pairwise_expected_response = ["<answer>A</answer>", "<answer>A</answer>", "<answer>C</answer>",
20+
"<answer>B</answer>", "<answer>B</answer>", "<answer>B</answer>"]
21+
22+
# ASSERTIONS: expected values that the tests compare against what the ValidationSuggester methods return.
23+
test_latent_confounders_expected_results = ({'mental health': 1, 'socio-economic status': 1},
24+
['socio-economic status', 'mental health'])
25+
test_negative_controls_expected_results = ({'exercise habits': 1}, ['exercise habits'])
26+
test_parent_critique_expected_results = []
27+
test_children_critique_expected_results = ['lung cancer']
28+
test_pairwise_critique_expected_results = ('smoking', 'lung cancer')
29+
test_critique_graph_parent_expected_results = ({('air pollution exposure', 'exercise habits'): 1,
30+
('air pollution exposure', 'lung cancer'): 1,
31+
('air pollution exposure', 'smoking'): 1,
32+
('smoking', 'lung cancer'): 1},
33+
{('air pollution exposure', 'exercise habits'): 1,
34+
('air pollution exposure', 'lung cancer'): 1,
35+
('smoking', 'lung cancer'): 1})
36+
test_critique_graph_children_expected_results = ({('air pollution exposure', 'smoking'): 1,
37+
('exercise habits', 'air pollution exposure'): 1,
38+
('exercise habits', 'smoking'): 1,
39+
('lung cancer', 'air pollution exposure'): 1,
40+
('lung cancer', 'exercise habits'): 1,
41+
('lung cancer', 'smoking'): 1},
42+
{('exercise habits', 'air pollution exposure'): 1,
43+
('exercise habits', 'lung cancer'): 1,
44+
('lung cancer', 'air pollution exposure'): 1,
45+
('lung cancer', 'exercise habits'): 1,
46+
('lung cancer', 'smoking'): 1})
47+
test_critique_graph_pairwise_expected_results = ({('air pollution exposure', 'exercise habits'): 1,
48+
('exercise habits', 'lung cancer'): 1,
49+
('smoking', 'air pollution exposure'): 1,
50+
('smoking', 'exercise habits'): 1,
51+
('smoking', 'lung cancer'): 1},
52+
{('smoking', 'lung cancer'): 1,
53+
('smoking', 'exercise habits'): 1,
54+
('exercise habits', 'lung cancer'): 1,
55+
('air pollution exposure', 'lung cancer'): 1,
56+
('air pollution exposure', 'exercise habits'): 1})
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
import unittest
2+
from unittest.mock import MagicMock
3+
from guidance.models._openai import OpenAI
4+
5+
from pywhyllm.suggesters.tuebingen_model_suggester import TuebingenModelSuggester, Strategy
6+
from pywhyllm.tests.model_suggester.data_providers.tuebingen_model_suggester_data_provider import *
7+
8+
9+
class TestTuebingenModelSuggester(unittest.TestCase):
    """Unit tests for ``TuebingenModelSuggester`` driven by a mocked LLM.

    Each test swaps the suggester's ``llm`` attribute for a ``MagicMock``
    and compares the suggester's return value with the expected value from
    the data-provider module.
    """

    def setUp(self):
        # Fresh suggester and mock per test so no mock state leaks between tests.
        self.modeler = TuebingenModelSuggester()
        self.mock_llm = MagicMock(spec=OpenAI)
        self.modeler.llm = self.mock_llm

    def _stub_llm_response(self, response):
        """Make the mocked LLM return ``response`` for any item lookup."""
        # Presumably the suggester chains ``llm + prompt`` and then indexes the
        # result; returning the mock from ``__add__`` keeps that chain on the
        # mock -- verify against the suggester implementation.
        self.mock_llm.__add__ = MagicMock(return_value=self.mock_llm)
        self.mock_llm.__getitem__ = MagicMock(return_value=response)

    def test_suggest_description(self):
        self._stub_llm_response(test_suggest_description_expected_response)

        result = self.modeler.suggest_description(variable)

        # assertEqual (not a bare assert) survives ``python -O`` and prints a diff.
        self.assertEqual(result, test_suggest_description_expected_result)

    def test_suggest_onesided_relationship(self):
        self._stub_llm_response(test_suggest_onesided_relationship_expected_response)

        result = self.modeler.suggest_onesided_relationship(
            variable_a, description_a, variable_b, description_b)

        self.assertEqual(result, test_suggest_onesided_relationship_expected_result)

    def test__build_description_program(self):
        # No LLM response is stubbed: only the returned prompt dict is checked.
        result = self.modeler._build_description_program(variable)

        self.assertEqual(result, test__build_description_program_expected_result)

    def test_suggest_relationship(self):
        self._stub_llm_response(test_suggest_relationship_expected_response)

        result = self.modeler.suggest_relationship(
            variable_a, variable_b, description_a, description_b, domain,
            strategy=Strategy.ToT_Single, ask_reference=True)

        self.assertEqual(result, test_suggest_relationship_expected_result)

    def test__build_relationship_program(self):
        # No LLM response is stubbed: only the returned prompt dict is checked.
        result = self.modeler._build_relationship_program(
            variable_a, description_a, variable_b, description_b, domain,
            use_description=False, ask_reference=True)

        self.assertEqual(result, test__build_relationship_program_expected_result)
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
import unittest
2+
from typing import Dict
3+
from unittest.mock import MagicMock
4+
from guidance.models._openai import OpenAI
5+
6+
from pywhyllm.suggesters.validation_suggester import ValidationSuggester
7+
from pywhyllm.tests.model_suggester.data_providers.validation_suggester_data_provider import *
8+
from pywhyllm.tests.model_suggester.data_providers.model_suggester_data_provider import *
9+
from pywhyllm.helpers import RelationshipStrategy
10+
11+
12+
class TestValidationSuggester(unittest.TestCase):
    """Unit tests for ``ValidationSuggester`` driven by a mocked LLM.

    Each test swaps the suggester's ``llm`` attribute for a ``MagicMock``
    whose ``__getitem__`` yields canned responses from the data-provider
    modules, then compares the suggester's return value with the expected
    value.
    """

    def setUp(self):
        # Fresh suggester and mock per test so no mock state leaks between tests.
        self.modeler = ValidationSuggester()
        self.mock_llm = MagicMock(spec=OpenAI)
        self.modeler.llm = self.mock_llm
        # Presumably the suggester chains ``llm + prompt`` and then indexes the
        # result; returning the mock from ``__add__`` keeps that chain on the
        # mock -- verify against the suggester implementation.
        self.mock_llm.__add__ = MagicMock(return_value=self.mock_llm)

    def _stub_llm(self, **getitem_kwargs):
        """Replace the mock's ``__getitem__``.

        Pass ``return_value=...`` for one fixed response, or
        ``side_effect=[...]`` for a sequence of responses.
        """
        self.mock_llm.__getitem__ = MagicMock(**getitem_kwargs)

    def test_request_latent_confounders_expected_response(self):
        self._stub_llm(return_value=test_latent_confounders_expected_response)

        latent_confounders_counter: Dict[str, int] = {}
        result = self.modeler.request_latent_confounders(
            test_vars[0], test_vars[1], latent_confounders_counter, domain_expertises[0])

        # assertEqual (not a bare assert) survives ``python -O`` and prints a diff.
        self.assertEqual(result, test_latent_confounders_expected_results)

    def test_request_negative_controls_expected_response(self):
        self._stub_llm(return_value=test_negative_controls_expected_response)

        negative_controls_counter: Dict[str, int] = {}
        result = self.modeler.request_negative_controls(
            test_vars[0], test_vars[1], test_vars, negative_controls_counter, domain_expertises[0])

        self.assertEqual(result, test_negative_controls_expected_results)

    def test_request_parent_critique_expected_response(self):
        self._stub_llm(return_value=test_parent_critique_expected_response)

        result = self.modeler.request_parent_critique(test_vars[0], test_vars, domain_expertises[0])

        self.assertEqual(result, test_parent_critique_expected_results)

    def test_request_children_critique_expected_response(self):
        self._stub_llm(return_value=test_children_critique_expected_response)

        result = self.modeler.request_children_critique(test_vars[0], test_vars, domain_expertises[0])

        self.assertEqual(result, test_children_critique_expected_results)

    def test_pairwise_critique_expected_response(self):
        self._stub_llm(return_value=test_pairwise_critique_expected_response)

        result = self.modeler.request_pairwise_critique(domain_expertises[0], test_vars[0], test_vars[1])

        self.assertEqual(result, test_pairwise_critique_expected_results)

    def test_critique_graph(self):
        # (strategy, mocked LLM responses, input relationships, expected result)
        scenarios = (
            (RelationshipStrategy.Parent,
             test_critique_graph_parent_expected_response,
             test_suggest_relationships_parent_expected_results,
             test_critique_graph_parent_expected_results),
            (RelationshipStrategy.Child,
             test_critique_graph_children_expected_response,
             test_suggest_relationships_child_expected_results,
             test_critique_graph_children_expected_results),
            (RelationshipStrategy.Pairwise,
             test_critique_graph_pairwise_expected_response,
             test_suggest_relationships_pairwise_expected_results,
             test_critique_graph_pairwise_expected_results),
        )

        for strategy, llm_responses, relationships, expected in scenarios:
            # subTest reports every failing strategy instead of stopping at the first.
            with self.subTest(strategy=strategy):
                self._stub_llm(side_effect=llm_responses)

                result = self.modeler.critique_graph(
                    test_vars, relationships, domain_expertises, strategy)

                self.assertEqual(result, expected)

0 commit comments

Comments
 (0)