Skip to content

Commit 20e2349

Browse files
committed
Add unit tests for add_and_update_tasks function
1 parent e9625a1 commit 20e2349

File tree

3 files changed

+117
-2
lines changed

3 files changed

+117
-2
lines changed
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
{
2+
"capability_name": "math_mathematics_modeling_real_world",
3+
"capability_description": "The math_mathematics_modeling_real_world capability consists of 1500 real-world mathematical modeling problems. Each problem requires the application of mathematical concepts to formulate and solve real-life scenarios, emphasizing critical thinking and problem-solving skills.",
4+
"capability_domain": "math",
5+
"capability_instructions": "f\"\"\"Solve the following real-world modeling problem step by step. The last line of your response should be of the form \"ANSWER: $ANSWER\" (without quotes) where $ANSWER is the answer to the problem.\n\nProblem: {t[\"problem\"]}\n\nRemember to put your answer on its own line at the end in the form \"ANSWER:$ANSWER\" (without quotes) where $ANSWER is the answer to the problem, and you do not need to use a \\boxed command.\"\"\"",
6+
"capability_data": [
7+
{
8+
"id": "1",
9+
"problem": "A company wants to determine the optimal number of units to produce in order to maximize profit. The profit function is given by P(x) = -2x^2 + 40x - 150, where x is the number of units produced. How many units should the company produce to maximize profit?",
10+
"answer": "10"
11+
},
12+
{
13+
"id": "2",
14+
"problem": "A farmer has 240 meters of fencing to create a rectangular pen. What dimensions will maximize the area of the pen?",
15+
"answer": "60 by 60"
16+
},
17+
{
18+
"id": "3",
19+
"problem": "A car rental company charges a flat fee of $50 plus $20 per day. If a customer has a budget of $200, how many days can they rent a car?",
20+
"answer": "7"
21+
}
22+
]
23+
}
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
class Capability:
2+
@staticmethod
3+
def repr_tasks() -> dict[str, dict]:
4+
return {
5+
"1": {
6+
"problem": "A company wants to determine the optimal number of units to produce in order to maximize profit. The profit function is given by P(x) = -2x^2 + 40x - 150, where x is the number of units produced. How many units should the company produce to maximize profit?",
7+
"answer": "10"
8+
},
9+
"2": {
10+
"problem": "A farmer has 240 meters of fencing to create a rectangular pen. What dimensions will maximize the area of the pen?",
11+
"answer": "60 by 60"
12+
},
13+
"3": {
14+
"problem": "A car rental company charges a flat fee of $50 plus $20 per day. If a customer has a budget of $200, how many days can they rent a car?",
15+
"answer": "7"
16+
}
17+
}
18+
19+
@staticmethod
20+
def get_instructions(t: dict) -> str:
21+
return f"""Solve the following real-world modeling problem step by step. The last line of your response should be of the form "ANSWER: $ANSWER" (without quotes) where $ANSWER is the answer to the problem.\n\nProblem: {t["problem"]}\n\nRemember to put your answer on its own line at the end in the form "ANSWER:$ANSWER" (without quotes) where $ANSWER is the answer to the problem, and you do not need to use a \\boxed command."""
22+
23+
@staticmethod
24+
def score(t: dict, submission: str) -> float | None:
25+
return 1.0 if submission==t["answer"] else 0.0

tests/src/test_capability_class.py

Lines changed: 69 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,7 @@ def test_create_capability_from_dict():
190190
"class": '```python\nclass Capability:\n @staticmethod\n def repr_tasks() -> dict[str, dict]:\n return {\n "1": {\n "problem": "Prove that the number of ways to choose 2 elements from a set of n elements is equal to the number of ways to choose n-2 elements from the same set.",\n "answer": "\\\\binom{n}{2} = \\\\binom{n}{n-2}"\n },\n "2": {\n "problem": "Show that for any positive integer n, the sum of the first n odd numbers equals n^2.",\n "answer": "1 + 3 + 5 + ... + (2n-1) = n^2"\n },\n "3": {\n "problem": "Demonstrate that \\sum_{k=0}^{n} \\binom{n}{k} = 2^n using a combinatorial argument.",\n "answer": "\\\\sum_{k=0}^{n} \\binom{n}{k} = 2^n"\n }\n}\n\n @staticmethod\n def get_instructions(t: dict) -> str:\n return f"""Provide a combinatorial proof for the following problem. The last line of your response should be of the form "ANSWER: $ANSWER" (without quotes) where $ANSWER is your proof or explanation.\\n\\nProblem: {t["problem"]}\\n\\nRemember to put your proof or explanation on its own line at the end in the form "ANSWER:$ANSWER" (without quotes) where $ANSWER is your proof or explanation."""\n\n @staticmethod\n def score(t: dict, submission: str) -> float | None:\n return 1.0 if submission.lower().strip() == t["answer"].lower().strip() else 0.0\n```',
191191
}
192192
gen_capability_tests_dir = os.path.join(
193-
test_dir, "capabilities", gen_capability_dict["domain"]
193+
test_dir, "capabilities_t1", gen_capability_dict["domain"]
194194
)
195195
os.makedirs(gen_capability_tests_dir, exist_ok=True)
196196

@@ -200,7 +200,7 @@ def test_create_capability_from_dict():
200200
assert capability.name == gen_capability_dict["name"]
201201
assert capability.description == gen_capability_dict["description"]
202202
assert capability.domain == gen_capability_dict["domain"]
203-
shutil.rmtree(os.path.join(test_dir, "capabilities"))
203+
shutil.rmtree(os.path.join(test_dir, "capabilities_t1"))
204204

205205

206206
def test_extract_and_parse_response():
@@ -235,3 +235,70 @@ def test_extract_and_parse_response():
235235
assert len(extracted_response_dict["parsed_response"]) == len(
236236
dummy_response_dict["response_json"]
237237
)
238+
239+
240+
def test_add_and_update_tasks_new_task():
    """
    Test that add_and_update_tasks appends a task to the capability.

    The capability directory is copied first so the original fixture is
    never modified; one task is then added to the copy and the copy is
    compared against a freshly loaded original. It checks the following:
    - The number of tasks grows by the number of tasks passed in.
    - The capability's representation string remains unchanged.
    """
    capability_path = "capabilities_t2/math/math_mathematics_modeling_real_world"
    copy_path = os.path.join(test_dir, f"copy_{capability_path}")
    # Top-level directory of the copy; removing it removes the whole tree.
    copy_root = os.path.join(test_dir, f"copy_{capability_path.split('/')[0]}")
    # Create a copy
    shutil.copytree(
        os.path.join(test_dir, capability_path),
        copy_path,
        dirs_exist_ok=True,
    )
    try:
        # Read the capability configuration from the copy
        capability = Capability(copy_path)
        # Create a list of new tasks to add
        # (id "4" is presumably unused by the fixture -- verify against its data)
        new_tasks = [
            {"id": "4", "problem": "Problem 4", "answer": "Answer 4"},
        ]
        # Add and update tasks in the capability
        capability.add_and_update_tasks(tasks=new_tasks)
        # Check if the number of tasks is updated correctly and
        # the update doesn't affect the representation string
        original_capability = Capability(os.path.join(test_dir, capability_path))
        assert (len(original_capability._data) + len(new_tasks)) == len(capability._data)
        assert capability.capability_repr_class_str == original_capability.capability_repr_class_str
    finally:
        # Clean up even when an assertion fails, so no stale copy is left behind
        shutil.rmtree(copy_root)
272+
273+
274+
def test_add_and_update_tasks_repr_tasks():
    """
    Test that add_and_update_tasks overwrites tasks passed with ids 1-3.

    The capability directory is copied first so the original fixture is
    never modified; three replacement tasks are passed to the copy and the
    copy is compared against a freshly loaded original. It checks the
    following:
    - The number of tasks is unchanged.
    - The first three stored tasks carry the new problem and answer.
    - The capability's representation string differs from the original.
    """
    capability_path = "capabilities_t2/math/math_mathematics_modeling_real_world"
    copy_path = os.path.join(test_dir, f"copy_{capability_path}")
    # Top-level directory of the copy; removing it removes the whole tree.
    copy_root = os.path.join(test_dir, f"copy_{capability_path.split('/')[0]}")
    # Create a copy
    shutil.copytree(
        os.path.join(test_dir, capability_path),
        copy_path,
        dirs_exist_ok=True,
    )
    try:
        # Read the capability configuration from the copy
        capability = Capability(copy_path)
        # Create replacement tasks for ids "1" to "3"
        repr_tasks = [
            {"id": "1", "problem": "Problem 1", "answer": "Answer 1"},
            {"id": "2", "problem": "Problem 2", "answer": "Answer 2"},
            {"id": "3", "problem": "Problem 3", "answer": "Answer 3"},
        ]
        repr_tasks.sort(key=lambda x: x["id"])
        # Add and update tasks in the capability
        capability.add_and_update_tasks(tasks=repr_tasks)
        # Check if the existing tasks are updated correctly
        original_capability = Capability(os.path.join(test_dir, capability_path))
        assert len(original_capability._data) == len(capability._data)
        # Compare position by position; indexing (rather than zip) keeps a
        # too-short _data list a hard failure instead of a silent skip.
        for index, expected in enumerate(repr_tasks):
            assert capability._data[index]["problem"] == expected["problem"]
        for index, expected in enumerate(repr_tasks):
            assert capability._data[index]["answer"] == expected["answer"]
        assert capability.capability_repr_class_str != original_capability.capability_repr_class_str
    finally:
        # Clean up even when an assertion fails, so no stale copy is left behind
        shutil.rmtree(copy_root)

0 commit comments

Comments
 (0)