Nomenclature change: problem -> task

xeon27 · xeon27 · commit a8e6c5c31f07 · 2025-03-28T17:00:15.000-04:00
diff --git a/src/generate_tasks.py b/src/generate_tasks.py
@@ -5,64 +5,64 @@
 from model import Model
 from utils.capability_utils import extract_and_parse_response
 from utils.prompts import (
-    PROBLEM_GENERATION_RESPONSE_JSON_FORMAT,
-    PROBLEM_GENERATION_SYSTEM_PROMPT,
-    PROBLEM_GENERATION_USER_PROMPT,
-    PROBLEM_GENERATION_ZERO_OR_FEW_SHOT_PATCH,
+    TASK_GENERATION_RESPONSE_JSON_FORMAT,
+    TASK_GENERATION_SYSTEM_PROMPT,
+    TASK_GENERATION_USER_PROMPT,
+    TASK_GENERATION_ZERO_OR_FEW_SHOT_PATCH,
 )
 
 
-def get_problem_generation_prompt(
+def get_task_generation_prompt(
     capability: Capability,
-    num_gen_problems: int,
+    num_gen_tasks: int,
     few_shot: bool = False,
-    sample_problems: List[Dict[str, Any]] | None = None,
+    sample_tasks: List[Dict[str, Any]] | None = None,
 ) -> Tuple[str, str]:
     """
-    Generate the system and user prompts for problem generation.
+    Generate the system and user prompts for task generation.
 
-    Generate the system and user prompts for problem generation
-    based on the given capability, number of problems to generate,
+    Both prompts are built from the task generation prompt templates
+    based on the given capability, number of tasks to generate,
     and the few-shot setting.
 
     Args
     ----
-        capability (Capability): The capability to generate problems for.
-        num_gen_problems (int): The number of problems to generate.
+        capability (Capability): The capability to generate tasks for.
+        num_gen_tasks (int): The number of tasks to generate.
         few_shot (bool, optional): The few-shot setting. Defaults to False.
-        sample_problems (List[Dict[str, Any]] | None, optional): The sample problems
+        sample_tasks (List[Dict[str, Any]] | None, optional): The sample tasks
             to use. Defaults to None.
 
     Returns
     -------
         Tuple[str, str]: The system and user prompts.
     """
-    assert (few_shot and (sample_problems is not None)) or (not few_shot), (
-        "Few-shot setting is enabled but no sample problems are provided."
+    assert (few_shot and (sample_tasks is not None)) or (not few_shot), (
+        "Few-shot setting is enabled but no sample tasks are provided."
     )
     prompt_type = "few_shot" if few_shot else "zero_shot"
-    sys_prompt = PROBLEM_GENERATION_SYSTEM_PROMPT.format(
-        zero_or_few_shot_patch=PROBLEM_GENERATION_ZERO_OR_FEW_SHOT_PATCH[prompt_type][
+    sys_prompt = TASK_GENERATION_SYSTEM_PROMPT.format(
+        zero_or_few_shot_patch=TASK_GENERATION_ZERO_OR_FEW_SHOT_PATCH[prompt_type][
             "sys"
         ],
-        response_json_format=PROBLEM_GENERATION_RESPONSE_JSON_FORMAT,
+        response_json_format=TASK_GENERATION_RESPONSE_JSON_FORMAT,
     )
-    user_zero_or_few_shot_patch = PROBLEM_GENERATION_ZERO_OR_FEW_SHOT_PATCH[
-        prompt_type
-    ]["user"]
-    if few_shot and sample_problems is not None:
+    user_zero_or_few_shot_patch = TASK_GENERATION_ZERO_OR_FEW_SHOT_PATCH[prompt_type][
+        "user"
+    ]
+    if few_shot and sample_tasks is not None:
         user_zero_or_few_shot_patch = user_zero_or_few_shot_patch.format(
-            capability_sample_problems=json.dumps(
-                {f"problem_{elm['id']}": elm["problem"] for elm in sample_problems},
+            capability_sample_tasks=json.dumps(
+                {f"task_{elm['id']}": elm["problem"] for elm in sample_tasks},
                 indent=4,
             ),
         )
-    user_prompt = PROBLEM_GENERATION_USER_PROMPT.format(
+    user_prompt = TASK_GENERATION_USER_PROMPT.format(
         capability_name=capability.name,
         capability_description=capability.description,
         capability_domain=capability.domain,
         zero_or_few_shot_patch=user_zero_or_few_shot_patch,
-        num_gen_problems=num_gen_problems,
+        num_gen_tasks=num_gen_tasks,
     )
     return sys_prompt, user_prompt
 
@@ -120,18 +120,18 @@ def generate_tasks_using_llm(
     #   c. using a scoring function
 
     # Generate task problems
-    # Extract sample problems from representative tasks
-    sample_problems = capability.get_repr_tasks()
-    for task in sample_problems:
+    # Use the capability's representative tasks as the sample tasks
+    sample_tasks = capability.get_repr_tasks()
+    for task in sample_tasks:
         # Remove the answer
         task.pop("answer", None)
 
-    # Generate new problems using the scientist LLM
-    sys_prompt, user_prompt = get_problem_generation_prompt(
+    # Generate new tasks using the scientist LLM
+    sys_prompt, user_prompt = get_task_generation_prompt(
         capability=capability,
-        num_gen_problems=num_tasks,
+        num_gen_tasks=num_tasks,
         few_shot=kwargs.get("few_shot", True),
-        sample_problems=sample_problems,
+        sample_tasks=sample_tasks,
     )
     response, task_gen_metadata = scientist_llm.generate(
         sys_prompt=sys_prompt,
@@ -143,17 +143,17 @@ def generate_tasks_using_llm(
     print(f"Output:\n\n{response}\n\n")
     print(f"Metadata: {task_gen_metadata}")
     parsed_response = extract_and_parse_response(response)
-    new_problems = parsed_response["parsed_response"]
-    # Combine with sample problems to get the full set of problems
-    start_id = len(sample_problems) + 1
-    all_problems = sample_problems + [
-        {"id": (start_id + idx), "problem": new_problems[idx]}
-        for idx in range(len(new_problems))
+    new_tasks = parsed_response["parsed_response"]
+    # Combine with sample tasks to get the full set of tasks
+    start_id = len(sample_tasks) + 1
+    all_tasks = sample_tasks + [
+        {"id": (start_id + idx), "problem": new_tasks[idx]}
+        for idx in range(len(new_tasks))
     ]
 
     # Solve task and generate answers
     solved_tasks, task_solver_metadata = capability.solve_tasks(
-        tasks=all_problems,
+        tasks=all_tasks,
         llm=scientist_llm,
         gen_cfg=scientist_llm_gen_cfg_task_solve,
     )
diff --git a/src/utils/prompts.py b/src/utils/prompts.py
@@ -58,45 +58,45 @@ def score(t: dict, submission: str) -> float | None:
 Generate {num_gen_capabilities} new interesting capabilities within the {domain} domain.
 """
 
-PROBLEM_GENERATION_SYSTEM_PROMPT = """
-You are an expert in designing tasks for a given capability. Each task consists of a problem and an answer. Your goal is to create problems alone. The name, description, {zero_or_few_shot_patch} for the capability will be provided. You will be particularly rewarded for designing diverse problems spanning a wide range of difficulty levels for the given capability.
+TASK_GENERATION_SYSTEM_PROMPT = """
+You are an expert in designing tasks for a given capability. The name, description, {zero_or_few_shot_patch} for the capability will be provided. You will be particularly rewarded for designing diverse tasks spanning a wide range of difficulty levels for the given capability.
 
 Respond precisely in the following format, including the JSON start and end markers:
 
 THOUGHT: <THOUGHT>
 RESPONSE JSON:
 {response_json_format}
 
-In <THOUGHT>, briefly think and reason about what kind of problems you want to propose.
-In <STR>, provide a string containing the problem text.
+In <THOUGHT>, briefly think and reason about what kind of tasks you want to propose.
+In <STR>, provide a string containing the task text.
 
-Be careful to make sure that all proposed problems are unique. Also ensure that all problems are within the scope of the given capability. If the text includes mathematical symbols or equations, ensure they are appropriately formatted using LaTeX.
+Be careful to make sure that all proposed tasks are unique. Also ensure that all tasks are within the scope of the given capability. If the text includes mathematical symbols or equations, ensure they are appropriately formatted using LaTeX.
 
 Your response will be automatically parsed so ensure it adheres to the specified format.
 """
 
-PROBLEM_GENERATION_USER_PROMPT = """
-Design problems for the following capability:
+TASK_GENERATION_USER_PROMPT = """
+Design tasks for the following capability:
 
 Name: {capability_name}
 Description: {capability_description}
 Domain: {capability_domain}
 {zero_or_few_shot_patch}
-Generate {num_gen_problems} new problems for the given capability.
+Generate {num_gen_tasks} new tasks for the given capability.
 """
 
-PROBLEM_GENERATION_ZERO_OR_FEW_SHOT_PATCH = {
+TASK_GENERATION_ZERO_OR_FEW_SHOT_PATCH = {
     "zero_shot": {"sys": "and domain", "user": ""},
     "few_shot": {
-        "sys": "domain and a few sample problems",
-        "user": "Sample problems:\n{capability_sample_problems}\n",
+        "sys": "domain and a few sample tasks",
+        "user": "Sample tasks:\n{capability_sample_tasks}\n",
     },
 }
 
-PROBLEM_GENERATION_RESPONSE_JSON_FORMAT = """
+TASK_GENERATION_RESPONSE_JSON_FORMAT = """
 {
-    "problem_1": <STR>,
-    "problem_2": <STR>,
+    "task_1": <STR>,
+    "task_2": <STR>,
     ...
 }""".strip("\n")