
Commit 484844a

fix tests
1 parent 50e2899 commit 484844a

5 files changed: +59 -7 lines changed

promptolution/helpers.py

Lines changed: 3 additions & 1 deletion

@@ -122,8 +122,10 @@ def run_evaluation(
     if isinstance(prompts[0], str):
         str_prompts = cast(List[str], prompts)
         prompts = [Prompt(p) for p in str_prompts]
+    else:
+        str_prompts = [p.construct_prompt() for p in cast(List[Prompt], prompts)]
     scores = task.evaluate(prompts, predictor, eval_strategy="full")
-    df = pd.DataFrame(dict(prompt=prompts, score=scores))
+    df = pd.DataFrame(dict(prompt=str_prompts, score=scores))
     df = df.sort_values("score", ascending=False, ignore_index=True)

     return df
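
With this change, run_evaluation returns plain prompt strings in the DataFrame whether the caller passes raw strings or Prompt objects. A minimal standalone sketch of the same normalization pattern, using a stand-in Prompt class (only construct_prompt() is assumed here; this is not the real promptolution.Prompt):

from typing import List, Union

import pandas as pd


class Prompt:
    """Stand-in for promptolution's Prompt; only construct_prompt() is assumed."""

    def __init__(self, instruction: str):
        self.instruction = instruction

    def construct_prompt(self) -> str:
        return self.instruction


def normalize_to_strings(prompts: List[Union[str, Prompt]]) -> List[str]:
    # Mirrors the patched logic: strings pass through, Prompt objects are rendered.
    if isinstance(prompts[0], str):
        return list(prompts)
    return [p.construct_prompt() for p in prompts]


str_prompts = normalize_to_strings([Prompt("Classify the sentiment."), Prompt("Is this positive?")])
df = pd.DataFrame(dict(prompt=str_prompts, score=[0.8, 0.7]))
df = df.sort_values("score", ascending=False, ignore_index=True)
print(df)  # the "prompt" column now holds strings, not Prompt objects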

promptolution/utils/prompt_creation.py

Lines changed: 24 additions & 5 deletions

@@ -7,6 +7,7 @@
 from typing import TYPE_CHECKING, List, Optional, Union

 from promptolution.utils.formatting import extract_from_tag
+from promptolution.utils.logging import get_logger

 if TYPE_CHECKING:  # pragma: no cover
     from promptolution.llms.base_llm import BaseLLM

@@ -18,8 +19,11 @@
     PROMPT_CREATION_TEMPLATE_FROM_TASK_DESCRIPTION,
     PROMPT_CREATION_TEMPLATE_TD,
     PROMPT_VARIATION_TEMPLATE,
+    default_prompts,
 )

+logger = get_logger(__name__)
+

 def create_prompt_variation(
     prompt: Union[List[str], str], llm: "BaseLLM", meta_prompt: Optional[str] = None

@@ -128,6 +132,7 @@ def create_prompts_from_task_description(
     llm: "BaseLLM",
     meta_prompt: Optional[str] = None,
     n_prompts: int = 10,
+    n_retries: int = 3,
 ) -> List[str]:
     """Generate a set of prompts from a given task description.

@@ -137,13 +142,27 @@
         meta_prompt (str): The meta prompt to use for generating the prompts.
             If None, a default meta prompt is used.
         n_prompts (int): The number of prompts to generate.
+        n_retries (int): The number of retries to attempt if prompt generation fails.
     """
     if meta_prompt is None:
         meta_prompt = PROMPT_CREATION_TEMPLATE_FROM_TASK_DESCRIPTION

     meta_prompt = meta_prompt.replace("<task_desc>", task_description).replace("<n_prompts>", str(n_prompts))
-
-    prompts_str = llm.get_response(meta_prompt)[0]
-    prompts = json.loads(prompts_str)
-
-    return prompts
+    final_prompts = None
+    for _ in range(n_retries):
+        prompts_str = llm.get_response(meta_prompt)[0]
+        try:
+            prompts = json.loads(prompts_str)
+            assert isinstance(prompts, list) and all(isinstance(p, str) for p in prompts) and len(prompts) == n_prompts
+            final_prompts = prompts
+            break
+        except (json.JSONDecodeError, AssertionError):
+            logger.warning("Failed to parse prompts JSON, retrying...")
+
+    if final_prompts is None:
+        logger.error(
+            f"Failed to generate prompts from task description after {n_retries} retries, returning default prompts."
+        )
+        final_prompts = default_prompts[:n_prompts]
+
+    return final_prompts
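
The new loop accepts the meta-LLM output only if it parses as JSON into a list of exactly n_prompts strings, and falls back to default_prompts after n_retries failures. A rough, self-contained sketch of the same retry-and-fallback pattern with a fake LLM in place of BaseLLM (FlakyLLM and generate_prompts are illustrative names, not promptolution APIs):

import json
import logging
from typing import List

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

default_prompts = ["Give me your response within <final_answer> tags."]  # trimmed stand-in


class FlakyLLM:
    """Fake LLM: returns non-JSON text once, then a valid JSON list."""

    def __init__(self):
        self.calls = 0

    def get_response(self, prompt: str) -> List[str]:
        self.calls += 1
        if self.calls == 1:
            return ["Sure! Here are some prompts: ..."]  # not JSON -> triggers a retry
        return ['["Summarize the text.", "Classify the sentiment."]']


def generate_prompts(llm, meta_prompt: str, n_prompts: int = 2, n_retries: int = 3) -> List[str]:
    final_prompts = None
    for _ in range(n_retries):
        prompts_str = llm.get_response(meta_prompt)[0]
        try:
            prompts = json.loads(prompts_str)
            assert isinstance(prompts, list) and all(isinstance(p, str) for p in prompts)
            assert len(prompts) == n_prompts
            final_prompts = prompts
            break
        except (json.JSONDecodeError, AssertionError):
            logger.warning("Failed to parse prompts JSON, retrying...")
    if final_prompts is None:
        logger.error("Giving up after %d retries, returning default prompts.", n_retries)
        final_prompts = default_prompts[:n_prompts]
    return final_prompts


print(generate_prompts(FlakyLLM(), "Generate 2 prompts as a JSON list."))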

promptolution/utils/templates.py

Lines changed: 28 additions & 0 deletions

@@ -174,3 +174,31 @@

 Return the new prompt in the following format:
 <prompt>new prompt</prompt>"""
+
+
+default_prompts = [
+    "Give me your response within <final_answer> tags.",
+    "Please provide a thoughtful answer to my question and wrap your response in <final_answer> tags so I can easily identify it.",
+    "I need your expertise on this matter. Kindly structure your response within <final_answer> tags for better readability.",
+    "Analyze the following and present your findings enclosed in <final_answer> </final_answer> tags.",
+    "Consider this inquiry carefully. Your comprehensive response should be formatted within <final_answer> tags to facilitate extraction.",
+    "Respond succinctly. Ensure all content appears between <final_answer> and </final_answer> markers.",
+    "Would you mind addressing this request? Please place your entire response inside <final_answer> </final_answer> formatting.",
+    "I'm seeking your insights on a particular topic. Kindly ensure that your complete analysis is contained within <final_answer> tags for my convenience.",
+    "Examine this query thoroughly and deliver your conclusions. All output must be encapsulated in <final_answer> </final_answer> notation for processing purposes.",
+    "Help me understand this subject better. Your explanation should begin with <final_answer> and conclude with </final_answer> to maintain proper structure.",
+    "I require information on the following. Please format your response with <final_answer> tags at the beginning and end for clarity.",
+    "Contemplate this scenario and offer your perspective. Remember to enclose all content within <final_answer> tags as per requirements.",
+    "Elaborate on this concept, making sure to wrap the entirety of your explanation in <final_answer> </final_answer> markers for systematic review.",
+    "Describe your approach to this situation. Be thorough yet concise, and place your complete response between <final_answer> and </final_answer> tags.",
+    "Share your knowledge on this matter. Your entire response should be presented within <final_answer> tags to facilitate proper integration into my workflow.",
+    "Let's think step by step. Your answer should be enclosed within <final_answer> </final_answer> tags.",
+    "Provide a detailed response to the following question, ensuring that all information is contained within <final_answer> tags for easy extraction.",
+    "Kindly address the following topic, formatting your entire response between <final_answer> and </final_answer> markers for clarity and organization.",
+    "Offer your insights on this issue, making sure to encapsulate your full response within <final_answer> tags for seamless processing.",
+    "Delve into this subject and present your findings, ensuring that all content is wrapped in <final_answer> </final_answer> notation for systematic analysis.",
+    "Illuminate this topic with your expertise, formatting your complete explanation within <final_answer> tags for straightforward comprehension.",
+    "Provide your perspective on this matter, ensuring that your entire response is contained within <final_answer> tags for efficient review.",
+    "Analyze the following scenario and deliver your conclusions, making sure to enclose all output in <final_answer> </final_answer> markers for clarity.",
+    "Help me grasp this concept better by structuring your explanation between <final_answer> and </final_answer> tags for proper formatting.",
+]
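
Every one of these fallback prompts instructs the model to wrap its answer in <final_answer> tags, which keeps downstream answer extraction uniform. As a rough illustration of that extraction step (promptolution ships its own extract_from_tag helper; the regex below is not that implementation):

import re
from typing import Optional


def extract_final_answer(response: str) -> Optional[str]:
    # Grab whatever sits between the first <final_answer>...</final_answer> pair.
    match = re.search(r"<final_answer>(.*?)</final_answer>", response, flags=re.DOTALL)
    return match.group(1).strip() if match else None


response = "Reasoning...\n<final_answer>positive</final_answer>"
print(extract_final_answer(response))  # -> "positive"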

tests/helpers/test_helpers.py

Lines changed: 3 additions & 0 deletions

@@ -197,6 +197,8 @@ def test_run_evaluation(mock_get_task, mock_get_predictor, mock_get_llm, sample_
         "Is this text positive, negative, or neutral?",
     ]

+    prompts = [Prompt(p) for p in prompts]
+
     # Now this will work because mock_task is a MagicMock
     mock_task.evaluate.return_value = np.array([0.8, 0.7, 0.9])

@@ -298,6 +300,7 @@ def test_helpers_integration(sample_df, experiment_config):
     # Verify results
     assert isinstance(result, pd.DataFrame)
     assert len(result) == 2
+    print([p in result["prompt"].values for p in optimized_prompts_str])
     assert all(p in result["prompt"].values for p in optimized_prompts_str)

     # Verify optimization was called

tests/optimizers/test_capo.py

Lines changed: 1 addition & 1 deletion

@@ -209,7 +209,7 @@ def test_capo_crossover_prompt(mock_meta_llm, mock_predictor, initial_prompts, m
         .replace("<task_desc>", full_task_desc)
     )

-    assert mock_meta_llm.call_history[0]["prompts"][0] == expected_meta_prompt
+    assert str(mock_meta_llm.call_history[0]["prompts"][0]) == expected_meta_prompt


 def test_capo_mutate_prompt(mock_meta_llm, mock_predictor, initial_prompts, mock_task, mock_df):
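
The crossover meta-prompt recorded in call_history is now a Prompt object rather than a raw string, so the assertion compares its str() rendering. A tiny sketch of why the cast matters, assuming (as the test change implies) that Prompt.__str__ returns the constructed prompt text; the class below is a stand-in, not promptolution's Prompt:

class Prompt:
    """Stand-in: __str__ is assumed to render the full prompt text."""

    def __init__(self, text: str):
        self.text = text

    def __str__(self) -> str:
        return self.text


expected_meta_prompt = "Combine these prompts: ..."
recorded = Prompt("Combine these prompts: ...")

assert recorded != expected_meta_prompt        # object vs. str: not equal
assert str(recorded) == expected_meta_prompt   # comparing the rendered text passes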
