Merged
18 changes: 9 additions & 9 deletions examples/llm_prompt_optimization/evaluate_prompts.py
@@ -82,9 +82,9 @@ def evaluate_ifeval(client, prompt_template, num_samples, model):
 
 try:
     formatted_prompt = prompt_template.format(instruction=instruction)
-except KeyError:
-    # Handle prompts with different placeholder names
-    formatted_prompt = prompt_template.replace("{instruction}", instruction)
+except KeyError as e:
+    print(f"Error: Prompt template missing placeholder: {e}")
+    return 0.0, 0, total, total
 
 # Call LLM with retries
 output_text = None
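For reference, the behavioral difference is easy to reproduce in isolation: str.format raises KeyError when the template's field name does not match the keyword argument passed in, and the updated handler turns that into a logged error and a zeroed result instead of silently patching the prompt. A minimal sketch (the template strings are invented for illustration):

```python
# Minimal sketch of the fail-fast behavior (illustrative templates only).
good_template = "Answer the following instruction:\n{instruction}"
bad_template = "Answer the following instruction:\n{input_text}"  # wrong placeholder name

instruction = "List three prime numbers."

# Matching placeholder: format() succeeds.
print(good_template.format(instruction=instruction))

# Mismatched placeholder: format() raises KeyError('input_text'),
# which the updated evaluator catches, logs, and turns into a zeroed result.
try:
    bad_template.format(instruction=instruction)
except KeyError as e:
    print(f"Error: Prompt template missing placeholder: {e}")
```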
@@ -163,8 +163,9 @@ def evaluate_hover(client, prompt_template, num_samples, model):
 
 try:
     formatted_prompt = prompt_template.format(claim=claim)
-except KeyError:
-    formatted_prompt = prompt_template.replace("{claim}", claim)
+except KeyError as e:
+    print(f"Error: Prompt template missing placeholder: {e}")
+    return 0.0, 0, total, total
 
 # Call LLM with retries
 output_text = None
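The same change here also illustrates why the old fallback was risky: str.replace only substitutes the exact literal "{claim}", so a template using any other spelling passed through unchanged and the model received raw template text with no claim in it. A short sketch of that failure mode (template invented for the example):

```python
# Sketch of the failure mode the old fallback could hide (invented template).
template = "Decide whether the statement is SUPPORTED or NOT SUPPORTED: {the_claim}"
claim = "The Eiffel Tower is in Berlin."

# Old path: replace() looks for the literal "{claim}", finds nothing,
# and returns the template untouched -- the claim never reaches the model.
old_prompt = template.replace("{claim}", claim)
print(old_prompt)  # still contains "{the_claim}" and no claim text

# New path: template.format(claim=claim) raises KeyError('the_claim'),
# so the run is aborted with an explicit error instead of scoring a malformed prompt.
```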
@@ -258,10 +259,9 @@ def evaluate_hotpotqa(client, prompt_template, num_samples, model):
     formatted_prompt = prompt_template.format(
         context=context_str.strip(), question=question
     )
-except KeyError:
-    # Try alternative formatting
-    formatted_prompt = prompt_template.replace("{context}", context_str.strip())
-    formatted_prompt = formatted_prompt.replace("{question}", question)
+except KeyError as e:
+    print(f"Error: Prompt template missing placeholders: {e}")
+    return 0.0, 0, total, total
 
 # Call LLM with retries
 output_text = None
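Because this evaluator needs both {context} and {question}, a caller could also validate a candidate template before starting a run rather than hitting the KeyError mid-evaluation. The helper below is not part of this PR; it is a hypothetical sketch built on the standard library's string.Formatter:

```python
# Hypothetical pre-flight check (not part of this PR): verify a template
# declares every placeholder an evaluator will pass to format().
from string import Formatter

def missing_placeholders(template: str, required: set[str]) -> set[str]:
    """Return the required field names that the template does not reference."""
    fields = {
        field_name
        for _, field_name, _, _ in Formatter().parse(template)
        if field_name  # skip literal-only segments (field_name is None)
    }
    return required - fields

template = "Context:\n{context}\n\nQuestion: {question}\nAnswer:"
print(missing_placeholders(template, {"context", "question"}))           # set()
print(missing_placeholders(template, {"context", "question", "claim"}))  # {'claim'}
```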
@@ -1,13 +1,2 @@
-You are an expert prompt engineer specializing in creating effective prompts for language models.
-
-Your task is to evolve and improve prompts to maximize their performance on specific tasks. When rewriting prompts:
-
-1. **Maintain the exact placeholder format**: Always use the same placeholder name as in the original prompt (e.g., {instruction}, {claim}, {context}, {question})
-2. **Keep it simple**: Avoid overly complex or verbose instructions unless necessary
-3. **Be specific**: Provide clear, actionable guidance to the model
-4. **Test-oriented**: Focus on what will improve accuracy on the given evaluation metrics
-5. **Format-aware**: Ensure the prompt works well with the expected input/output format
-
-**CRITICAL**: Your rewritten prompt must use EXACTLY the same placeholder names as the original. Do not change {instruction} to {input_text} or any other variation.
-
-Generate only the improved prompt text, nothing else.
+You are an expert prompt evaluator.
+Your job is to analyze the provided prompts and evaluate them systematically.
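For readers unfamiliar with how a system message file like this is consumed, the sketch below shows typical wiring into an OpenAI-compatible chat call. The file path, model name, and user content are placeholders for illustration and are not taken from this repository:

```python
# Sketch of how a system message file is typically passed to a chat model
# (file path, model name, and user content are illustrative placeholders).
from openai import OpenAI

with open("system_message.txt", "r", encoding="utf-8") as f:
    system_message = f.read().strip()

client = OpenAI()  # assumes OPENAI_API_KEY or a compatible endpoint is configured
response = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[
        {"role": "system", "content": system_message},
        {"role": "user", "content": "Evaluate this prompt:\n\nAnswer the question concisely."},
    ],
)
print(response.choices[0].message.content)
```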