|
51 | 51 | DATASET_CONFIG_PATH = os.path.join(evaluator_dir, dataset_filename) |
52 | 52 | print(f"Dataset configuration: {dataset_filename}") |
53 | 53 |
|
| 54 | + |
| 55 | +def calculate_prompt_features(prompt):
| 56 | +    """
| 57 | +    Calculate custom features for MAP-Elites binning
| 58 | +
| 59 | +    Returns:
| 60 | +        tuple: (prompt_length, reasoning_strategy) - both in range 0-9
| 61 | +    """
| 62 | +    # Feature 1: Prompt length bin (0-9)
| 63 | +    length = len(prompt)
| 64 | +    if length < 100:
| 65 | +        prompt_length = 0  # Minimal
| 66 | +    elif length < 200:
| 67 | +        prompt_length = 1  # Very short
| 68 | +    elif length < 400:
| 69 | +        prompt_length = 2  # Short
| 70 | +    elif length < 600:
| 71 | +        prompt_length = 3  # Medium-short
| 72 | +    elif length < 900:
| 73 | +        prompt_length = 4  # Medium
| 74 | +    elif length < 1200:
| 75 | +        prompt_length = 5  # Medium-long
| 76 | +    elif length < 1600:
| 77 | +        prompt_length = 6  # Long
| 78 | +    elif length < 2000:
| 79 | +        prompt_length = 7  # Very long
| 80 | +    elif length < 2500:
| 81 | +        prompt_length = 8  # Extensive
| 82 | +    else:
| 83 | +        prompt_length = 9  # Very extensive
| 84 | +
| 85 | +    # Feature 2: Reasoning strategy (0-9)
| 86 | +    prompt_lower = prompt.lower()
| 87 | +
| 88 | +    # Check for few-shot examples
| 89 | +    has_example = ('example' in prompt_lower or
| 90 | +                   prompt.count('####') >= 4 or
| 91 | +                   bool(re.search(r'problem:.*?solution:', prompt_lower, re.DOTALL)))
| 92 | +
| 93 | +    # Check for Chain-of-Thought (CoT) indicators
| 94 | +    has_cot = ('step by step' in prompt_lower or
| 95 | +               'step-by-step' in prompt_lower or
| 96 | +               any(phrase in prompt_lower for phrase in ['think through', 'reasoning', 'explain your']) or
| 97 | +               bool(re.search(r'\b(first|then|next|finally)\b', prompt_lower)))
| 98 | +
| 99 | +    # Assign reasoning strategy bins
| 100 | +    if has_example:
| 101 | +        # Few-shot examples (bins 7-9)
| 102 | +        if has_cot:
| 103 | +            reasoning_strategy = 9  # Few-shot + CoT (most sophisticated)
| 104 | +        elif length > 1500:
| 105 | +            reasoning_strategy = 8  # Extensive few-shot
| 106 | +        else:
| 107 | +            reasoning_strategy = 7  # Basic few-shot
| 108 | +    elif has_cot:
| 109 | +        # Chain-of-thought (bins 4-6)
| 110 | +        if 'must' in prompt_lower or 'exactly' in prompt_lower:
| 111 | +            reasoning_strategy = 6  # Strict CoT
| 112 | +        elif length > 500:
| 113 | +            reasoning_strategy = 5  # Detailed CoT
| 114 | +        else:
| 115 | +            reasoning_strategy = 4  # Basic CoT
| 116 | +    else:
| 117 | +        # Basic prompts (bins 0-2; bin 3 is left unused)
| 118 | +        if length < 100:
| 119 | +            reasoning_strategy = 0  # Minimal
| 120 | +        elif 'solve' in prompt_lower or 'calculate' in prompt_lower:
| 121 | +            reasoning_strategy = 2  # Direct instruction
| 122 | +        else:
| 123 | +            reasoning_strategy = 1  # Simple prompt
| 124 | +
| 125 | +    return prompt_length, reasoning_strategy
| 126 | + |
| 127 | + |
54 | 128 | def load_prompt_config(prompt_path): |
55 | 129 |     """Load the prompt from text file and dataset config from matching _dataset.yaml file."""
56 | 130 |     # Load prompt from text file
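For orientation, here is a minimal sketch of how the new helper bins a prompt. The `evaluator` module name is a placeholder for this file, and `import re` is assumed to be present at the top of the file, since `calculate_prompt_features` depends on it:

```python
# Hypothetical usage; "evaluator" is a placeholder for this file's module name.
from evaluator import calculate_prompt_features

prompt = (
    "Solve the problem step by step. First, identify the quantities, "
    "then set up the equation, and finally compute the answer."
)

length_bin, strategy_bin = calculate_prompt_features(prompt)
# 121 characters -> length bin 1 ("Very short"); "step by step" trips the
# CoT check, and with no 'must'/'exactly' and length <= 500 the strategy
# falls into bin 4 ("Basic CoT").
print(length_bin, strategy_bin)  # (1, 4)
```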
@@ -280,8 +354,14 @@ def evaluate_stage1(prompt_path): |
280 | 354 |         print(f"Stage 1 accuracy: {accuracy:.3f} ({correct}/{total})")
281 | 355 |         print('-' * 80)
282 | 356 |
|
| 357 | +        # Calculate custom features
| 358 | +        prompt_length, reasoning_strategy = calculate_prompt_features(prompt)
| 359 | +        print(f"Prompt features - Length bin: {prompt_length}, Reasoning bin: {reasoning_strategy}")
| 360 | +
283 | 361 |         return {
284 | | -            "combined_score": accuracy
| 362 | +            "combined_score": accuracy,
| 363 | +            "prompt_length": prompt_length,
| 364 | +            "reasoning_strategy": reasoning_strategy
285 | 365 |         }
286 | 366 |
|
287 | 367 |     except Exception as e:
@@ -329,8 +409,14 @@ def evaluate_stage2(prompt_path): |
329 | 409 |         print(f"Stage 2 accuracy: {accuracy:.3f} ({correct}/{total})")
330 | 410 |         print('-' * 80)
331 | 411 |
|
| 412 | +        # Calculate custom features
| 413 | +        prompt_length, reasoning_strategy = calculate_prompt_features(prompt)
| 414 | +        print(f"Prompt features - Length bin: {prompt_length}, Reasoning bin: {reasoning_strategy}")
| 415 | +
332 | 416 |         return {
333 | | -            "combined_score": accuracy
| 417 | +            "combined_score": accuracy,
| 418 | +            "prompt_length": prompt_length,
| 419 | +            "reasoning_strategy": reasoning_strategy
334 | 420 |         }
335 | 421 |
|
336 | 422 |     except Exception as e:
|
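With both stages now returning `prompt_length` and `reasoning_strategy` alongside `combined_score`, the two extra keys can serve as MAP-Elites behavior descriptors on a 10x10 grid. A minimal sketch of the consuming side, where the `archive` layout and `update_archive` helper are illustrative and not part of this diff:

```python
# Illustrative MAP-Elites archive keyed by the two feature bins that
# evaluate_stage1/evaluate_stage2 now report; not part of this diff.
archive = {}  # (prompt_length, reasoning_strategy) -> (combined_score, prompt)

def update_archive(prompt, metrics):
    """Keep the best-scoring prompt seen so far in each feature cell."""
    cell = (metrics["prompt_length"], metrics["reasoning_strategy"])
    if cell not in archive or metrics["combined_score"] > archive[cell][0]:
        archive[cell] = (metrics["combined_score"], prompt)
```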