
Commit cd5ccef

Add custom prompt feature extraction for MAP-Elites
Introduces a `calculate_prompt_features` function in the evaluator that bins prompts by length and by reasoning strategy and returns the two bin indices as custom features for MAP-Elites optimization. Updates config.yaml to declare these feature dimensions and their binning. The evaluator now returns the features alongside the combined score in both evaluation stages.
Parent: 6d6d50e · Commit: cd5ccef

2 files changed: +93, -2 lines


examples/llm_prompt_optimization/config.yaml

Lines changed: 5 additions & 0 deletions
@@ -51,6 +51,11 @@ database:
   archive_size: 100
   num_islands: 4
 
+  # Feature dimensions for MAP-Elites
+  # Using custom features returned by the evaluator
+  feature_dimensions: ["prompt_length", "reasoning_strategy"]
+  feature_bins: 10  # 10x10 grid = 100 cells
+
   # Selection parameters - Optimal ratios from testing
   elite_selection_ratio: 0.1  # 10% elite selection
   exploration_ratio: 0.3  # 30% exploration
examples/llm_prompt_optimization/evaluator.py

Lines changed: 88 additions & 2 deletions
@@ -51,6 +51,80 @@
 DATASET_CONFIG_PATH = os.path.join(evaluator_dir, dataset_filename)
 print(f"Dataset configuration: {dataset_filename}")
 
+
+def calculate_prompt_features(prompt):
+    """
+    Calculate custom features for MAP-Elites binning
+
+    Returns:
+        tuple: (prompt_length, reasoning_strategy) - both in range 0-9
+    """
+    # Feature 1: Prompt length bin (0-9)
+    length = len(prompt)
+    if length < 100:
+        prompt_length = 0  # Minimal
+    elif length < 200:
+        prompt_length = 1  # Very short
+    elif length < 400:
+        prompt_length = 2  # Short
+    elif length < 600:
+        prompt_length = 3  # Medium-short
+    elif length < 900:
+        prompt_length = 4  # Medium
+    elif length < 1200:
+        prompt_length = 5  # Medium-long
+    elif length < 1600:
+        prompt_length = 6  # Long
+    elif length < 2000:
+        prompt_length = 7  # Very long
+    elif length < 2500:
+        prompt_length = 8  # Extensive
+    else:
+        prompt_length = 9  # Very extensive
+
+    # Feature 2: Reasoning strategy (0-9)
+    prompt_lower = prompt.lower()
+
+    # Check for few-shot examples
+    has_example = ('example' in prompt_lower or
+                   prompt.count('####') >= 4 or
+                   bool(re.search(r'problem:.*?solution:', prompt_lower, re.DOTALL)))
+
+    # Check for Chain-of-Thought (CoT) indicators
+    has_cot = ('step by step' in prompt_lower or
+               'step-by-step' in prompt_lower or
+               any(phrase in prompt_lower for phrase in ['think through', 'reasoning', 'explain your']) or
+               bool(re.search(r'(first|then|next|finally)', prompt_lower)))
+
+    # Assign reasoning strategy bins
+    if has_example:
+        # Few-shot examples (bins 7-9)
+        if has_cot:
+            reasoning_strategy = 9  # Few-shot + CoT (most sophisticated)
+        elif length > 1500:
+            reasoning_strategy = 8  # Extensive few-shot
+        else:
+            reasoning_strategy = 7  # Basic few-shot
+    elif has_cot:
+        # Chain-of-thought (bins 4-6)
+        if 'must' in prompt_lower or 'exactly' in prompt_lower:
+            reasoning_strategy = 6  # Strict CoT
+        elif length > 500:
+            reasoning_strategy = 5  # Detailed CoT
+        else:
+            reasoning_strategy = 4  # Basic CoT
+    else:
+        # Basic prompts (bins 0-3)
+        if length < 100:
+            reasoning_strategy = 0  # Minimal
+        elif 'solve' in prompt_lower or 'calculate' in prompt_lower:
+            reasoning_strategy = 2  # Direct instruction
+        else:
+            reasoning_strategy = 1  # Simple prompt
+
+    return prompt_length, reasoning_strategy
+
+
 def load_prompt_config(prompt_path):
     """Load the prompt from text file and dataset config from matching _dataset.yaml file."""
     # Load prompt from text file
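
As a quick hand trace of the new helper above (the prompt text here is made up; the bin values follow from the thresholds in the hunk):

    prompt = (
        "Solve the problem step by step and explain your reasoning. "
        "First identify the quantities, then compute the answer."
    )
    length_bin, strategy_bin = calculate_prompt_features(prompt)
    # ~114 characters -> prompt_length bin 1 ("Very short");
    # "step by step" hits the CoT branch and length <= 500 -> reasoning_strategy bin 4 ("Basic CoT").
    print(length_bin, strategy_bin)  # 1 4
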
@@ -280,8 +354,14 @@ def evaluate_stage1(prompt_path):
         print(f"Stage 1 accuracy: {accuracy:.3f} ({correct}/{total})")
         print('-' * 80)
 
+        # Calculate custom features
+        prompt_length, reasoning_strategy = calculate_prompt_features(prompt)
+        print(f"Prompt features - Length bin: {prompt_length}, Reasoning bin: {reasoning_strategy}")
+
         return {
-            "combined_score": accuracy
+            "combined_score": accuracy,
+            "prompt_length": prompt_length,
+            "reasoning_strategy": reasoning_strategy
         }
 
     except Exception as e:
@@ -329,8 +409,14 @@ def evaluate_stage2(prompt_path):
         print(f"Stage 2 accuracy: {accuracy:.3f} ({correct}/{total})")
         print('-' * 80)
 
+        # Calculate custom features
+        prompt_length, reasoning_strategy = calculate_prompt_features(prompt)
+        print(f"Prompt features - Length bin: {prompt_length}, Reasoning bin: {reasoning_strategy}")
+
         return {
-            "combined_score": accuracy
+            "combined_score": accuracy,
+            "prompt_length": prompt_length,
+            "reasoning_strategy": reasoning_strategy
         }
 
     except Exception as e:
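
With this change, both evaluation stages return a flat metrics dict from which the custom features are read. The numbers below are purely illustrative, not from a real run:

    # Illustrative shape of the dict now returned by evaluate_stage1 / evaluate_stage2
    # (accuracy is made up; the two bins come from calculate_prompt_features):
    metrics = {
        "combined_score": 0.58,      # accuracy on the evaluated samples
        "prompt_length": 4,          # length bin, 0-9
        "reasoning_strategy": 5,     # strategy bin, 0-9
    }
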
