Skip to content

Commit 1263fd5

Browse files
authored
Validate GEPA metric signature (#8697)
* Validate GEPA metric signature
* Update gepa.py
* Update gepa.py
1 parent c58c733 commit 1263fd5

File tree

2 files changed

+22
-0
lines changed

2 files changed

+22
-0
lines changed

dspy/teleprompt/gepa/gepa.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
+ import inspect
  import logging
  import random
  from dataclasses import dataclass
@@ -260,6 +261,14 @@ def __init__(
          # Reproducibility
          seed: int | None = 0,
      ):
+         try:
+             inspect.signature(metric).bind(None, None, None, None, None)
+         except TypeError as e:
+             raise TypeError(
+                 "GEPA metric must accept five arguments: (gold, pred, trace, pred_name, pred_trace). "
+                 "See https://dspy.ai/api/optimizers/GEPA for details."
+             ) from e
+
          self.metric_fn = metric

          # Budget configuration

tests/teleprompt/test_gepa.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
  import json

+ import pytest
+
  import dspy
  import dspy.clients
  from dspy import Example
@@ -29,6 +31,10 @@ def __call__(self, prompt=None, messages=None, **kwargs):
  def simple_metric(example, prediction, trace=None, pred_name=None, pred_trace=None):
      return dspy.Prediction(score=example.output == prediction.output, feedback="Wrong answer.")

+
+ def bad_metric(example, prediction):
+     return 0.0
+
  def test_basic_workflow():
      """Test to ensure the basic compile flow runs without errors."""
      student = SimpleModule("input -> output")
@@ -53,3 +59,10 @@ def test_basic_workflow():
      ]
      o = optimizer.compile(student, trainset=trainset, valset=trainset)
assert o.predictor.signature.instructions == 'Given the field `input` containing a question or phrase, produce the field `output` containing the exact, direct, and contextually appropriate answer or response that the user expects, without additional explanations, commentary, or general knowledge unless explicitly requested.\n\nKey details and guidelines:\n\n1. The `input` field contains a question or phrase that may be literal, factual, or culturally specific (e.g., references to popular culture or memes).\n\n2. The `output` must be the precise answer or response that directly addresses the `input` as intended by the user, not a general or encyclopedic explanation.\n\n3. If the `input` is a well-known phrase or question from popular culture (e.g., "What does the fox say?"), the `output` should reflect the expected or canonical answer associated with that phrase, rather than a factual or scientific explanation.\n\n4. Avoid providing additional background information, scientific explanations, or alternative interpretations unless explicitly requested.\n\n5. The goal is to produce the answer that the user expects or the "correct" answer in the context of the question, including culturally recognized or meme-based answers.\n\n6. If the `input` is a straightforward factual question (e.g., "What is the color of the sky?"), provide the commonly accepted direct answer (e.g., "Blue") rather than a detailed scientific explanation.\n\n7. The output should be concise, clear, and focused solely on answering the question or phrase in the `input`.\n\nExample:\n\n- Input: "What is the color of the sky?"\n- Output: "Blue."\n\n- Input: "What does the fox say?"\n- Output: "Ring-ding-ding-ding-dingeringeding!"\n\nThis approach ensures that the assistant provides the expected, contextually appropriate answers rather than general or overly detailed responses that may be considered incorrect by the user.'
+
+
+ def test_metric_requires_feedback_signature():
+     reflection_lm = DictDummyLM([])
+     with pytest.raises(TypeError):
+         dspy.GEPA(metric=bad_metric, reflection_lm=reflection_lm, max_metric_calls=1)

0 commit comments

Comments
 (0)