Commit fd7e850

fix(experiments): move evals out of root span (#1437)
1 parent: 9ec9f4a

1 file changed: +38 -38 lines

langfuse/_client/client.py

Lines changed: 38 additions & 38 deletions
@@ -2864,50 +2864,50 @@ async def _process_experiment_item(
                 )
                 raise e

-            # Run evaluators
-            evaluations = []
+        # Run evaluators
+        evaluations = []

-            for evaluator in evaluators:
-                try:
-                    eval_metadata: Optional[Dict[str, Any]] = None
+        for evaluator in evaluators:
+            try:
+                eval_metadata: Optional[Dict[str, Any]] = None

-                    if isinstance(item, dict):
-                        eval_metadata = item.get("metadata")
-                    elif hasattr(item, "metadata"):
-                        eval_metadata = item.metadata
+                if isinstance(item, dict):
+                    eval_metadata = item.get("metadata")
+                elif hasattr(item, "metadata"):
+                    eval_metadata = item.metadata

-                    eval_results = await _run_evaluator(
-                        evaluator,
-                        input=input_data,
-                        output=output,
-                        expected_output=expected_output,
-                        metadata=eval_metadata,
+                eval_results = await _run_evaluator(
+                    evaluator,
+                    input=input_data,
+                    output=output,
+                    expected_output=expected_output,
+                    metadata=eval_metadata,
+                )
+                evaluations.extend(eval_results)
+
+                # Store evaluations as scores
+                for evaluation in eval_results:
+                    self.create_score(
+                        trace_id=trace_id,
+                        observation_id=span.id,
+                        name=evaluation.name,
+                        value=evaluation.value,  # type: ignore
+                        comment=evaluation.comment,
+                        metadata=evaluation.metadata,
+                        config_id=evaluation.config_id,
+                        data_type=evaluation.data_type,  # type: ignore
                     )
-                    evaluations.extend(eval_results)
-
-                    # Store evaluations as scores
-                    for evaluation in eval_results:
-                        self.create_score(
-                            trace_id=trace_id,
-                            observation_id=span.id,
-                            name=evaluation.name,
-                            value=evaluation.value,  # type: ignore
-                            comment=evaluation.comment,
-                            metadata=evaluation.metadata,
-                            config_id=evaluation.config_id,
-                            data_type=evaluation.data_type,  # type: ignore
-                        )

-                except Exception as e:
-                    langfuse_logger.error(f"Evaluator failed: {e}")
+            except Exception as e:
+                langfuse_logger.error(f"Evaluator failed: {e}")

-            return ExperimentItemResult(
-                item=item,
-                output=output,
-                evaluations=evaluations,
-                trace_id=trace_id,
-                dataset_run_id=dataset_run_id,
-            )
+        return ExperimentItemResult(
+            item=item,
+            output=output,
+            evaluations=evaluations,
+            trace_id=trace_id,
+            dataset_run_id=dataset_run_id,
+        )

     def _create_experiment_run_name(
         self, *, name: Optional[str] = None, run_name: Optional[str] = None
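The sketch below is a minimal, self-contained illustration of the control-flow change this commit makes: the evaluator loop, which previously ran inside the experiment item's root span, now runs after that span's `with` block has exited, so the root span covers only task execution. The `root_span`, `run_task`, and `run_evaluators` helpers are invented stand-ins for this illustration (they are not Langfuse APIs); only the ordering mirrors the diff above.

from contextlib import contextmanager


@contextmanager
def root_span(name):
    # Stand-in for the experiment item's root span (hypothetical helper,
    # not the Langfuse API).
    print(f"span '{name}' opened")
    try:
        yield {"id": "span-123"}
    finally:
        print(f"span '{name}' closed")


def run_task(span):
    # Stand-in for executing the experiment task inside the span.
    print(f"  task ran inside span {span['id']}")
    return "task output"


def run_evaluators(output, span):
    # Stand-in for the evaluator loop; scores can still reference the span's
    # id because the span object outlives its `with` block.
    print(f"  evaluated {output!r} for observation {span['id']}")


# Before this commit: evaluators ran inside the root span, so evaluator
# latency was attributed to the span.
with root_span("experiment-item (before)") as span:
    output = run_task(span)
    run_evaluators(output, span)

# After this commit: the span closes right after the task, and the
# evaluators run outside it.
with root_span("experiment-item (after)") as span:
    output = run_task(span)
run_evaluators(output, span)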
