
Commit 9228e7d

fix(experiments): remove evals from main execution span (#1433)
1 parent d8c624a commit 9228e7d

File tree

1 file changed: +44 -44 lines changed

langfuse/_client/client.py

Lines changed: 44 additions & 44 deletions
@@ -2858,56 +2858,56 @@ async def _process_experiment_item(
                     metadata=final_observation_metadata,
                 )

-                # Run evaluators
-                evaluations = []
+            except Exception as e:
+                span.update(
+                    output=f"Error: {str(e)}", level="ERROR", status_message=str(e)
+                )
+                raise e

-                for evaluator in evaluators:
-                    try:
-                        eval_metadata: Optional[Dict[str, Any]] = None
+        # Run evaluators
+        evaluations = []

-                        if isinstance(item, dict):
-                            eval_metadata = item.get("metadata")
-                        elif hasattr(item, "metadata"):
-                            eval_metadata = item.metadata
+        for evaluator in evaluators:
+            try:
+                eval_metadata: Optional[Dict[str, Any]] = None

-                        eval_results = await _run_evaluator(
-                            evaluator,
-                            input=input_data,
-                            output=output,
-                            expected_output=expected_output,
-                            metadata=eval_metadata,
-                        )
-                        evaluations.extend(eval_results)
-
-                        # Store evaluations as scores
-                        for evaluation in eval_results:
-                            self.create_score(
-                                trace_id=trace_id,
-                                observation_id=span.id,
-                                name=evaluation.name,
-                                value=evaluation.value, # type: ignore
-                                comment=evaluation.comment,
-                                metadata=evaluation.metadata,
-                                config_id=evaluation.config_id,
-                                data_type=evaluation.data_type, # type: ignore
-                            )
+                if isinstance(item, dict):
+                    eval_metadata = item.get("metadata")
+                elif hasattr(item, "metadata"):
+                    eval_metadata = item.metadata

-                    except Exception as e:
-                        langfuse_logger.error(f"Evaluator failed: {e}")
+                eval_results = await _run_evaluator(
+                    evaluator,
+                    input=input_data,
+                    output=output,
+                    expected_output=expected_output,
+                    metadata=eval_metadata,
+                )
+                evaluations.extend(eval_results)
+
+                # Store evaluations as scores
+                for evaluation in eval_results:
+                    self.create_score(
+                        trace_id=trace_id,
+                        observation_id=span.id,
+                        name=evaluation.name,
+                        value=evaluation.value, # type: ignore
+                        comment=evaluation.comment,
+                        metadata=evaluation.metadata,
+                        config_id=evaluation.config_id,
+                        data_type=evaluation.data_type, # type: ignore
+                    )

-                return ExperimentItemResult(
-                    item=item,
-                    output=output,
-                    evaluations=evaluations,
-                    trace_id=trace_id,
-                    dataset_run_id=dataset_run_id,
-                )
+            except Exception as e:
+                langfuse_logger.error(f"Evaluator failed: {e}")

-            except Exception as e:
-                span.update(
-                    output=f"Error: {str(e)}", level="ERROR", status_message=str(e)
-                )
-                raise e
+        return ExperimentItemResult(
+            item=item,
+            output=output,
+            evaluations=evaluations,
+            trace_id=trace_id,
+            dataset_run_id=dataset_run_id,
+        )

     def _create_experiment_run_name(
         self, *, name: Optional[str] = None, run_name: Optional[str] = None
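
For context, the sketch below shows the control flow this diff produces, in a minimal, self-contained form. It is not the Langfuse SDK implementation: FakeSpan, start_span, and process_item are hypothetical stand-ins, and the enclosing context-manager structure is assumed from the commit title. The point it illustrates is that the task is executed and error-handled inside the span, while evaluators run only after the span has ended, so evaluator latency and failures no longer land on the main execution span.

import asyncio
from contextlib import contextmanager
from dataclasses import dataclass
from typing import Any, Callable, Dict, List


@dataclass
class FakeSpan:
    # Stand-in for a tracing span; records just enough for this sketch.
    output: Any = None
    level: str = "DEFAULT"
    ended: bool = False

    def update(self, **kwargs: Any) -> None:
        self.output = kwargs.get("output", self.output)
        self.level = kwargs.get("level", self.level)


@contextmanager
def start_span():
    # Hypothetical span context; the real SDK manages trace/span lifecycle.
    span = FakeSpan()
    try:
        yield span
    finally:
        span.ended = True  # the span's duration stops here


async def process_item(
    task: Callable[[Any], Any],
    evaluators: List[Callable[..., Dict[str, Any]]],
    item: Any,
) -> Dict[str, Any]:
    # 1) Execute the task inside the span; only task failures mark it as ERROR.
    with start_span() as span:
        try:
            output = task(item)
            span.update(output=output)
        except Exception as e:
            span.update(output=f"Error: {e}", level="ERROR")
            raise

    # 2) Run evaluators after the span has ended, so their runtime and any
    #    evaluator failure are kept off the main execution span.
    evaluations = []
    for evaluator in evaluators:
        try:
            evaluations.append(evaluator(output=output))
        except Exception as e:
            print(f"Evaluator failed: {e}")  # logged, never raised

    return {
        "output": output,
        "evaluations": evaluations,
        "span_ended_before_evals": span.ended,
    }


if __name__ == "__main__":
    result = asyncio.run(
        process_item(
            task=lambda item: item * 2,
            evaluators=[lambda output: {"name": "is_positive", "value": output > 0}],
            item=21,
        )
    )
    print(result)  # span_ended_before_evals is True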
