@@ -2864,50 +2864,50 @@ async def _process_experiment_item(
28642864 )
28652865 raise e
28662866
2867- # Run evaluators
2868- evaluations = []
2867+ # Run evaluators
2868+ evaluations = []
28692869
2870- for evaluator in evaluators :
2871- try :
2872- eval_metadata : Optional [Dict [str , Any ]] = None
2870+ for evaluator in evaluators :
2871+ try :
2872+ eval_metadata : Optional [Dict [str , Any ]] = None
28732873
2874- if isinstance (item , dict ):
2875- eval_metadata = item .get ("metadata" )
2876- elif hasattr (item , "metadata" ):
2877- eval_metadata = item .metadata
2874+ if isinstance (item , dict ):
2875+ eval_metadata = item .get ("metadata" )
2876+ elif hasattr (item , "metadata" ):
2877+ eval_metadata = item .metadata
28782878
2879- eval_results = await _run_evaluator (
2880- evaluator ,
2881- input = input_data ,
2882- output = output ,
2883- expected_output = expected_output ,
2884- metadata = eval_metadata ,
2879+ eval_results = await _run_evaluator (
2880+ evaluator ,
2881+ input = input_data ,
2882+ output = output ,
2883+ expected_output = expected_output ,
2884+ metadata = eval_metadata ,
2885+ )
2886+ evaluations .extend (eval_results )
2887+
2888+ # Store evaluations as scores
2889+ for evaluation in eval_results :
2890+ self .create_score (
2891+ trace_id = trace_id ,
2892+ observation_id = span .id ,
2893+ name = evaluation .name ,
2894+ value = evaluation .value , # type: ignore
2895+ comment = evaluation .comment ,
2896+ metadata = evaluation .metadata ,
2897+ config_id = evaluation .config_id ,
2898+ data_type = evaluation .data_type , # type: ignore
28852899 )
2886- evaluations .extend (eval_results )
2887-
2888- # Store evaluations as scores
2889- for evaluation in eval_results :
2890- self .create_score (
2891- trace_id = trace_id ,
2892- observation_id = span .id ,
2893- name = evaluation .name ,
2894- value = evaluation .value , # type: ignore
2895- comment = evaluation .comment ,
2896- metadata = evaluation .metadata ,
2897- config_id = evaluation .config_id ,
2898- data_type = evaluation .data_type , # type: ignore
2899- )
29002900
2901- except Exception as e :
2902- langfuse_logger .error (f"Evaluator failed: { e } " )
2901+ except Exception as e :
2902+ langfuse_logger .error (f"Evaluator failed: { e } " )
29032903
2904- return ExperimentItemResult (
2905- item = item ,
2906- output = output ,
2907- evaluations = evaluations ,
2908- trace_id = trace_id ,
2909- dataset_run_id = dataset_run_id ,
2910- )
2904+ return ExperimentItemResult (
2905+ item = item ,
2906+ output = output ,
2907+ evaluations = evaluations ,
2908+ trace_id = trace_id ,
2909+ dataset_run_id = dataset_run_id ,
2910+ )
29112911
29122912 def _create_experiment_run_name (
29132913 self , * , name : Optional [str ] = None , run_name : Optional [str ] = None
0 commit comments