Skip to content

Commit bac476f

Browse files
cetra3 and DouweM authored
Include report averages (#3053)
Co-authored-by: Douwe Maan <[email protected]>
1 parent 9d20769 commit bac476f

File tree

2 files changed

+24
-0
lines changed

2 files changed

+24
-0
lines changed

pydantic_evals/pydantic_evals/dataset.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -343,6 +343,8 @@ async def _handle_case(case: Case[InputsT, OutputT, MetadataT], report_case_name
343343
trace_id=trace_id,
344344
)
345345
if (averages := report.averages()) is not None and averages.assertions is not None:
346+
experiment_metadata = {'n_cases': len(self.cases), 'averages': averages}
347+
eval_span.set_attribute('logfire.experiment.metadata', experiment_metadata)
346348
eval_span.set_attribute('assertion_pass_rate', averages.assertions)
347349
return report
348350

tests/evals/test_dataset.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1555,6 +1555,16 @@ async def mock_async_task(inputs: TaskInput) -> TaskOutput:
15551555
'gen_ai.operation.name': {},
15561556
'n_cases': {},
15571557
'name': {},
1558+
'logfire.experiment.metadata': {
1559+
'type': 'object',
1560+
'properties': {
1561+
'averages': {
1562+
'type': 'object',
1563+
'title': 'ReportCaseAggregate',
1564+
'x-python-datatype': 'PydanticModel',
1565+
}
1566+
},
1567+
},
15581568
'task_name': {},
15591569
},
15601570
'type': 'object',
@@ -1563,6 +1573,18 @@ async def mock_async_task(inputs: TaskInput) -> TaskOutput:
15631573
'logfire.msg_template': 'evaluate {name}',
15641574
'logfire.span_type': 'span',
15651575
'n_cases': 2,
1576+
'logfire.experiment.metadata': {
1577+
'n_cases': 2,
1578+
'averages': {
1579+
'name': 'Averages',
1580+
'scores': {'confidence': 1.0},
1581+
'labels': {},
1582+
'metrics': {},
1583+
'assertions': 1.0,
1584+
'task_duration': 1.0,
1585+
'total_duration': 9.0,
1586+
},
1587+
},
15661588
'name': 'mock_async_task',
15671589
'task_name': 'mock_async_task',
15681590
},

0 commit comments

Comments
 (0)