Skip to content

Commit 8828587

Browse files
authored
Add span_id and trace_id to EvaluationReport (#2627)
1 parent efbb1e7 commit 8828587

File tree

3 files changed

+21
-4
lines changed

3 files changed

+21
-4
lines changed

pydantic_evals/pydantic_evals/dataset.py

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -297,6 +297,12 @@ async def _handle_case(case: Case[InputsT, OutputT, MetadataT], report_case_name
297297
progress_bar.update(task_id, advance=1)
298298
return result
299299

300+
if (context := eval_span.context) is None: # pragma: no cover
301+
trace_id = None
302+
span_id = None
303+
else:
304+
trace_id = f'{context.trace_id:032x}'
305+
span_id = f'{context.span_id:016x}'
300306
report = EvaluationReport(
301307
name=name,
302308
cases=await task_group_gather(
@@ -305,6 +311,8 @@ async def _handle_case(case: Case[InputsT, OutputT, MetadataT], report_case_name
305311
for i, case in enumerate(self.cases, 1)
306312
]
307313
),
314+
span_id=span_id,
315+
trace_id=trace_id,
308316
)
309317
# TODO(DavidM): This attribute will be too big in general; remove it once we can use child spans in details panel:
310318
eval_span.set_attribute('cases', _REPORT_CASES_ADAPTER.dump_python(report.cases))
@@ -929,8 +937,8 @@ async def _run_task_and_evaluators(
929937

930938
context = case_span.context
931939
if context is None: # pragma: no cover
932-
trace_id = ''
933-
span_id = ''
940+
trace_id = None
941+
span_id = None
934942
else:
935943
trace_id = f'{context.trace_id:032x}'
936944
span_id = f'{context.span_id:016x}'

pydantic_evals/pydantic_evals/reporting/__init__.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,8 @@ class ReportCase(Generic[InputsT, OutputT, MetadataT]):
6868
total_duration: float # includes evaluator execution time
6969

7070
# TODO(DavidM): Drop these once we can reference child spans in details panel:
71-
trace_id: str
72-
span_id: str
71+
trace_id: str | None
72+
span_id: str | None
7373

7474

7575
ReportCaseAdapter = TypeAdapter(ReportCase[Any, Any, Any])
@@ -158,9 +158,16 @@ class EvaluationReport(Generic[InputsT, OutputT, MetadataT]):
158158

159159
name: str
160160
"""The name of the report."""
161+
161162
cases: list[ReportCase[InputsT, OutputT, MetadataT]]
162163
"""The cases in the report."""
163164

165+
span_id: str | None = None
166+
"""The span ID of the evaluation."""
167+
168+
trace_id: str | None = None
169+
"""The trace ID of the evaluation."""
170+
164171
def averages(self) -> ReportCaseAggregate:
165172
return ReportCaseAggregate.average(self.cases)
166173

tests/evals/test_dataset.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -561,6 +561,8 @@ async def my_task(inputs: TaskInput) -> TaskOutput:
561561
span_id='0000000000000007',
562562
),
563563
],
564+
span_id='0000000000000001',
565+
trace_id='00000000000000000000000000000001',
564566
)
565567
)
566568

0 commit comments

Comments
 (0)