
Commit f0e0c38

Merge branch 'main' into feature/native-mcp-support
2 parents: cd31038 + 6cf43ea

File tree

2 files changed (+72, -7 lines changed)


docs/logfire.md

Lines changed: 8 additions & 1 deletion
@@ -268,7 +268,14 @@ The following providers have dedicated documentation on Pydantic AI:
 
 ### Configuring data format
 
-Pydantic AI follows the [OpenTelemetry Semantic Conventions for Generative AI systems](https://opentelemetry.io/docs/specs/semconv/gen-ai/). Specifically, it follows version 1.37.0 of the conventions by default. To use [version 1.36.0](https://github.com/open-telemetry/semantic-conventions/blob/v1.36.0/docs/gen-ai/README.md) or older, pass [`InstrumentationSettings(version=1)`][pydantic_ai.models.instrumented.InstrumentationSettings] (the default is `version=2`). Moreover, those semantic conventions specify that messages should be captured as individual events (logs) that are children of the request span, whereas by default, Pydantic AI instead collects these events into a JSON array which is set as a single large attribute called `events` on the request span. To change this, use `event_mode='logs'`:
+Pydantic AI follows the [OpenTelemetry Semantic Conventions for Generative AI systems](https://opentelemetry.io/docs/specs/semconv/gen-ai/). Specifically, it follows version 1.37.0 of the conventions by default, with a few exceptions. Certain span and attribute names are not spec compliant by default for compatibility reasons, but can be made compliant by passing [`InstrumentationSettings(version=3)`][pydantic_ai.models.instrumented.InstrumentationSettings] (the default is currently `version=2`). This will change the following:
+
+- The span name `agent run` becomes `invoke_agent {gen_ai.agent.name}` (with the agent name filled in)
+- The span name `running tool` becomes `execute_tool {gen_ai.tool.name}` (with the tool name filled in)
+- The attribute name `tool_arguments` becomes `gen_ai.tool.call.arguments`
+- The attribute name `tool_response` becomes `gen_ai.tool.call.result`
+
+To use [OpenTelemetry semantic conventions version 1.36.0](https://github.com/open-telemetry/semantic-conventions/blob/v1.36.0/docs/gen-ai/README.md) or older, pass [`InstrumentationSettings(version=1)`][pydantic_ai.models.instrumented.InstrumentationSettings]. Moreover, those semantic conventions specify that messages should be captured as individual events (logs) that are children of the request span, whereas by default, Pydantic AI instead collects these events into a JSON array which is set as a single large attribute called `events` on the request span. To change this, use `event_mode='logs'`:
 
 ```python {title="instrumentation_settings_event_mode.py"}
 import logfire
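In practice, the spec-compliant naming described in this hunk would be enabled when instrumenting agents. A minimal sketch, assuming `Agent.instrument_all` accepts an `InstrumentationSettings` (as in current Pydantic AI releases) and using the import path given by the reference above; the commented-out alternative is likewise an assumption based on the docs text:

```python
from pydantic_ai import Agent
from pydantic_ai.models.instrumented import InstrumentationSettings

# Opt in to the fully spec-compliant span and attribute names described above.
Agent.instrument_all(InstrumentationSettings(version=3))

# Or stay on the 1.36.0-era conventions and emit per-message events instead of
# a single `events` attribute on the request span:
# Agent.instrument_all(InstrumentationSettings(version=1, event_mode='logs'))
```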

pydantic_evals/pydantic_evals/reporting/__init__.py

Lines changed: 64 additions & 6 deletions
@@ -206,11 +206,70 @@ def averages(self) -> ReportCaseAggregate | None:
             return ReportCaseAggregate.average(self.cases)
         return None
 
+    def render(
+        self,
+        width: int | None = None,
+        baseline: EvaluationReport[InputsT, OutputT, MetadataT] | None = None,
+        *,
+        include_input: bool = False,
+        include_metadata: bool = False,
+        include_expected_output: bool = False,
+        include_output: bool = False,
+        include_durations: bool = True,
+        include_total_duration: bool = False,
+        include_removed_cases: bool = False,
+        include_averages: bool = True,
+        include_errors: bool = True,
+        include_error_stacktrace: bool = False,
+        include_evaluator_failures: bool = True,
+        input_config: RenderValueConfig | None = None,
+        metadata_config: RenderValueConfig | None = None,
+        output_config: RenderValueConfig | None = None,
+        score_configs: dict[str, RenderNumberConfig] | None = None,
+        label_configs: dict[str, RenderValueConfig] | None = None,
+        metric_configs: dict[str, RenderNumberConfig] | None = None,
+        duration_config: RenderNumberConfig | None = None,
+        include_reasons: bool = False,
+    ) -> str:  # pragma: no cover
+        """Render this report to a nicely-formatted string, optionally comparing it to a baseline report.
+
+        If you want more control over the output, use `console_table` instead and pass it to `rich.Console.print`.
+        """
+        io_file = StringIO()
+        console = Console(width=width, file=io_file)
+        self.print(
+            width=width,
+            baseline=baseline,
+            console=console,
+            include_input=include_input,
+            include_metadata=include_metadata,
+            include_expected_output=include_expected_output,
+            include_output=include_output,
+            include_durations=include_durations,
+            include_total_duration=include_total_duration,
+            include_removed_cases=include_removed_cases,
+            include_averages=include_averages,
+            include_errors=include_errors,
+            include_error_stacktrace=include_error_stacktrace,
+            include_evaluator_failures=include_evaluator_failures,
+            input_config=input_config,
+            metadata_config=metadata_config,
+            output_config=output_config,
+            score_configs=score_configs,
+            label_configs=label_configs,
+            metric_configs=metric_configs,
+            duration_config=duration_config,
+            include_reasons=include_reasons,
+        )
+        Console(file=io_file)
+        return io_file.getvalue()
+
     def print(
         self,
         width: int | None = None,
         baseline: EvaluationReport[InputsT, OutputT, MetadataT] | None = None,
         *,
+        console: Console | None = None,
         include_input: bool = False,
         include_metadata: bool = False,
         include_expected_output: bool = False,
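The hunk above adds the `render()` method, which captures the same table output as `print()` into a string via an in-memory console. A minimal usage sketch; `report` here is a hypothetical, already-built `EvaluationReport` instance:

```python
# `report` is assumed to be an EvaluationReport produced elsewhere,
# e.g. by evaluating a pydantic_evals Dataset.
text = report.render(width=120, include_averages=True)

# The rendered string can be logged, written to a file, or asserted on in tests.
print(text)
```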
@@ -230,11 +289,14 @@ def print(
         metric_configs: dict[str, RenderNumberConfig] | None = None,
         duration_config: RenderNumberConfig | None = None,
         include_reasons: bool = False,
-    ):  # pragma: no cover
+    ) -> None:  # pragma: no cover
         """Print this report to the console, optionally comparing it to a baseline report.
 
         If you want more control over the output, use `console_table` instead and pass it to `rich.Console.print`.
         """
+        if console is None:
+            console = Console(width=width)
+
         table = self.console_table(
             baseline=baseline,
             include_input=include_input,
@@ -255,7 +317,6 @@ def print(
             duration_config=duration_config,
             include_reasons=include_reasons,
         )
-        console = Console(width=width)
         console.print(table)
         if include_errors and self.failures:
             failures_table = self.failures_table(
@@ -358,10 +419,7 @@ def failures_table(
 
     def __str__(self) -> str:  # pragma: lax no cover
         """Return a string representation of the report."""
-        table = self.console_table()
-        io_file = StringIO()
-        Console(file=io_file).print(table)
-        return io_file.getvalue()
+        return self.render()
 
 
 EvaluationReportAdapter = TypeAdapter(EvaluationReport[Any, Any, Any])
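For reference, the capture pattern that `render()` and the reworked `__str__` rely on — pointing a rich `Console` at a `StringIO` buffer, printing into it, and reading the buffer back — shown as a standalone sketch using a plain rich table rather than the report's own `console_table()`:

```python
from io import StringIO

from rich.console import Console
from rich.table import Table

# Build any rich renderable; EvaluationReport.console_table() returns one of these.
table = Table(title='Evaluation summary')
table.add_column('Case')
table.add_column('Score')
table.add_row('case_1', '0.90')

# Direct rich output into an in-memory buffer instead of stdout.
io_file = StringIO()
console = Console(width=80, file=io_file)
console.print(table)

# The fully rendered text is now available as a plain string.
print(io_file.getvalue())
```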
