|
| 1 | +from __future__ import annotations as _annotations |
| 2 | + |
| 3 | +from collections.abc import Iterable, Iterator, Mapping |
| 4 | +from dataclasses import asdict, dataclass, is_dataclass |
| 5 | +from datetime import date |
| 6 | +from typing import Any |
| 7 | +from xml.etree import ElementTree |
| 8 | + |
| 9 | +from pydantic import BaseModel |
| 10 | + |
| 11 | +__all__ = ('format_as_xml',) |
| 12 | + |
| 13 | + |
def format_as_xml(
    obj: Any,
    root_tag: str = 'examples',
    item_tag: str = 'example',
    include_root_tag: bool = True,
    none_str: str = 'null',
    indent: str | None = ' ',
) -> str:
    """Serialize a Python object to an XML string.

    XML is often an easier format than JSON for LLMs to read when they are given
    semi-structured data such as examples.

    Supports: `str`, `bytes`, `bytearray`, `bool`, `int`, `float`, `date`, `datetime`, `Mapping`,
    `Iterable`, `dataclass`, and `BaseModel`.

    Args:
        obj: Python object to serialize to XML.
        root_tag: Tag of the outermost element wrapping the XML. To leave the wrapper out,
            pass `include_root_tag=False`.
        item_tag: Tag used for each item of an iterable (e.g. a list); dataclasses and Pydantic
            models use their class name instead.
        include_root_tag: Whether to emit the root tag. The root tag is always emitted when the
            root element itself carries text (e.g. when `obj` is a simple value).
        none_str: Text written for `None` values.
        indent: Indentation string used for pretty printing, or `None` for compact output.

    Returns: XML representation of the object.

    Example:
    ```python {title="format_as_xml_example.py" lint="skip"}
    from pydantic_ai.format_as_xml import format_as_xml

    print(format_as_xml({'name': 'John', 'height': 6, 'weight': 200}, root_tag='user'))
    '''
    <user>
     <name>John</name>
     <height>6</height>
     <weight>200</weight>
    </user>
    '''
    ```
    """
    root = _ToXml(item_tag=item_tag, none_str=none_str).to_xml(obj, root_tag)

    # A root that holds text directly cannot be dropped without losing that text,
    # so it is rendered as a whole even when the caller asked for no root tag.
    if include_root_tag or root.text is not None:
        if indent is not None:
            ElementTree.indent(root, space=indent)
        return ElementTree.tostring(root, encoding='unicode')

    # Rootless output: render each child on its own and join the pieces.
    separator = '\n' if indent is not None else ''
    return separator.join(_rootless_xml_elements(root, indent))
| 64 | + |
| 65 | + |
| 66 | +@dataclass |
| 67 | +class _ToXml: |
| 68 | + item_tag: str |
| 69 | + none_str: str |
| 70 | + |
| 71 | + def to_xml(self, value: Any, tag: str | None) -> ElementTree.Element: |
| 72 | + element = ElementTree.Element(self.item_tag if tag is None else tag) |
| 73 | + if value is None: |
| 74 | + element.text = self.none_str |
| 75 | + elif isinstance(value, str): |
| 76 | + element.text = value |
| 77 | + elif isinstance(value, (bytes, bytearray)): |
| 78 | + element.text = value.decode(errors='ignore') |
| 79 | + elif isinstance(value, (bool, int, float)): |
| 80 | + element.text = str(value) |
| 81 | + elif isinstance(value, date): |
| 82 | + element.text = value.isoformat() |
| 83 | + elif isinstance(value, Mapping): |
| 84 | + self._mapping_to_xml(element, value) # pyright: ignore[reportUnknownArgumentType] |
| 85 | + elif is_dataclass(value) and not isinstance(value, type): |
| 86 | + if tag is None: |
| 87 | + element = ElementTree.Element(value.__class__.__name__) |
| 88 | + dc_dict = asdict(value) |
| 89 | + self._mapping_to_xml(element, dc_dict) |
| 90 | + elif isinstance(value, BaseModel): |
| 91 | + if tag is None: |
| 92 | + element = ElementTree.Element(value.__class__.__name__) |
| 93 | + self._mapping_to_xml(element, value.model_dump(mode='python')) |
| 94 | + elif isinstance(value, Iterable): |
| 95 | + for item in value: # pyright: ignore[reportUnknownVariableType] |
| 96 | + item_el = self.to_xml(item, None) |
| 97 | + element.append(item_el) |
| 98 | + else: |
| 99 | + raise TypeError(f'Unsupported type for XML formatting: {type(value)}') |
| 100 | + return element |
| 101 | + |
| 102 | + def _mapping_to_xml(self, element: ElementTree.Element, mapping: Mapping[Any, Any]) -> None: |
| 103 | + for key, value in mapping.items(): |
| 104 | + if isinstance(key, int): |
| 105 | + key = str(key) |
| 106 | + elif not isinstance(key, str): |
| 107 | + raise TypeError(f'Unsupported key type for XML formatting: {type(key)}, only str and int are allowed') |
| 108 | + element.append(self.to_xml(value, key)) |
| 109 | + |
| 110 | + |
| 111 | +def _rootless_xml_elements(root: ElementTree.Element, indent: str | None) -> Iterator[str]: |
| 112 | + for sub_element in root: |
| 113 | + if indent is not None: |
| 114 | + ElementTree.indent(sub_element, space=indent) |
| 115 | + yield ElementTree.tostring(sub_element, encoding='unicode') |
0 commit comments