|
| 1 | +from __future__ import annotations |
| 2 | + |
| 3 | +from collections.abc import Mapping |
| 4 | +from datetime import date, datetime, time |
| 5 | +from types import MappingProxyType |
| 6 | + |
| 7 | +TYPE_CHECKING = False |
| 8 | +if TYPE_CHECKING: |
| 9 | + from collections.abc import Generator |
| 10 | + from decimal import Decimal |
| 11 | + from typing import IO, Any, Final |
| 12 | + |
# ASCII control characters: U+0000 through U+001F, plus U+007F (DEL).
ASCII_CTRL = frozenset(map(chr, range(32))) | {chr(127)}
# Characters that are escaped when emitted inside a basic string: every
# control character except tab, plus the double quote and the backslash.
ILLEGAL_BASIC_STR_CHARS = (ASCII_CTRL - {"\t"}) | {'"', "\\"}
# Characters allowed in a key that is written without quotes.
BARE_KEY_CHARS = frozenset(
    "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_"
)
# Python sequence types that are serialized as TOML arrays.
ARRAY_TYPES = (list, tuple)
# Upper bound on the rendered width of an inline table line.
MAX_LINE_LENGTH = 100

# Short backslash escapes; other illegal characters get a \uXXXX escape.
COMPACT_ESCAPES = MappingProxyType(
    {
        "\b": "\\b",  # backspace
        "\n": "\\n",  # linefeed
        "\f": "\\f",  # form feed
        "\r": "\\r",  # carriage return
        '"': '\\"',  # quote
        "\\": "\\\\",  # backslash
    }
)
| 31 | + |
| 32 | + |
class Context:
    """Per-call serialization settings and scratch state.

    Attributes:
        allow_multiline: whether multi-line basic strings may be emitted.
        inline_table_cache: rendered inline tables, keyed by ``id()`` of the
            mapping they were rendered from.
        indent_str: whitespace for one indentation level.

    Raises:
        ValueError: if ``indent`` is negative.
    """

    def __init__(self, allow_multiline: bool, indent: int):
        if indent < 0:
            raise ValueError("Indent width must be non-negative")
        self.indent_str: Final = indent * " "
        self.allow_multiline: Final = allow_multiline
        # Maps id(mapping) -> rendered inline table text.
        self.inline_table_cache: Final[dict[int, str]] = {}
| 41 | + |
| 42 | + |
def dump(
    obj: Mapping[str, Any],
    fp: IO[bytes],
    /,
    *,
    multiline_strings: bool = False,
    indent: int = 4,
) -> None:
    """Serialize ``obj`` as TOML and write it to the binary stream ``fp``.

    The document is produced chunk by chunk; each chunk is encoded with
    ``str.encode()`` defaults and written with a separate ``fp.write`` call.

    Args:
        obj: top-level table to serialize.
        fp: writable binary file-like object.
        multiline_strings: allow multi-line basic strings in the output.
        indent: number of spaces per indentation level.

    Raises:
        ValueError: if ``indent`` is negative.
    """
    context = Context(multiline_strings, indent)
    write = fp.write
    for text in gen_table_chunks(obj, context, name=""):
        write(text.encode())
| 54 | + |
| 55 | + |
def dumps(
    obj: Mapping[str, Any], /, *, multiline_strings: bool = False, indent: int = 4
) -> str:
    """Serialize ``obj`` as TOML and return the document as a string.

    Args:
        obj: top-level table to serialize.
        multiline_strings: allow multi-line basic strings in the output.
        indent: number of spaces per indentation level.

    Raises:
        ValueError: if ``indent`` is negative.
    """
    context = Context(multiline_strings, indent)
    chunks = gen_table_chunks(obj, context, name="")
    return "".join(chunks)
| 61 | + |
| 62 | + |
def gen_table_chunks(
    table: Mapping[str, Any],
    ctx: Context,
    *,
    name: str,
    inside_aot: bool = False,
) -> Generator[str, None, None]:
    """Yield the text chunks that make up ``table`` and its sub-tables.

    Entries are split into two groups: values written as ``key = value``
    lines, and values written as their own ``[table]`` / ``[[array]]``
    sections. The key/value lines come first, then each section, with a
    blank line before every section that follows earlier output.

    Args:
        table: the mapping to serialize.
        ctx: serialization settings and caches.
        name: already formatted dotted name of this table; empty for the
            document root.
        inside_aot: whether this table is an element of an array of tables.
    """
    key_values = []
    sections: list[tuple[str, Any, bool]] = []  # (key, value, is AoT element)
    for key, value in table.items():
        if isinstance(value, Mapping):
            sections.append((key, value, False))
            continue
        # An array of tables is expanded into [[sections]] unless every
        # element is suitable for inline rendering.
        if is_aot(value) and not all(
            is_suitable_inline_table(element, ctx) for element in value
        ):
            sections.extend((key, element, True) for element in value)
            continue
        key_values.append((key, value))

    # Array elements always get a header. A named table gets one when it has
    # key/value lines of its own, or when it has no sub-sections at all.
    needs_header = inside_aot or (bool(name) and bool(key_values or not sections))
    if needs_header:
        yield f"[[{name}]]\n" if inside_aot else f"[{name}]\n"

    for key, value in key_values:
        yield f"{format_key_part(key)} = {format_literal(value, ctx)}\n"

    emitted = needs_header or bool(key_values)
    for key, value, element_of_aot in sections:
        if emitted:
            yield "\n"
        emitted = True
        formatted_key = format_key_part(key)
        full_name = f"{name}.{formatted_key}" if name else formatted_key
        yield from gen_table_chunks(
            value, ctx, name=full_name, inside_aot=element_of_aot
        )
| 98 | + |
| 99 | + |
def format_literal(obj: object, ctx: Context, *, nest_level: int = 0) -> str:
    """Render a single Python value as a TOML value.

    Args:
        obj: the value to render.
        ctx: serialization settings and caches.
        nest_level: current array nesting depth, used to indent array items.

    Raises:
        ValueError: if ``obj`` is a ``time`` with a truthy ``tzinfo``.
        TypeError: if ``obj`` is of an unsupported type.
    """
    # bool must be tested before int because bool is an int subclass.
    if isinstance(obj, bool):
        return "true" if obj else "false"
    if isinstance(obj, time) and obj.tzinfo:
        raise ValueError("TOML does not support offset times")
    if isinstance(obj, (int, float, date, datetime, time)):
        return str(obj)
    if isinstance(obj, str):
        return format_string(obj, allow_multiline=ctx.allow_multiline)
    if isinstance(obj, ARRAY_TYPES):
        return format_inline_array(obj, ctx, nest_level)
    if isinstance(obj, Mapping):
        return format_inline_table(obj, ctx)

    # Lazy import to improve module import time
    from decimal import Decimal

    if isinstance(obj, Decimal):
        return format_decimal(obj)

    type_name = type(obj).__qualname__
    raise TypeError(f"Object of type '{type_name}' is not TOML serializable")
| 124 | + |
| 125 | + |
def format_decimal(obj: Decimal) -> str:
    """Render a ``Decimal`` as a TOML float.

    NaN values become ``nan`` and infinities become ``inf`` / ``-inf``.
    Finite values use the lower-cased ``str()`` form, with ``.0`` appended
    when that form has neither a fractional part nor an exponent.
    """
    if obj.is_nan():
        return "nan"
    if obj.is_infinite():
        sign = "-" if obj.is_signed() else ""
        return f"{sign}inf"

    text = str(obj).lower()
    if "." in text or "e" in text:
        return text
    # Without a fraction or an exponent the value would read as an integer.
    return f"{text}.0"
| 133 | + |
| 134 | + |
def format_inline_table(obj: Mapping, ctx: Context) -> str:
    """Render ``obj`` as a TOML inline table (``{ key = value, ... }``).

    Results are memoized in ``ctx.inline_table_cache`` under ``id(obj)``, so
    rendering the same mapping object again returns the cached text.
    """
    cache = ctx.inline_table_cache
    cache_key = id(obj)
    cached = cache.get(cache_key)
    if cached is not None:
        return cached

    if obj:
        pairs = [
            f"{format_key_part(key)} = {format_literal(value, ctx)}"
            for key, value in obj.items()
        ]
        rendered = "{ " + ", ".join(pairs) + " }"
    else:
        rendered = "{}"

    cache[cache_key] = rendered
    return rendered
| 154 | + |
| 155 | + |
def format_inline_array(obj: tuple | list, ctx: Context, nest_level: int) -> str:
    """Render a sequence as a TOML array.

    An empty sequence is rendered as ``[]``. Otherwise each item gets its own
    line, indented one level deeper than ``nest_level`` and followed by a
    comma; the closing bracket is indented at ``nest_level``.
    """
    if not obj:
        return "[]"

    one_level = ctx.indent_str
    item_prefix = one_level * (nest_level + 1)
    lines = ["["]
    lines.extend(
        f"{item_prefix}{format_literal(item, ctx, nest_level=nest_level + 1)},"
        for item in obj
    )
    lines.append(f"{one_level * nest_level}]")
    return "\n".join(lines)
| 169 | + |
| 170 | + |
def format_key_part(part: str) -> str:
    """Render one segment of a TOML key.

    A non-empty key made up only of bare-key characters is returned as is;
    any other string is rendered as a quoted, single-line basic string.

    Args:
        part: the mapping key to render.

    Returns:
        The key text, ready to be placed in the TOML output.

    Raises:
        TypeError: if ``part`` is not a string.
    """
    # Reject non-strings explicitly. Waiting for ``issuperset`` to raise only
    # catches non-iterable keys: a tuple or frozenset of bare-key characters
    # would pass the check and be returned unchanged, and a ``bytes`` key
    # would fail later with an unrelated concatenation error.
    if not isinstance(part, str):
        raise TypeError(
            f"Invalid mapping key '{part}' of type '{type(part).__qualname__}'."
            " A string is required."
        )

    if part and BARE_KEY_CHARS.issuperset(part):
        return part
    return format_string(part, allow_multiline=False)
| 183 | + |
| 184 | + |
def format_string(s: str, *, allow_multiline: bool) -> str:
    """Render ``s`` as a TOML basic string.

    The multi-line form is used only when ``allow_multiline`` is true and
    ``s`` contains a newline. In that form ``\\r\\n`` is first normalized to
    ``\\n`` and newlines are written literally. Every other character in
    ``ILLEGAL_BASIC_STR_CHARS`` is written as its compact escape when one
    exists, otherwise as a ``\\uXXXX`` escape.
    """
    multiline = allow_multiline and "\n" in s
    if multiline:
        delimiter = '"""'
        pieces = [delimiter, "\n"]
        s = s.replace("\r\n", "\n")
    else:
        delimiter = '"'
        pieces = [delimiter]

    # Copy runs of legal characters in one slice; escape the rest one by one.
    run_start = 0
    for index, char in enumerate(s):
        if char not in ILLEGAL_BASIC_STR_CHARS:
            continue
        pieces.append(s[run_start:index])
        if multiline and char == "\n":
            pieces.append(char)
        elif char in COMPACT_ESCAPES:
            pieces.append(COMPACT_ESCAPES[char])
        else:
            pieces.append(f"\\u{ord(char):04x}")
        run_start = index + 1

    pieces.append(s[run_start:])
    pieces.append(delimiter)
    return "".join(pieces)
| 213 | + |
| 214 | + |
def is_aot(obj: Any) -> bool:
    """Return whether ``obj`` behaves as an array of tables.

    That is the case for a non-empty list or tuple whose elements are all
    mappings.
    """
    if not isinstance(obj, ARRAY_TYPES):
        return False
    if not obj:
        return False
    return all(isinstance(element, Mapping) for element in obj)
| 223 | + |
| 224 | + |
def is_suitable_inline_table(obj: Mapping, ctx: Context) -> bool:
    """Decide heuristically whether ``obj`` should be written inline.

    The table is rendered inline, prefixed with one indentation level and
    followed by a comma. It is suitable when that line contains no newline
    and is at most ``MAX_LINE_LENGTH`` characters long.
    """
    candidate_line = f"{ctx.indent_str}{format_inline_table(obj, ctx)},"
    if "\n" in candidate_line:
        return False
    return len(candidate_line) <= MAX_LINE_LENGTH
0 commit comments