|
1 | 1 | from __future__ import annotations |
2 | 2 |
|
| 3 | +import sys |
| 4 | +import dataclasses |
3 | 5 | import json |
4 | 6 | import timeit |
5 | | -from typing import List, Union |
| 7 | +import importlib.metadata |
| 8 | +from typing import Any, Literal, Callable |
6 | 9 |
|
7 | | -import msgpack |
8 | | -import orjson |
9 | | -import ujson |
10 | | -from generate_data import make_filesystem_data |
| 10 | +from .generate_data import make_filesystem_data |
11 | 11 |
|
12 | 12 | import msgspec |
13 | 13 |
|
14 | 14 |
|
class File(msgspec.Struct, kw_only=True, omit_defaults=True, tag="file"):
    """Typed schema for a single file entry in the benchmark data.

    Struct options used here:

    - ``kw_only=True`` lets the required ``nbytes`` and ``permissions``
      fields follow the optional ``updated_*`` fields.
    - ``omit_defaults=True`` and ``tag="file"`` configure how msgspec
      encodes instances and tells ``File`` apart from ``Directory`` in the
      ``File | Directory`` union used by ``Directory.contents``.
    """

    name: str
    created_by: str
    created_at: str
    # Optional: both default to None when the entry carries no update info.
    updated_by: str | None = None
    updated_at: str | None = None
    nbytes: int
    permissions: Literal["READ", "WRITE", "READ_WRITE"]
21 | 23 |
|
22 | 24 |
|
class Directory(msgspec.Struct, kw_only=True, omit_defaults=True, tag="directory"):
    """Typed schema for a directory entry in the benchmark data.

    A directory holds a list of ``File`` and nested ``Directory`` entries,
    which makes the schema recursive. ``kw_only=True`` lets the required
    ``contents`` field follow the optional ``updated_*`` fields, and
    ``tag="directory"`` distinguishes this type from ``File`` inside the
    ``File | Directory`` union.
    """

    name: str
    created_by: str
    created_at: str
    # Optional: both default to None when the entry carries no update info.
    updated_by: str | None = None
    updated_at: str | None = None
    # Child entries; the self-reference relies on lazily evaluated annotations.
    contents: list[File | Directory]
29 | 32 |
|
30 | 33 |
|
31 | | -def bench(dumps, loads, ndata, schema=None): |
32 | | - data = make_filesystem_data(ndata) |
33 | | - if schema: |
34 | | - data = msgspec.convert(data, schema) |
35 | | - timer = timeit.Timer("func(data)", globals={"func": dumps, "data": data}) |
36 | | - n, t = timer.autorange() |
37 | | - dumps_time = t / n |
| 34 | +@dataclasses.dataclass |
| 35 | +class Benchmark: |
| 36 | + label: str |
| 37 | + version: str |
| 38 | + encode: Callable |
| 39 | + decode: Callable |
| 40 | + schema: Any = None |
38 | 41 |
|
39 | | - data = dumps(data) |
| 42 | + def run(self, data: bytes) -> dict: |
| 43 | + if self.schema is not None: |
| 44 | + data = msgspec.convert(data, self.schema) |
| 45 | + timer = timeit.Timer("func(data)", globals={"func": self.encode, "data": data}) |
| 46 | + n, t = timer.autorange() |
| 47 | + encode_time = t / n |
40 | 48 |
|
41 | | - timer = timeit.Timer("func(data)", globals={"func": loads, "data": data}) |
42 | | - n, t = timer.autorange() |
43 | | - loads_time = t / n |
44 | | - return dumps_time, loads_time |
| 49 | + data = self.encode(data) |
45 | 50 |
|
| 51 | + timer = timeit.Timer("func(data)", globals={"func": self.decode, "data": data}) |
| 52 | + n, t = timer.autorange() |
| 53 | + decode_time = t / n |
46 | 54 |
|
47 | | -def bench_msgspec_msgpack(n): |
48 | | - schema = File if n == 1 else Directory |
49 | | - enc = msgspec.msgpack.Encoder() |
50 | | - dec = msgspec.msgpack.Decoder(schema) |
51 | | - return bench(enc.encode, dec.decode, n, schema) |
52 | | - |
53 | | - |
54 | | -def bench_msgspec_json(n): |
55 | | - schema = File if n == 1 else Directory |
56 | | - enc = msgspec.json.Encoder() |
57 | | - dec = msgspec.json.Decoder(schema) |
58 | | - return bench(enc.encode, dec.decode, n, schema) |
59 | | - |
60 | | - |
61 | | -def bench_msgpack(n): |
62 | | - packer = msgpack.Packer() |
63 | | - loads = msgpack.loads |
64 | | - return bench(packer.pack, loads, n) |
| 55 | + return { |
| 56 | + "label": self.label, |
| 57 | + "encode": encode_time, |
| 58 | + "decode": decode_time, |
| 59 | + } |
65 | 60 |
|
66 | 61 |
|
def json_benchmarks() -> "list[Benchmark]":
    """Build the list of JSON benchmark cases.

    The third-party JSON libraries are imported here rather than at module
    level, so they are only needed when this function is called.

    Returns
    -------
    list[Benchmark]
        One entry per library, plus a typed ("msgspec structs") and an
        untyped ("msgspec") msgspec case.
    """
    import orjson
    import ujson
    import rapidjson
    import simdjson

    # The version is looked up under the distribution name "pysimdjson".
    simdjson_ver = importlib.metadata.version("pysimdjson")

    rj_dumps = rapidjson.Encoder()
    rj_loads = rapidjson.Decoder()

    def uj_dumps(obj):
        return ujson.dumps(obj)

    enc = msgspec.json.Encoder()
    dec = msgspec.json.Decoder(Directory)
    dec2 = msgspec.json.Decoder()

    return [
        # version=None here so msgspec is listed only once by ``--versions``.
        Benchmark("msgspec structs", None, enc.encode, dec.decode, Directory),
        Benchmark("msgspec", msgspec.__version__, enc.encode, dec2.decode),
        Benchmark("json", None, json.dumps, json.loads),
        Benchmark("orjson", orjson.__version__, orjson.dumps, orjson.loads),
        Benchmark("ujson", ujson.__version__, uj_dumps, ujson.loads),
        Benchmark("rapidjson", rapidjson.__version__, rj_dumps, rj_loads),
        Benchmark("simdjson", simdjson_ver, simdjson.dumps, simdjson.loads),
    ]
86 | 89 |
|
def msgpack_benchmarks() -> "list[Benchmark]":
    """Build the list of MessagePack benchmark cases.

    The third-party MessagePack libraries are imported here rather than at
    module level, so they are only needed when this function is called.

    Returns
    -------
    list[Benchmark]
        One entry per library, plus a typed ("msgspec structs") and an
        untyped ("msgspec") msgspec case.
    """
    import msgpack
    import ormsgpack

    enc = msgspec.msgpack.Encoder()
    dec = msgspec.msgpack.Decoder(Directory)
    dec2 = msgspec.msgpack.Decoder()

    return [
        # version=None here so msgspec is listed only once by ``--versions``.
        Benchmark("msgspec structs", None, enc.encode, dec.decode, Directory),
        Benchmark("msgspec", msgspec.__version__, enc.encode, dec2.decode),
        Benchmark("msgpack", msgpack.__version__, msgpack.dumps, msgpack.loads),
        Benchmark(
            "ormsgpack", ormsgpack.__version__, ormsgpack.packb, ormsgpack.unpackb
        ),
    ]
105 | 107 |
|
106 | 108 |
|
def _render_table(results):
    """Render benchmark results as a text grid table.

    ``results`` is a list of dicts with "label", "encode" and "decode" keys
    (timings in seconds). Rows are ordered by total time, fastest first, and
    every "vs." column is the ratio against that fastest-total row. The input
    list is not modified.
    """
    ranked = sorted(results, key=lambda row: row["encode"] + row["decode"])
    best_et = ranked[0]["encode"]
    best_dt = ranked[0]["decode"]
    best_tt = best_et + best_dt

    columns = (
        "",
        "encode (μs)",
        "vs.",
        "decode (μs)",
        "vs.",
        "total (μs)",
        "vs.",
    )
    rows = [
        (
            r["label"],
            f"{1_000_000 * r['encode']:.1f}",
            f"{r['encode'] / best_et:.1f}",
            f"{1_000_000 * r['decode']:.1f}",
            f"{r['decode'] / best_dt:.1f}",
            f"{1_000_000 * (r['encode'] + r['decode']):.1f}",
            f"{(r['encode'] + r['decode']) / best_tt:.1f}",
        )
        for r in ranked
    ]
    # Each column is as wide as its longest cell or its header.
    widths = tuple(
        max(max(map(len, cells)), len(title))
        for cells, title in zip(zip(*rows), columns)
    )
    # First pass fills in the widths, leaving one "%-<w>s" slot per column.
    row_template = ("|" + (" %%-%ds |" * len(columns))) % widths
    header = row_template % tuple(columns)
    bar_underline = "+%s+" % "+".join("=" * (w + 2) for w in widths)
    bar = "+%s+" % "+".join("-" * (w + 2) for w in widths)

    parts = [bar, header, bar_underline]
    for row in rows:
        parts.append(row_template % row)
        parts.append(bar)
    return "\n".join(parts)


def main():
    """Command-line entry point: run the selected benchmarks and print results.

    Options:

    - ``--versions``: print the version of each library that reports one,
      then exit with status 0 without running any benchmark.
    - ``-n``: number of objects in the generated data (default 1000).
    - ``-p`` / ``--protocol``: ``json`` (default) or ``msgpack``.
    - ``--json``: print one JSON object per benchmark instead of a table.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="Benchmark different python serialization libraries"
    )
    parser.add_argument(
        "--versions",
        action="store_true",
        help="Output library version info, and exit immediately",
    )
    parser.add_argument(
        "-n",
        type=int,
        help="The number of objects in the generated data, defaults to 1000",
        default=1000,
    )
    parser.add_argument(
        "-p",
        "--protocol",
        choices=["json", "msgpack"],
        default="json",
        help="The protocol to benchmark, defaults to JSON",
    )
    parser.add_argument(
        "--json",
        action="store_true",
        help="whether to output the results as json",
    )
    args = parser.parse_args()

    benchmarks = json_benchmarks() if args.protocol == "json" else msgpack_benchmarks()

    if args.versions:
        for bench in benchmarks:
            if bench.version is not None:
                print(f"- {bench.label}: {bench.version}")
        sys.exit(0)

    data = make_filesystem_data(args.n)

    results = [benchmark.run(data) for benchmark in benchmarks]

    if args.json:
        # One JSON document per line, in benchmark definition order.
        for line in results:
            print(json.dumps(line))
    else:
        print(_render_table(results))
147 | 195 |
|
148 | 196 |
|
149 | 197 | if __name__ == "__main__": |
|
0 commit comments