Skip to content

Commit 368304a

Browse files
authored
Merge pull request #1 from singular-labs/feat/benchmark-examples
feat: add benchmark generator and examples
2 parents 6faee34 + 379f427 commit 368304a

File tree

3 files changed

+502
-0
lines changed

3 files changed

+502
-0
lines changed

README.md

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,47 @@ All fallible operations return `AsyncJsonStreamReaderError`:
7373

7474
Minimum supported Rust version is **1.74**.
7575

76+
## Benchmark (Serde vs asyncjsonstream)
77+
78+
The examples folder includes a generator and benchmark for a single large JSON object with a
79+
`rows` array. This comparison highlights the memory savings when you **stream and skip** large
80+
fields instead of deserializing full objects.
81+
82+
### Generate a 5GB fixture
83+
84+
```bash
85+
cargo run --release --example generate_big_object -- \
86+
--path /tmp/big_object.json \
87+
--target-bytes 5368709120 \
88+
--payload-bytes 1024
89+
```
90+
91+
### Run benchmarks (macOS)
92+
93+
```bash
94+
/usr/bin/time -l cargo run --release --example bench_big_object -- \
95+
--path /tmp/big_object.json --mode async
96+
97+
/usr/bin/time -l cargo run --release --example bench_big_object -- \
98+
--path /tmp/big_object.json --mode async-light
99+
100+
/usr/bin/time -l cargo run --release --example bench_big_object -- \
101+
--path /tmp/big_object.json --mode serde
102+
```
103+
104+
`async` deserializes each row into a `serde_json::Value` (higher memory). `async-light` only
105+
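reads `id` and skips other fields using tokens (low memory). `serde` reads the whole file into memory and parses it as a single `serde_json::Value` (highest memory).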
106+
107+
### Results (MacBook Pro, macOS, 5GB file, payload 1KB)
108+
109+
| Mode | Rows | Elapsed (ms) | Max RSS (bytes) | Peak footprint (bytes) |
110+
|--------------|----------|--------------|-----------------|------------------------|
111+
| async | 4,979,433 | 7,432 | 3,320,676,352 | 5,382,197,400 |
112+
| async-light | 4,979,433 | 10,340 | 2,916,352 | 2,146,616 |
113+
| serde | 4,979,433 | 6,662 | 10,902,372,352 | 14,253,713,704 |
114+
115+
Checksums matched across modes, confirming identical `id` aggregation.
116+
76117
## License
77118

78119
Licensed under either of:

examples/bench_big_object.rs

Lines changed: 323 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,323 @@
1+
use asyncjsonstream::{AsyncJsonStreamReader, JsonToken};
2+
use serde_json::Value;
3+
use std::env;
4+
use std::fs;
5+
use std::path::PathBuf;
6+
use std::time::Instant;
7+
use tokio::io::BufReader;
8+
9+
/// Prints the command-line synopsis for this example to standard error.
fn usage() {
    const SYNOPSIS: &str =
        "Usage: bench_big_object --path <file> [--mode serde|async|async-light|both] [--repeat <n>]";
    eprintln!("{SYNOPSIS}");
}
14+
15+
/// Parses `value` as an unsigned 64-bit integer.
///
/// `name` identifies the argument being parsed and only appears in the
/// failure message.
///
/// # Panics
///
/// Panics with `Invalid {name}: {value}` when `value` is not a valid `u64`.
fn parse_u64(value: &str, name: &str) -> u64 {
    match value.parse::<u64>() {
        Ok(parsed) => parsed,
        Err(_) => panic!("Invalid {name}: {value}"),
    }
}
20+
21+
fn parse_mode(value: &str) -> Mode {
22+
match value {
23+
"serde" => Mode::Serde,
24+
"async" => Mode::Async,
25+
"async-light" => Mode::AsyncLight,
26+
"both" => Mode::Both,
27+
_ => panic!("Invalid mode: {value}"),
28+
}
29+
}
30+
31+
/// Benchmark variant selected with `--mode`.
#[derive(Clone, Copy)]
enum Mode {
    /// Run only the whole-file `serde_json` benchmark.
    Serde,
    /// Run only the streaming benchmark that deserializes every row.
    Async,
    /// Run only the token-level streaming benchmark that reads just `id`.
    AsyncLight,
    /// Run the `Serde` and `Async` benchmarks; `AsyncLight` is not included
    /// and has to be requested explicitly.
    Both,
}
38+
39+
fn bench_serde(path: &PathBuf) -> Result<(u64, u64, u128), Box<dyn std::error::Error>> {
40+
let start = Instant::now();
41+
let data = fs::read(path)?;
42+
let parsed: Value = serde_json::from_slice(&data)?;
43+
let rows = parsed
44+
.get("rows")
45+
.and_then(|v| v.as_array())
46+
.ok_or_else(|| "missing rows array".to_string())?;
47+
48+
let mut checksum: u64 = 0;
49+
for row in rows {
50+
if let Some(id) = row.get("id").and_then(|v| v.as_u64()) {
51+
checksum = checksum.wrapping_add(id);
52+
}
53+
}
54+
55+
Ok((rows.len() as u64, checksum, start.elapsed().as_millis()))
56+
}
57+
58+
async fn bench_async(path: &PathBuf) -> Result<(u64, u64, u128), Box<dyn std::error::Error>> {
59+
let start = Instant::now();
60+
let file = tokio::fs::File::open(path).await?;
61+
let reader = BufReader::new(file);
62+
let mut reader = AsyncJsonStreamReader::new(reader);
63+
64+
let mut rows: u64 = 0;
65+
let mut checksum: u64 = 0;
66+
67+
while let Some(key) = reader.next_object_entry().await? {
68+
if key == "rows" {
69+
while reader.start_array_item().await? {
70+
let obj = reader.deserialize_object().await?;
71+
if let Some(id) = obj.get("id").and_then(|v| v.as_u64()) {
72+
checksum = checksum.wrapping_add(id);
73+
}
74+
rows += 1;
75+
}
76+
}
77+
}
78+
79+
Ok((rows, checksum, start.elapsed().as_millis()))
80+
}
81+
82+
/// Kind of JSON container currently open while a value is being skipped.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Container {
    /// Opened by a `StartObject` token; must be closed by `EndObject`.
    Object,
    /// Opened by a `StartArray` token; must be closed by `EndArray`.
    Array,
}
87+
88+
async fn consume_value<R>(
89+
reader: &mut AsyncJsonStreamReader<R>,
90+
) -> Result<(), Box<dyn std::error::Error>>
91+
where
92+
R: tokio::io::AsyncRead + Unpin,
93+
{
94+
let token = reader
95+
.next_token()
96+
.await?
97+
.ok_or_else(|| "unexpected EOF while consuming value".to_string())?;
98+
consume_value_from_token(reader, token).await
99+
}
100+
101+
async fn consume_value_from_token<R>(
102+
reader: &mut AsyncJsonStreamReader<R>,
103+
mut token: JsonToken,
104+
) -> Result<(), Box<dyn std::error::Error>>
105+
where
106+
R: tokio::io::AsyncRead + Unpin,
107+
{
108+
let mut stack: Vec<Container> = Vec::new();
109+
110+
loop {
111+
match token {
112+
JsonToken::StartObject => stack.push(Container::Object),
113+
JsonToken::StartArray => stack.push(Container::Array),
114+
JsonToken::EndObject => match stack.pop() {
115+
Some(Container::Object) => {}
116+
_ => return Err("unexpected EndObject".into()),
117+
},
118+
JsonToken::EndArray => match stack.pop() {
119+
Some(Container::Array) => {}
120+
_ => return Err("unexpected EndArray".into()),
121+
},
122+
JsonToken::EndObjectOrListItem => {}
123+
JsonToken::Key(_) => {}
124+
JsonToken::String(_)
125+
| JsonToken::Number(_)
126+
| JsonToken::Boolean(_)
127+
| JsonToken::Null => {}
128+
}
129+
130+
if stack.is_empty() {
131+
match token {
132+
JsonToken::String(_)
133+
| JsonToken::Number(_)
134+
| JsonToken::Boolean(_)
135+
| JsonToken::Null
136+
| JsonToken::EndObject
137+
| JsonToken::EndArray => break,
138+
_ => {}
139+
}
140+
}
141+
142+
token = reader
143+
.next_token()
144+
.await?
145+
.ok_or_else(|| "unexpected EOF while consuming value".to_string())?;
146+
}
147+
148+
Ok(())
149+
}
150+
151+
async fn bench_async_light(path: &PathBuf) -> Result<(u64, u64, u128), Box<dyn std::error::Error>> {
152+
let start = Instant::now();
153+
let file = tokio::fs::File::open(path).await?;
154+
let reader = BufReader::new(file);
155+
let mut reader = AsyncJsonStreamReader::new(reader);
156+
157+
let mut rows: u64 = 0;
158+
let mut checksum: u64 = 0;
159+
160+
let token = reader
161+
.next_token()
162+
.await?
163+
.ok_or_else(|| "empty input".to_string())?;
164+
if token != JsonToken::StartObject {
165+
return Err("expected root object".into());
166+
}
167+
168+
loop {
169+
let token = reader
170+
.next_token()
171+
.await?
172+
.ok_or_else(|| "unexpected EOF in root object".to_string())?;
173+
match token {
174+
JsonToken::Key(key) => {
175+
if key == "rows" {
176+
let next = reader
177+
.next_token()
178+
.await?
179+
.ok_or_else(|| "unexpected EOF before rows array".to_string())?;
180+
if next != JsonToken::StartArray {
181+
return Err("expected rows array".into());
182+
}
183+
184+
loop {
185+
let token = reader
186+
.next_token()
187+
.await?
188+
.ok_or_else(|| "unexpected EOF in rows array".to_string())?;
189+
match token {
190+
JsonToken::EndArray => break,
191+
JsonToken::EndObjectOrListItem => continue,
192+
JsonToken::StartObject => loop {
193+
let token = reader.next_token().await?.ok_or_else(|| {
194+
"unexpected EOF while reading row".to_string()
195+
})?;
196+
match token {
197+
JsonToken::EndObject => {
198+
rows += 1;
199+
break;
200+
}
201+
JsonToken::EndObjectOrListItem => continue,
202+
JsonToken::Key(field) => {
203+
if field == "id" {
204+
let token =
205+
reader.next_token().await?.ok_or_else(|| {
206+
"unexpected EOF reading id".to_string()
207+
})?;
208+
match token {
209+
JsonToken::Number(n) => {
210+
if let Ok(id) = n.parse::<u64>() {
211+
checksum = checksum.wrapping_add(id);
212+
}
213+
}
214+
JsonToken::String(s) => {
215+
if let Ok(id) = s.parse::<u64>() {
216+
checksum = checksum.wrapping_add(id);
217+
}
218+
}
219+
JsonToken::Null => {}
220+
other => {
221+
return Err(format!(
222+
"unexpected id token: {other:?}"
223+
)
224+
.into());
225+
}
226+
}
227+
} else {
228+
consume_value(&mut reader).await?;
229+
}
230+
}
231+
other => {
232+
return Err(
233+
format!("unexpected token in row: {other:?}").into()
234+
);
235+
}
236+
}
237+
},
238+
other => consume_value_from_token(&mut reader, other).await?,
239+
}
240+
}
241+
} else {
242+
consume_value(&mut reader).await?;
243+
}
244+
}
245+
JsonToken::EndObject => break,
246+
JsonToken::EndObjectOrListItem => continue,
247+
other => {
248+
return Err(format!("unexpected token in root: {other:?}").into());
249+
}
250+
}
251+
}
252+
253+
Ok((rows, checksum, start.elapsed().as_millis()))
254+
}
255+
256+
#[tokio::main]
257+
async fn main() -> Result<(), Box<dyn std::error::Error>> {
258+
let mut path: Option<PathBuf> = None;
259+
let mut mode = Mode::Both;
260+
let mut repeat: u64 = 1;
261+
262+
let args: Vec<String> = env::args().skip(1).collect();
263+
let mut i = 0;
264+
while i < args.len() {
265+
match args[i].as_str() {
266+
"--path" => {
267+
i += 1;
268+
if i >= args.len() {
269+
usage();
270+
panic!("Missing value for --path");
271+
}
272+
path = Some(PathBuf::from(&args[i]));
273+
}
274+
"--mode" => {
275+
i += 1;
276+
if i >= args.len() {
277+
usage();
278+
panic!("Missing value for --mode");
279+
}
280+
mode = parse_mode(&args[i]);
281+
}
282+
"--repeat" => {
283+
i += 1;
284+
if i >= args.len() {
285+
usage();
286+
panic!("Missing value for --repeat");
287+
}
288+
repeat = parse_u64(&args[i], "repeat");
289+
}
290+
"--help" | "-h" => {
291+
usage();
292+
return Ok(());
293+
}
294+
other => {
295+
usage();
296+
panic!("Unknown argument: {other}");
297+
}
298+
}
299+
i += 1;
300+
}
301+
302+
let path = path.unwrap_or_else(|| {
303+
usage();
304+
panic!("--path is required");
305+
});
306+
307+
for run in 1..=repeat {
308+
if matches!(mode, Mode::Serde | Mode::Both) {
309+
let (rows, checksum, ms) = bench_serde(&path)?;
310+
println!("run={run} mode=serde rows={rows} checksum={checksum} elapsed_ms={ms}");
311+
}
312+
if matches!(mode, Mode::Async | Mode::Both) {
313+
let (rows, checksum, ms) = bench_async(&path).await?;
314+
println!("run={run} mode=async rows={rows} checksum={checksum} elapsed_ms={ms}");
315+
}
316+
if matches!(mode, Mode::AsyncLight) {
317+
let (rows, checksum, ms) = bench_async_light(&path).await?;
318+
println!("run={run} mode=async-light rows={rows} checksum={checksum} elapsed_ms={ms}");
319+
}
320+
}
321+
322+
Ok(())
323+
}

0 commit comments

Comments
 (0)