Skip to content

Commit 50d6d8d

Browse files
committed
feat: Add apples-to-apples timing benchmark
Compares Rust vs C mozjpeg with identical settings: 4:2:0 subsampling, Robidoux quant tables, Trellis AC+DC, Huffman optimization, and overshoot deringing. Results show Rust 1.65-2.57x faster than C mozjpeg.
1 parent 999ef09 commit 50d6d8d

File tree

1 file changed

+234
-0
lines changed

1 file changed

+234
-0
lines changed

examples/timing_benchmark.rs

Lines changed: 234 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,234 @@
1+
//! Apples-to-apples timing benchmark: Rust vs C mozjpeg
2+
//!
3+
//! Uses identical settings for fair comparison:
4+
//! - 4:2:0 subsampling
5+
//! - Robidoux quant tables (mozjpeg default)
6+
//! - Trellis quantization
7+
//! - Huffman optimization
8+
//! - Overshoot deringing
9+
//!
10+
//! Run with: cargo run --release --example timing_benchmark
11+
12+
use mozjpeg_rs::{Encoder, Subsampling, TrellisConfig};
13+
use std::fs::File;
14+
use std::path::Path;
15+
use std::time::Instant;
16+
17+
/// Loads a PNG file and returns its pixels as tightly packed 8-bit RGB.
///
/// Returns `Some((rgb, width, height))` on success, where `rgb` holds three
/// bytes per pixel. Returns `None` when the file cannot be opened or decoded,
/// when the decoded frame is not 8 bits per sample, or when its color type is
/// neither RGB nor RGBA.
///
/// RGBA input has its alpha channel dropped (no blending is performed).
fn load_png(path: &Path) -> Option<(Vec<u8>, u32, u32)> {
    let file = File::open(path).ok()?;
    let mut reader = png::Decoder::new(file).read_info().ok()?;
    let mut buf = vec![0u8; reader.output_buffer_size()];
    let info = reader.next_frame(&mut buf).ok()?;

    // The encoders consume one byte per sample. A 16-bit frame would have two
    // bytes per sample and be silently misread as pixel data, so reject it.
    if !matches!(info.bit_depth, png::BitDepth::Eight) {
        return None;
    }

    // Only the first `buffer_size()` bytes belong to the decoded frame.
    buf.truncate(info.buffer_size());

    let rgb = match info.color_type {
        // Already in the target layout: hand the buffer over without copying.
        png::ColorType::Rgb => buf,
        // Keep R, G, B of every 4-byte pixel and discard alpha.
        png::ColorType::Rgba => buf
            .chunks_exact(4)
            .flat_map(|px| [px[0], px[1], px[2]])
            .collect(),
        _ => return None,
    };

    Some((rgb, info.width, info.height))
}
33+
34+
fn encode_rust_baseline(rgb: &[u8], width: u32, height: u32, quality: u8) -> Vec<u8> {
35+
Encoder::baseline_optimized()
36+
.quality(quality)
37+
.progressive(false)
38+
.optimize_huffman(true)
39+
.overshoot_deringing(true)
40+
.trellis(TrellisConfig::default())
41+
.subsampling(Subsampling::S420)
42+
.encode_rgb(rgb, width, height)
43+
.expect("encoding failed")
44+
}
45+
46+
fn encode_c_baseline(rgb: &[u8], width: u32, height: u32, quality: u8) -> Vec<u8> {
47+
use mozjpeg_sys::*;
48+
use std::ptr;
49+
50+
unsafe {
51+
let mut cinfo: jpeg_compress_struct = std::mem::zeroed();
52+
let mut jerr: jpeg_error_mgr = std::mem::zeroed();
53+
54+
cinfo.common.err = jpeg_std_error(&mut jerr);
55+
jpeg_CreateCompress(
56+
&mut cinfo,
57+
JPEG_LIB_VERSION as i32,
58+
std::mem::size_of::<jpeg_compress_struct>(),
59+
);
60+
61+
let mut outbuffer: *mut u8 = ptr::null_mut();
62+
let mut outsize: libc::c_ulong = 0;
63+
jpeg_mem_dest(&mut cinfo, &mut outbuffer, &mut outsize);
64+
65+
cinfo.image_width = width;
66+
cinfo.image_height = height;
67+
cinfo.input_components = 3;
68+
cinfo.in_color_space = J_COLOR_SPACE::JCS_RGB;
69+
70+
jpeg_set_defaults(&mut cinfo);
71+
72+
// Use Robidoux tables (mozjpeg default, index 3)
73+
jpeg_c_set_int_param(&mut cinfo, JINT_BASE_QUANT_TBL_IDX, 3);
74+
jpeg_set_quality(&mut cinfo, quality as i32, 1);
75+
76+
// 4:2:0 subsampling
77+
(*cinfo.comp_info.offset(0)).h_samp_factor = 2;
78+
(*cinfo.comp_info.offset(0)).v_samp_factor = 2;
79+
(*cinfo.comp_info.offset(1)).h_samp_factor = 1;
80+
(*cinfo.comp_info.offset(1)).v_samp_factor = 1;
81+
(*cinfo.comp_info.offset(2)).h_samp_factor = 1;
82+
(*cinfo.comp_info.offset(2)).v_samp_factor = 1;
83+
84+
// Huffman optimization
85+
cinfo.optimize_coding = 1;
86+
87+
// Trellis quantization (AC + DC)
88+
jpeg_c_set_bool_param(&mut cinfo, JBOOLEAN_TRELLIS_QUANT, 1);
89+
jpeg_c_set_bool_param(&mut cinfo, JBOOLEAN_TRELLIS_QUANT_DC, 1);
90+
91+
// Overshoot deringing
92+
jpeg_c_set_bool_param(&mut cinfo, JBOOLEAN_OVERSHOOT_DERINGING, 1);
93+
94+
jpeg_start_compress(&mut cinfo, 1);
95+
96+
let row_stride = width as usize * 3;
97+
while cinfo.next_scanline < cinfo.image_height {
98+
let row_ptr = rgb.as_ptr().add(cinfo.next_scanline as usize * row_stride);
99+
let row_array: [*const u8; 1] = [row_ptr];
100+
jpeg_write_scanlines(&mut cinfo, row_array.as_ptr(), 1);
101+
}
102+
103+
jpeg_finish_compress(&mut cinfo);
104+
jpeg_destroy_compress(&mut cinfo);
105+
106+
let result = std::slice::from_raw_parts(outbuffer, outsize as usize).to_vec();
107+
libc::free(outbuffer as *mut libc::c_void);
108+
result
109+
}
110+
}
111+
112+
/// Times repeated calls to `f` and reports the mean duration per call.
///
/// `f` is first called `warmup` times without timing, then `iterations` times
/// under a single wall-clock measurement.
///
/// Returns `(mean_ms, size)`: the mean time per timed call in milliseconds,
/// and the length of the `Vec<u8>` produced by the last timed call. When
/// `iterations` is 0 nothing is timed and `(0.0, 0)` is returned, rather than
/// the NaN a zero-by-zero division would produce.
fn benchmark<F: Fn() -> Vec<u8>>(f: F, warmup: u32, iterations: u32) -> (f64, usize) {
    // Warmup. `black_box` keeps the optimizer from treating the unused
    // result as dead work.
    for _ in 0..warmup {
        std::hint::black_box(f());
    }

    if iterations == 0 {
        return (0.0, 0);
    }

    let start = Instant::now();
    let mut size = 0;
    for _ in 0..iterations {
        size = std::hint::black_box(f()).len();
    }
    let elapsed = start.elapsed();

    let ms = elapsed.as_secs_f64() * 1000.0 / f64::from(iterations);
    (ms, size)
}
127+
128+
fn main() {
129+
let corpus_path = Path::new("corpus/kodak");
130+
if !corpus_path.exists() {
131+
eprintln!("Corpus not found at corpus/kodak");
132+
eprintln!("Run: ./scripts/fetch-corpus.sh");
133+
return;
134+
}
135+
136+
// Load a representative image
137+
let mut entries: Vec<_> = std::fs::read_dir(corpus_path)
138+
.unwrap()
139+
.filter_map(|e| e.ok())
140+
.filter(|e| e.path().extension().is_some_and(|ext| ext == "png"))
141+
.collect();
142+
entries.sort_by_key(|e| e.path());
143+
144+
let test_image = entries.first().expect("No images in corpus").path();
145+
let (rgb, width, height) = load_png(&test_image).expect("Failed to load test image");
146+
147+
println!("\n=== Apples-to-Apples Timing Benchmark ===");
148+
println!("Image: {}x{} ({})", width, height, test_image.file_name().unwrap().to_string_lossy());
149+
println!("Settings: 4:2:0, Robidoux tables, Trellis AC+DC, Huffman opt, Deringing");
150+
println!();
151+
152+
let quality = 85u8;
153+
let warmup = 5;
154+
let iterations = 30;
155+
156+
println!("Quality {} ({} iterations, {} warmup):\n", quality, iterations, warmup);
157+
158+
// Baseline mode (sequential)
159+
let (rust_ms, rust_size) = benchmark(
160+
|| encode_rust_baseline(&rgb, width, height, quality),
161+
warmup,
162+
iterations,
163+
);
164+
let (c_ms, c_size) = benchmark(
165+
|| encode_c_baseline(&rgb, width, height, quality),
166+
warmup,
167+
iterations,
168+
);
169+
170+
println!("| Encoder | Time (ms) | Size (bytes) | Size Diff |");
171+
println!("|---------|-----------|--------------|-----------|");
172+
println!(
173+
"| Rust | {:>9.2} | {:>12} | {:>+8.2}% |",
174+
rust_ms,
175+
rust_size,
176+
((rust_size as f64 / c_size as f64) - 1.0) * 100.0
177+
);
178+
println!(
179+
"| C | {:>9.2} | {:>12} | {:>8} |",
180+
c_ms, c_size, "baseline"
181+
);
182+
println!();
183+
184+
let speed_ratio = rust_ms / c_ms;
185+
if speed_ratio < 1.0 {
186+
println!("Rust is {:.2}x FASTER than C mozjpeg", 1.0 / speed_ratio);
187+
} else {
188+
println!("Rust is {:.2}x SLOWER than C mozjpeg", speed_ratio);
189+
}
190+
191+
// Also run on a larger synthetic image for more stable timing
192+
println!("\n--- Synthetic 2048x2048 image ---\n");
193+
194+
let large_rgb: Vec<u8> = (0..(2048 * 2048 * 3))
195+
.map(|i| ((i * 17 + (i / 2048) * 31) % 256) as u8)
196+
.collect();
197+
198+
let warmup = 2;
199+
let iterations = 10;
200+
201+
println!("Quality {} ({} iterations, {} warmup):\n", quality, iterations, warmup);
202+
203+
let (rust_ms, rust_size) = benchmark(
204+
|| encode_rust_baseline(&large_rgb, 2048, 2048, quality),
205+
warmup,
206+
iterations,
207+
);
208+
let (c_ms, c_size) = benchmark(
209+
|| encode_c_baseline(&large_rgb, 2048, 2048, quality),
210+
warmup,
211+
iterations,
212+
);
213+
214+
println!("| Encoder | Time (ms) | Size (bytes) | Size Diff |");
215+
println!("|---------|-----------|--------------|-----------|");
216+
println!(
217+
"| Rust | {:>9.2} | {:>12} | {:>+8.2}% |",
218+
rust_ms,
219+
rust_size,
220+
((rust_size as f64 / c_size as f64) - 1.0) * 100.0
221+
);
222+
println!(
223+
"| C | {:>9.2} | {:>12} | {:>8} |",
224+
c_ms, c_size, "baseline"
225+
);
226+
println!();
227+
228+
let speed_ratio = rust_ms / c_ms;
229+
if speed_ratio < 1.0 {
230+
println!("Rust is {:.2}x FASTER than C mozjpeg", 1.0 / speed_ratio);
231+
} else {
232+
println!("Rust is {:.2}x SLOWER than C mozjpeg", speed_ratio);
233+
}
234+
}

0 commit comments

Comments
 (0)