
Commit d28bbb9

Add Rust compression/decompression benchmark
Adds a benchmark testing compression and decompression performance for two popular algorithms: Gzip (deflate) and Brotli. The benchmark processes ~1.7 MB of mixed data (repeated patterns, structured data, natural text, and random bytes). Both compression and decompression are exercised in a single benchmark cycle, with verification that the decompressed data matches the original.
1 parent 0145307 commit d28bbb9

11 files changed, +2708 -0 lines
Lines changed: 1 addition & 0 deletions
benchmark.wasm
Lines changed: 1 addition & 0 deletions
../Dockerfile.rust
Lines changed: 44 additions & 0 deletions
# Rust Compression/Decompression Benchmark

This benchmark tests compression and decompression performance for multiple algorithms commonly used in web and systems programming.

## What it tests

The benchmark performs compression and decompression using two algorithms:

1. **Gzip (Deflate)** - via `flate2` with the pure Rust backend
   - Classic compression used in HTTP, gzip files, and PNG images
   - Good balance of speed and compression ratio

2. **Brotli** - via the `brotli` crate
   - Modern compression by Google, optimized for web content
   - Better compression ratios than gzip, especially for text/HTML/JSON

Each algorithm (sketched below):
- Compresses the input data
- Decompresses it back
- Verifies the output matches the original
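A minimal sketch of that roundtrip for both algorithms, assuming the `flate2` and `brotli` crates pinned in `Cargo.toml`. This is illustrative rather than the benchmark's actual source (which lives in `rust-benchmark/` and is not shown in this diff), and the Brotli buffer/quality/window parameters are placeholders:

```rust
use std::io::{Read, Write};

use flate2::{read::GzDecoder, write::GzEncoder, Compression};

fn gzip_roundtrip(input: &[u8]) -> Vec<u8> {
    // Compress with the gzip (deflate) format.
    let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
    encoder.write_all(input).unwrap();
    let compressed = encoder.finish().unwrap();

    // Decompress and verify we got the original bytes back.
    let mut decompressed = Vec::new();
    GzDecoder::new(&compressed[..])
        .read_to_end(&mut decompressed)
        .unwrap();
    assert_eq!(decompressed.as_slice(), input);

    compressed
}

fn brotli_roundtrip(input: &[u8]) -> Vec<u8> {
    // Compress; buffer size, quality (0-11) and window size are placeholders.
    let mut compressed = Vec::new();
    {
        let mut encoder = brotli::CompressorWriter::new(&mut compressed, 4096, 9, 22);
        encoder.write_all(input).unwrap();
    } // the stream is finalized when the writer is dropped

    // Decompress and verify.
    let mut decompressed = Vec::new();
    brotli::Decompressor::new(&compressed[..], 4096)
        .read_to_end(&mut decompressed)
        .unwrap();
    assert_eq!(decompressed.as_slice(), input);

    compressed
}
```

Presumably only this compress/decompress/verify work sits inside the single timed benchmark cycle described in the commit message; reading the input and reporting sizes is setup and teardown.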
## Input Data

The `default.input` file (~1.7 MB) contains a mix of:
- Repeated patterns (compress very well)
- Structured JSON-like data (compresses well)
- Natural language text (compresses moderately)
- Random bytes (doesn't compress well)

This mix provides a realistic workload showing how the algorithms perform on different data types.
## Implementation

Uses:
- `flate2` 1.0 with the pure Rust backend for WASM compatibility
- `brotli` 7.0 for Brotli compression
## Performance Notes

Expected compression ratios on the test data (see `benchmark.stderr.expected`: 192,012 and 167,110 bytes from a 1,731,587-byte input):
- Gzip: ~11% of the original size (roughly 9:1 compression)
- Brotli: ~10% of the original size (roughly 10:1 compression)

Brotli achieves better compression but typically requires more CPU time.
Lines changed: 3 additions & 0 deletions
[rust-compression] original size: 1731587 bytes
[rust-compression] gzip compressed: 192012 bytes (11.1%)
[rust-compression] brotli compressed: 167110 bytes (9.7%)
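Each of these lines boils down to a single `eprintln!`. A sketch of the reporting helper (names are illustrative; the benchmark's actual formatting code may differ):

```rust
// Emit one summary line in the format above; stderr is used so that stdout
// (benchmark.stdout.expected) stays empty.
fn report(label: &str, compressed_len: usize, original_len: usize) {
    let pct = compressed_len as f64 / original_len as f64 * 100.0;
    eprintln!("[rust-compression] {label} compressed: {compressed_len} bytes ({pct:.1}%)");
}

// report("gzip", 192_012, 1_731_587) would print the gzip line above.
```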

benchmarks/rust-compression/benchmark.stdout.expected

Whitespace-only changes.

benchmarks/rust-compression/default.input

Lines changed: 2391 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 86 additions & 0 deletions
#!/usr/bin/env python3
"""Generate input data for compression benchmark.

Creates a file with mixed content that compresses reasonably well:
- Repeated text patterns
- JSON-like structured data
- Some random data
"""

import random
import string


def generate_text_block(size):
    """Generate semi-random text that compresses well."""
    words = [
        "the",
        "quick",
        "brown",
        "fox",
        "jumps",
        "over",
        "lazy",
        "dog",
        "hello",
        "world",
        "test",
        "data",
        "benchmark",
        "compression",
        "algorithm",
        "performance",
        "measure",
        "speed",
        "quality",
    ]

    text = []
    while len(" ".join(text)) < size:
        text.append(random.choice(words))

    return " ".join(text)[:size]


def generate_json_like_data():
    """Generate JSON-like structured data."""
    data = []
    for i in range(1000):
        record = f'{{"id": {i}, "name": "user_{i}", "email": "user{i}@example.com", '
        record += f'"status": "active", "score": {random.randint(0, 100)}, '
        record += f'"tags": ["tag1", "tag2", "tag3"]}}\n'
        data.append(record)
    return "".join(data)


def generate_repeated_pattern():
    """Generate data with lots of repetition (compresses very well)."""
    pattern = "ABCDEFGHIJ" * 100
    return (pattern + "\n") * 1000


def main():
    with open("default.input", "wb") as f:
        # Mix of different data types for realistic compression testing

        # 1. Repeated patterns (compress very well)
        f.write(generate_repeated_pattern().encode("utf-8"))

        # 2. Structured data (compresses well)
        f.write(generate_json_like_data().encode("utf-8"))

        # 3. Natural language-like text (compresses moderately)
        f.write(generate_text_block(500000).encode("utf-8"))

        # 4. Some random data (doesn't compress well)
        random_bytes = bytes(random.randint(0, 255) for _ in range(100000))
        f.write(random_bytes)

    import os

    size = os.path.getsize("default.input")
    print(f"Generated input file: {size} bytes ({size / 1024 / 1024:.2f} MB)")


if __name__ == "__main__":
    main()
Lines changed: 1 addition & 0 deletions
target

benchmarks/rust-compression/rust-benchmark/Cargo.lock

Lines changed: 101 additions & 0 deletions
Some generated files are not rendered by default.
Lines changed: 11 additions & 0 deletions
[package]
name = "benchmark"
version = "0.1.0"
edition = "2021"

[dependencies]
flate2 = { version = "1.0", default-features = false, features = ["rust_backend"] }
brotli = "7.0"
sightglass-api = "0.1"

[workspace]
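For orientation, a sketch of how the benchmark crate's entry point might be laid out. The real source is not part of this diff; the `default.input` path and the `sightglass_api` `start`/`end` hook names are assumptions to check against that crate, and the compression work itself is elided (see the roundtrip sketch in the README section above):

```rust
// Assumption: sightglass-api 0.1 exposes start()/end() hooks for the timed region.
use sightglass_api as bench;

fn main() {
    // Assumption: the input file is read from the working directory.
    let input = std::fs::read("default.input").expect("read default.input");
    eprintln!("[rust-compression] original size: {} bytes", input.len());

    // Only the compression/decompression work sits inside the timed region.
    bench::start();
    // ... gzip and Brotli roundtrips over `input` go here (see README sketch) ...
    bench::end();
}
```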
