
Commit a0dbecc

feat(benchmarks): add benchmark suite and configuration
- Implement benchmark suite for sqlite-vec-client.
- Add CRUD operation benchmarks with performance metrics.
- Create configuration loader for benchmark settings.
- Include utility functions for generating test data.
- Add CSV export functionality for benchmark results.
1 parent 60d5b37 commit a0dbecc

File tree

11 files changed: +464 -0 lines changed


.gitignore

Lines changed: 2 additions & 0 deletions

@@ -205,3 +205,5 @@ cython_debug/
 marimo/_static/
 marimo/_lsp/
 __marimo__/
+
+*.csv

benchmarks/__init__.py

Lines changed: 31 additions & 0 deletions

"""Benchmark suite for sqlite-vec-client."""

from .config_loader import load_config
from .operations import (
    benchmark_add,
    benchmark_delete_many,
    benchmark_get_all,
    benchmark_get_many,
    benchmark_similarity_search,
    benchmark_update_many,
)
from .reporter import export_to_csv, print_results, print_summary
from .runner import run_benchmark_suite
from .utils import generate_embeddings, generate_metadata, generate_texts

__all__ = [
    "load_config",
    "run_benchmark_suite",
    "print_results",
    "print_summary",
    "export_to_csv",
    "benchmark_add",
    "benchmark_get_many",
    "benchmark_similarity_search",
    "benchmark_update_many",
    "benchmark_get_all",
    "benchmark_delete_many",
    "generate_embeddings",
    "generate_texts",
    "generate_metadata",
]

benchmarks/__main__.py

Lines changed: 4 additions & 0 deletions

from .benchmark_crud import main

if __name__ == "__main__":
    main()
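
With this entry point the suite can be run as a module, e.g. python -m benchmarks, optionally passing the -c/--config and -o/--output flags that benchmark_crud.main() defines below.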

benchmarks/benchmark_crud.py

Lines changed: 55 additions & 0 deletions

"""Benchmark script for sqlite-vec-client CRUD operations.

Measures performance of all operations with varying dataset sizes.
"""

import argparse

from .config_loader import load_config
from .reporter import export_to_csv, print_results, print_summary
from .runner import run_benchmark_suite


def main():
    """Run benchmarks with different dataset sizes."""
    parser = argparse.ArgumentParser(description="Run sqlite-vec-client benchmarks")
    parser.add_argument("-c", "--config", type=str, help="Path to config YAML file")
    parser.add_argument(
        "-o", "--output", type=str, help="Output directory for CSV export"
    )
    args = parser.parse_args()

    config = load_config(args.config)

    print("SQLite-Vec-Client Performance Benchmark")
    print("=" * 80)
    print(f"Configuration: dim={config['dimension']}, distance={config['distance']}")
    print("=" * 80)

    dataset_sizes = config["dataset_sizes"]
    table_format = config["table_format"]
    db_modes = config.get("db_modes", ["file"])
    all_results = {}

    for db_mode in db_modes:
        print(f"\n{'=' * 80}")
        print(f"Testing with {db_mode.upper()} database")
        print("=" * 80)

        mode_results = {}
        for size in dataset_sizes:
            print(f"\nRunning benchmark with {size:,} records...")
            results = run_benchmark_suite(size, config, db_mode)
            mode_results[size] = results
            print_results(results, table_format)

        all_results[db_mode] = mode_results

    print_summary(all_results, dataset_sizes, table_format)

    if args.output:
        export_to_csv(all_results, dataset_sizes, args.output)


if __name__ == "__main__":
    main()
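
For reference, the same flow can also be driven programmatically through the package exports. A minimal sketch, assuming benchmarks/runner.py (not shown in this excerpt) keeps the (size, config, db_mode) signature used in main() above:

# Minimal programmatic sketch of the flow main() drives above.
# run_benchmark_suite lives in benchmarks/runner.py, which is not part of
# this excerpt; its signature is inferred from the call in main().
from benchmarks import load_config, print_results, run_benchmark_suite

config = load_config()  # falls back to benchmarks/config.yaml
results = run_benchmark_suite(1_000, config, "memory")
print_results(results, config["table_format"])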

benchmarks/config.yaml

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# Benchmark Configuration for sqlite-vec-client
2+
3+
# Dataset sizes to test (number of records)
4+
dataset_sizes:
5+
- 100
6+
- 1000
7+
- 10000
8+
- 50000
9+
10+
# Embedding dimension
11+
dimension: 384
12+
13+
# Distance metric (cosine, l2, or inner_product)
14+
distance: cosine
15+
16+
# Database modes to test (file, memory, or both)
17+
db_modes:
18+
- file
19+
- memory
20+
21+
# Similarity search configuration
22+
similarity_search:
23+
# Number of iterations for each search benchmark
24+
iterations: 100
25+
# Top-k values to test
26+
top_k_values:
27+
- 10
28+
- 100
29+
30+
# Batch size for get_all operation
31+
batch_size: 1000
32+
33+
# Output format (grid, fancy_grid, simple, plain, or any tabulate format)
34+
table_format: grid

benchmarks/config_loader.py

Lines changed: 16 additions & 0 deletions

"""Configuration loader for benchmarks."""

import os
from typing import Any, Optional

import yaml  # type: ignore[import-untyped]


def load_config(config_path: Optional[str] = None) -> dict[str, Any]:
    """Load benchmark configuration from YAML file."""
    if config_path is None:
        config_path = os.path.join(os.path.dirname(__file__), "config.yaml")

    with open(config_path) as f:
        result: dict[str, Any] = yaml.safe_load(f)
    return result
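
A quick usage example of the loader against the default config.yaml above; the commented values reflect the shipped defaults:

from benchmarks.config_loader import load_config

config = load_config()  # defaults to benchmarks/config.yaml next to the module
print(config["dataset_sizes"])                       # [100, 1000, 10000, 50000]
print(config["similarity_search"]["top_k_values"])   # [10, 100]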

benchmarks/operations.py

Lines changed: 101 additions & 0 deletions

"""Benchmark operations for CRUD methods."""

import statistics
import time

from sqlite_vec_client import SQLiteVecClient

from .utils import benchmark_operation


def benchmark_add(
    client: SQLiteVecClient,
    texts: list[str],
    embeddings: list[list[float]],
    metadata: list[dict],
) -> dict:
    """Benchmark add operations."""
    elapsed, rowids = benchmark_operation(
        client.add, texts=texts, embeddings=embeddings, metadata=metadata
    )
    return {
        "operation": "add",
        "count": len(texts),
        "time": elapsed,
        "ops_per_sec": len(texts) / elapsed,
    }


def benchmark_get_many(client: SQLiteVecClient, rowids: list[int]) -> dict:
    """Benchmark get_many operations."""
    elapsed, _ = benchmark_operation(client.get_many, rowids)
    return {
        "operation": "get_many",
        "count": len(rowids),
        "time": elapsed,
        "ops_per_sec": len(rowids) / elapsed,
    }


def benchmark_similarity_search(
    client: SQLiteVecClient, embedding: list[float], top_k: int, iterations: int
) -> dict:
    """Benchmark similarity search operations."""
    times = []
    for _ in range(iterations):
        elapsed, _ = benchmark_operation(
            client.similarity_search, embedding=embedding, top_k=top_k
        )
        times.append(elapsed)

    avg_time = statistics.mean(times)
    return {
        "operation": "similarity_search",
        "top_k": top_k,
        "iterations": iterations,
        "avg_time": avg_time,
        "min_time": min(times),
        "max_time": max(times),
        "searches_per_sec": 1 / avg_time,
    }


def benchmark_update_many(
    client: SQLiteVecClient, rowids: list[int], texts: list[str]
) -> dict:
    """Benchmark update_many operations."""
    updates = [(rid, text, None, None) for rid, text in zip(rowids, texts)]
    elapsed, count = benchmark_operation(client.update_many, updates)
    return {
        "operation": "update_many",
        "count": count,
        "time": elapsed,
        "ops_per_sec": count / elapsed,
    }


def benchmark_delete_many(client: SQLiteVecClient, rowids: list[int]) -> dict:
    """Benchmark delete_many operations."""
    elapsed, count = benchmark_operation(client.delete_many, rowids)
    return {
        "operation": "delete_many",
        "count": count,
        "time": elapsed,
        "ops_per_sec": count / elapsed,
    }


def benchmark_get_all(
    client: SQLiteVecClient, expected_count: int, batch_size: int
) -> dict:
    """Benchmark get_all operations."""
    start = time.perf_counter()
    count = sum(1 for _ in client.get_all(batch_size=batch_size))
    elapsed = time.perf_counter() - start
    return {
        "operation": "get_all",
        "count": count,
        "time": elapsed,
        "ops_per_sec": count / elapsed,
    }
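
These helpers all lean on benchmark_operation from benchmarks/utils.py, which is not part of this excerpt. Judging from the call sites above, a plausible sketch is a thin perf_counter wrapper that returns the elapsed time together with the callable's result; the real implementation may differ:

# Hypothetical sketch of benchmark_operation, inferred from its call sites
# in operations.py; not the actual benchmarks/utils.py source.
import time
from typing import Any, Callable


def benchmark_operation(func: Callable[..., Any], *args: Any, **kwargs: Any) -> tuple[float, Any]:
    """Time a single call and return (elapsed_seconds, result)."""
    start = time.perf_counter()
    result = func(*args, **kwargs)
    elapsed = time.perf_counter() - start
    return elapsed, result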

benchmarks/reporter.py

Lines changed: 112 additions & 0 deletions

"""Benchmark results reporting."""

import csv
import os
from datetime import datetime

from tabulate import tabulate  # type: ignore[import-untyped]


def print_results(results: list[dict], table_format: str):
    """Print benchmark results in a formatted table."""
    table_data = []
    for result in results:
        op = result["operation"]
        if "top_k" in result:
            op = f"{op} (k={result['top_k']})"

        count = result.get("count", result.get("iterations", "-"))
        time_val = result.get("time", result.get("avg_time", 0))
        ops_per_sec = result.get("ops_per_sec", result.get("searches_per_sec", 0))

        table_data.append([op, count, f"{time_val:.4f}", f"{ops_per_sec:.2f}"])

    print(
        "\n"
        + tabulate(
            table_data,
            headers=["Operation", "Count", "Time (s)", "Ops/sec"],
            tablefmt=table_format,
        )
    )


def print_summary(
    all_results: dict[str, dict[int, list[dict]]],
    dataset_sizes: list[int],
    table_format: str,
):
    """Print summary table of all benchmark results."""
    for db_mode, mode_results in all_results.items():
        print("\n" + "=" * 80)
        print(f"SUMMARY - Operations per Second by Dataset Size ({db_mode.upper()} DB)")
        print("=" * 80)

        operations = [
            "add",
            "get_many",
            "similarity_search",
            "update_many",
            "get_all",
            "delete_many",
        ]
        summary_data = []
        for op in operations:
            row = [op]
            for size in dataset_sizes:
                matching = [r for r in mode_results[size] if r["operation"] == op]
                if matching:
                    ops_per_sec = matching[0].get(
                        "ops_per_sec", matching[0].get("searches_per_sec", 0)
                    )
                    row.append(f"{ops_per_sec:,.0f}")
                else:
                    row.append("N/A")
            summary_data.append(row)

        headers = ["Operation"] + [f"{s:,}" for s in dataset_sizes]
        print(tabulate(summary_data, headers=headers, tablefmt=table_format))
        print("=" * 80)


def export_to_csv(
    all_results: dict[str, dict[int, list[dict]]],
    dataset_sizes: list[int],
    output_dir: str,
):
    """Export benchmark results to CSV files."""
    os.makedirs(output_dir, exist_ok=True)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    for db_mode, mode_results in all_results.items():
        for size in dataset_sizes:
            filename = os.path.join(
                output_dir, f"benchmark_{db_mode}_{size}_{timestamp}.csv"
            )

            with open(filename, "w", newline="") as f:
                writer = csv.writer(f)
                writer.writerow(["Operation", "ops_per_sec", "time_sec"])

                operations = [
                    "add",
                    "get_many",
                    "similarity_search",
                    "update_many",
                    "get_all",
                    "delete_many",
                ]
                for op in operations:
                    matching = [r for r in mode_results[size] if r["operation"] == op]
                    if matching:
                        ops_per_sec = matching[0].get(
                            "ops_per_sec", matching[0].get("searches_per_sec", 0)
                        )
                        time_val = matching[0].get(
                            "time", matching[0].get("avg_time", 0)
                        )
                        writer.writerow([op, f"{ops_per_sec:.2f}", f"{time_val:.4f}"])
                    else:
                        writer.writerow([op, "N/A", "N/A"])

            print(f"Exported {db_mode} ({size} records) to: {filename}")
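
Both print_summary and export_to_csv expect the nested mapping that benchmark_crud.main() builds: db mode, then dataset size, then a list of per-operation result dicts. A minimal illustrative shape (the numbers are placeholders, not measurements):

# Illustrative shape of all_results as built by main(); values are made up.
all_results = {
    "memory": {
        100: [
            {"operation": "add", "count": 100, "time": 0.01, "ops_per_sec": 10_000.0},
            {"operation": "similarity_search", "top_k": 10, "iterations": 100,
             "avg_time": 0.001, "min_time": 0.0009, "max_time": 0.002,
             "searches_per_sec": 1_000.0},
        ],
    },
}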
