Skip to content

Commit 9e3f05c

Browse files
wolfieschclaude
andcommitted
feat(benchmarks): add FGP daemon API benchmarks
- fgp_api_benchmark.py: Tests Gmail/Calendar/GitHub daemons - gmail_workflow_benchmark.py: Gmail API vs browser comparison - hn_workflow_benchmark.py: FGP vs agent-browser vs Playwright MCP - fgp_vs_mcp_benchmark.py: Core FGP vs MCP comparison - Include all benchmark results JSON files Results: FGP 19x faster than MCP, all methods 100% success Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent ccddc66 commit 9e3f05c

29 files changed

+13995
-0
lines changed

benchmarks/fgp_api_benchmark.py

Lines changed: 387 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,387 @@
1+
#!/usr/bin/env python3
2+
"""
3+
FGP API Benchmark - Gmail, Calendar, GitHub Daemons
4+
5+
Tests individual methods and multi-step workflows across all FGP daemons.
6+
7+
Daemons tested:
8+
- Gmail (PyO3 + Google API)
9+
- Calendar (PyO3 + Google API)
10+
- GitHub (Native Rust + gh CLI)
11+
12+
Usage:
13+
python3 fgp_api_benchmark.py --iterations 3
14+
15+
CHANGELOG (recent first, max 5 entries)
16+
01/14/2026 - Initial implementation (Claude)
17+
"""
18+
19+
from __future__ import annotations
20+
21+
import argparse
22+
import json
23+
import socket
24+
import statistics
25+
import time
26+
import uuid
27+
from dataclasses import asdict, dataclass, field
28+
from datetime import datetime
29+
from pathlib import Path
30+
from typing import Any
31+
32+
# Unix socket of each FGP daemon, keyed by daemon name.
# Insertion order (gmail, calendar, github) is the order the daemons are probed in.
SOCKETS = {
    name: Path.home().joinpath(".fgp", "services", name, "daemon.sock")
    for name in ("gmail", "calendar", "github")
}
# JSON reports are written to a "results" directory next to this script.
RESULTS_DIR = Path(__file__).parent.joinpath("results")
39+
40+
41+
@dataclass
class BenchResult:
    """Outcome of one timed call against a daemon method."""

    # Name of the daemon that was called.
    daemon: str
    # Label the call is reported under.
    method: str
    # Iteration number of this call within its benchmark run.
    iteration: int
    # Measured round-trip time in milliseconds.
    latency_ms: float
    # Whether the call completed without raising.
    success: bool
    # Size of the JSON-serialized result, in characters.
    payload_size: int = 0
    # Error text when the call failed, otherwise None.
    error: str | None = None
51+
52+
53+
@dataclass
class MethodSummary:
    """Aggregated latency and payload statistics for one daemon method."""

    # Daemon and method label the statistics belong to.
    daemon: str
    method: str
    # Number of recorded calls (successful or not).
    iterations: int
    # Fraction of calls that succeeded, between 0.0 and 1.0.
    success_rate: float
    # Latency statistics in milliseconds.
    mean_ms: float
    median_ms: float
    min_ms: float
    max_ms: float
    std_dev_ms: float
    # Average payload size of the summarized calls.
    mean_payload: float
66+
67+
68+
@dataclass
class BenchmarkReport:
    """Everything produced by one benchmark run."""

    # Timestamp string recorded when the report was built.
    generated_at: str
    # Iterations requested per method.
    iterations: int
    # Raw per-call results; each instance gets its own fresh list.
    results: list[BenchResult] = field(default_factory=list)
    # Per-method statistics; each instance gets its own fresh list.
    summaries: list[MethodSummary] = field(default_factory=list)
75+
76+
77+
class FGPClient:
    """Generic FGP daemon client.

    Each call opens a fresh Unix stream socket, sends one JSON request
    terminated by a newline, and reads the reply up to the first newline.
    """

    def __init__(self, daemon_name: str, socket_path: Path, timeout_s: float = 30.0):
        """Remember the daemon name, its socket path and the per-call timeout.

        Args:
            daemon_name: Name used to tag results produced through this client.
            socket_path: Filesystem path of the daemon's Unix socket.
            timeout_s: Socket timeout in seconds applied to every call.
        """
        self.daemon = daemon_name
        self.socket_path = socket_path
        self.timeout_s = timeout_s

    def is_available(self) -> bool:
        """Return True when the daemon's socket path exists on disk."""
        return self.socket_path.exists()

    def call(self, method: str, params: dict | None = None) -> tuple[dict, float]:
        """Call daemon method, return (result, latency_ms).

        The timer starts before the request is built and stops once the
        socket has been closed, so the latency covers the whole round trip.

        Args:
            method: Method name placed in the request's "method" field.
            params: Request parameters; an empty dict is sent when omitted.

        Returns:
            The reply's "result" value (an empty dict when absent) and the
            elapsed time in milliseconds.

        Raises:
            Exception: On any transport, decoding or daemon-reported error.
                The message is prefixed with "FGP call failed: " and the
                original exception is chained as the cause.
        """
        start = time.perf_counter()

        request = {
            "id": str(uuid.uuid4()),
            "v": 1,
            "method": method,
            "params": params or {},
        }

        try:
            # The context manager closes the socket even when connect/send/recv
            # raises, so failed calls do not leak file descriptors.
            with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as sock:
                sock.settimeout(self.timeout_s)
                sock.connect(str(self.socket_path))
                sock.sendall((json.dumps(request) + "\n").encode())

                chunks = []
                while True:
                    chunk = sock.recv(65536)
                    if not chunk:
                        break
                    chunks.append(chunk)
                    # Earlier chunks held no newline, so checking the newest
                    # chunk is enough to detect the end of the reply line.
                    if b"\n" in chunk:
                        break

            latency_ms = (time.perf_counter() - start) * 1000

            # Only the first line is the reply; ignore anything after it.
            reply_text = b"".join(chunks).split(b"\n", 1)[0].decode().strip()
            if not reply_text:
                raise Exception("empty response from daemon")

            response = json.loads(reply_text)
            if not response.get("ok"):
                # "error" may be an object with a message, a bare string, or null.
                error = response.get("error")
                message = error.get("message") if isinstance(error, dict) else error
                raise Exception(message or "Unknown error")

            return response.get("result", {}), latency_ms

        except Exception as e:
            raise Exception(f"FGP call failed: {e}") from e
125+
126+
127+
def bench_method(
    client: FGPClient,
    method: str,
    params: dict | None,
    iterations: int,
    label: str | None = None,
    delay_s: float = 0.2,
) -> list[BenchResult]:
    """Benchmark a single method.

    Calls ``client.call(method, params)`` ``iterations`` times and records
    one BenchResult per call. A failing call never aborts the run; it is
    recorded with ``success=False``, a latency of 0.0 and the error text
    truncated to 200 characters.

    Args:
        client: Client whose ``call`` method is timed and whose ``daemon``
            name tags every result.
        method: Method name passed to the daemon.
        params: Parameters forwarded unchanged to every call.
        iterations: Number of calls to make.
        label: Name stored in the results; defaults to ``method``.
        delay_s: Pause in seconds after each call. Defaults to the previous
            fixed 0.2 s; a value of 0 or less disables the pause.

    Returns:
        The results in call order, with iteration numbers starting at 1.
    """
    results = []
    display = label or method

    for iteration in range(1, iterations + 1):
        try:
            result, latency = client.call(method, params)
            payload = len(json.dumps(result)) if result else 0
            results.append(BenchResult(
                daemon=client.daemon,
                method=display,
                iteration=iteration,
                latency_ms=latency,
                success=True,
                payload_size=payload,
            ))
        except Exception as e:
            results.append(BenchResult(
                daemon=client.daemon,
                method=display,
                iteration=iteration,
                latency_ms=0.0,  # float, so the field has one type in the report
                success=False,
                error=str(e)[:200],
            ))
        if delay_s > 0:
            time.sleep(delay_s)  # Small delay between iterations

    return results
162+
163+
164+
def compute_summary(results: list[BenchResult], daemon: str, method: str) -> MethodSummary:
    """Aggregate the results recorded for one (daemon, method) pair.

    Latency and payload statistics are computed over successful calls only.
    ``iterations`` counts every matching call, and ``success_rate`` is the
    successful share of those. When no call succeeded, all statistics are 0.0.
    """
    matching = [r for r in results if r.daemon == daemon and r.method == method]
    succeeded = [r for r in matching if r.success]
    attempts = len(matching)

    if not succeeded:
        return MethodSummary(
            daemon=daemon,
            method=method,
            iterations=attempts,
            success_rate=0.0,
            mean_ms=0.0,
            median_ms=0.0,
            min_ms=0.0,
            max_ms=0.0,
            std_dev_ms=0.0,
            mean_payload=0.0,
        )

    latencies = [r.latency_ms for r in succeeded]
    payload_sizes = [r.payload_size for r in succeeded]

    # A sample standard deviation needs at least two data points.
    if len(latencies) > 1:
        spread = statistics.stdev(latencies)
    else:
        spread = 0.0

    return MethodSummary(
        daemon=daemon,
        method=method,
        iterations=attempts,
        success_rate=len(succeeded) / attempts,
        mean_ms=statistics.mean(latencies),
        median_ms=statistics.median(latencies),
        min_ms=min(latencies),
        max_ms=max(latencies),
        std_dev_ms=spread,
        mean_payload=statistics.mean(payload_sizes),
    )
198+
199+
200+
def print_daemon_table(summaries: list[MethodSummary], daemon: str) -> None:
    """Print results table for a daemon.

    Prints nothing when ``summaries`` holds no entry for ``daemon``.
    Otherwise prints a title, a header and one row per method. Methods with
    no successful call are shown as FAILED.

    Args:
        summaries: Summaries for any number of daemons.
        daemon: Name of the daemon whose rows should be printed.
    """
    rows = [s for s in summaries if s.daemon == daemon]
    if not rows:
        return

    header = f"{'Method':<20} {'Mean':>10} {'Min':>10} {'Max':>10} {'Payload':>12} {'Success':>8}"
    # Size the rule from the header so the two can never drift apart.
    rule = "-" * len(header)

    print(f"\n{daemon.upper()} Daemon")
    print(rule)
    print(header)
    print(rule)

    for s in rows:
        if s.success_rate > 0:
            # Numeric widths leave room for the unit suffix ("ms", "B", "%")
            # so every cell is exactly as wide as its header column.
            print(
                f"{s.method:<20} {s.mean_ms:>8.0f}ms {s.min_ms:>8.0f}ms {s.max_ms:>8.0f}ms "
                f"{s.mean_payload:>11.0f}B {s.success_rate * 100:>7.0f}%"
            )
        else:
            print(f"{s.method:<20} {'FAILED':>10} {'-':>10} {'-':>10} {'-':>12} {0:>7.0f}%")
219+
220+
221+
def main() -> None:
    """Benchmark every available daemon, print the tables and save a JSON report.

    Steps:
      1. Parse ``--iterations`` from the command line.
      2. Probe each socket in SOCKETS and keep the daemons whose socket exists.
      3. Run the Gmail, Calendar and GitHub method benchmarks for those daemons.
      4. Summarize per (daemon, method), print one table per daemon, write the
         full report to RESULTS_DIR and print a per-daemon average.

    Returns early, without writing a report, when no daemon is available.
    """
    parser = argparse.ArgumentParser(description="FGP API Benchmark")
    parser.add_argument("--iterations", type=int, default=3, help="Iterations per method")
    args = parser.parse_args()

    print("FGP API Benchmark")
    print("=" * 70)
    print("Testing: Gmail, Calendar, GitHub daemons")
    print(f"Iterations: {args.iterations}")
    print()

    # Initialize clients
    clients = {}
    for name, path in SOCKETS.items():
        client = FGPClient(name, path)
        if client.is_available():
            clients[name] = client
            print(f" [OK] {name} daemon")
        else:
            print(f" [SKIP] {name} daemon (not running)")

    if not clients:
        print("\nNo daemons available. Exiting.")
        return

    print()
    results: list[BenchResult] = []

    def run(client, method, params, label):
        """Announce ``method``, benchmark it and collect its results."""
        print(f" {method}...")
        results.extend(bench_method(client, method, params, args.iterations, label))

    # =========================================================================
    # GMAIL BENCHMARKS
    # =========================================================================
    if "gmail" in clients:
        gmail = clients["gmail"]
        print("[Gmail Daemon]")

        run(gmail, "gmail.inbox", {"limit": 5}, "inbox")
        run(gmail, "gmail.search", {"query": "from:*", "limit": 5}, "search")

        # The thread benchmark needs a real thread ID, taken from the newest
        # inbox entry. This probe call is not part of the recorded results.
        print(" gmail.thread...")
        try:
            inbox_result, _ = gmail.call("gmail.inbox", {"limit": 1})
            emails = inbox_result.get("emails", [])
            thread_id = emails[0].get("thread_id") if emails else None
            if thread_id:
                results.extend(
                    bench_method(gmail, "gmail.thread", {"thread_id": thread_id}, args.iterations, "thread")
                )
            elif emails:
                # Without an ID the daemon call could only fail, which would
                # be reported as a daemon failure rather than a missing input.
                print(" (skipped - no thread_id)")
            else:
                print(" (skipped - no emails)")
        except Exception as e:
            print(f" (failed: {e})")

        run(gmail, "gmail.unread", {"limit": 5}, "unread")

    # =========================================================================
    # CALENDAR BENCHMARKS
    # =========================================================================
    if "calendar" in clients:
        calendar = clients["calendar"]
        print("\n[Calendar Daemon]")

        run(calendar, "calendar.today", {}, "today")
        run(calendar, "calendar.upcoming", {"days": 7, "limit": 10}, "upcoming")
        run(calendar, "calendar.search", {"query": "meeting", "days": 30}, "search")
        run(calendar, "calendar.free_slots", {"duration_minutes": 30, "days": 7}, "free_slots")

    # =========================================================================
    # GITHUB BENCHMARKS
    # =========================================================================
    if "github" in clients:
        github = clients["github"]
        print("\n[GitHub Daemon]")

        run(github, "github.user", {}, "user")
        run(github, "github.repos", {"limit": 10}, "repos")
        run(github, "github.notifications", {}, "notifications")
        # issues (on a specific repo)
        run(github, "github.issues", {"repo": "wolfiesch/fgp-daemon", "limit": 5}, "issues")

    # =========================================================================
    # COMPUTE SUMMARIES
    # =========================================================================
    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # tables list methods in the order they were benchmarked.
    method_keys = dict.fromkeys((r.daemon, r.method) for r in results)
    summaries = [compute_summary(results, daemon, method) for daemon, method in method_keys]

    # =========================================================================
    # PRINT RESULTS
    # =========================================================================
    print("\n" + "=" * 70)
    print("RESULTS")
    print("=" * 70)

    for daemon in ("gmail", "calendar", "github"):
        print_daemon_table(summaries, daemon)

    # =========================================================================
    # SAVE RESULTS
    # =========================================================================
    # One timestamp for both the report body and the file name, so the two
    # always agree.
    now = datetime.now()
    report = BenchmarkReport(
        generated_at=now.isoformat(),
        iterations=args.iterations,
        results=results,
        summaries=summaries,
    )

    RESULTS_DIR.mkdir(parents=True, exist_ok=True)
    output_path = RESULTS_DIR / f"fgp_api_{now.strftime('%Y%m%d_%H%M%S')}.json"

    with open(output_path, "w", encoding="utf-8") as f:
        # asdict converts the nested result/summary dataclasses to plain dicts.
        json.dump(asdict(report), f, indent=2)

    print(f"\nResults saved to: {output_path}")

    # Print quick summary
    print("\n" + "=" * 70)
    print("QUICK SUMMARY")
    print("=" * 70)
    for daemon in ("gmail", "calendar", "github"):
        daemon_summaries = [s for s in summaries if s.daemon == daemon and s.success_rate > 0]
        if daemon_summaries:
            avg = statistics.mean(s.mean_ms for s in daemon_summaries)
            print(f"{daemon:<10}: avg {avg:.0f}ms across {len(daemon_summaries)} methods")


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)