
Commit fc003b0

feat(viewer): extract viewer module with evaluation gallery and badges
1 parent 27c4130 commit fc003b0

3 files changed (+2906 additions, −1995 deletions)

openadapt_ml/cloud/local.py

Lines changed: 80 additions & 19 deletions
@@ -292,24 +292,45 @@ def cmd_serve(args: argparse.Namespace) -> int:
     from openadapt_ml.training.trainer import regenerate_local_dashboard

     port = args.port
-    current_dir = get_current_output_dir()

-    if not current_dir.exists():
-        print(f"Error: {current_dir} not found. Run training first.")
-        return 1
+    # Determine what to serve: benchmark directory or training output
+    if hasattr(args, 'benchmark') and args.benchmark:
+        serve_dir = Path(args.benchmark).expanduser().resolve()
+        if not serve_dir.exists():
+            print(f"Error: Benchmark directory not found: {serve_dir}")
+            return 1
+
+        # Regenerate benchmark viewer if needed
+        if not args.no_regenerate:
+            print("Regenerating benchmark viewer...")
+            try:
+                from openadapt_ml.training.benchmark_viewer import generate_benchmark_viewer
+                generate_benchmark_viewer(serve_dir)
+            except Exception as e:
+                print(f"Warning: Could not regenerate benchmark viewer: {e}")
+
+        start_page = "benchmark.html"
+    else:
+        serve_dir = get_current_output_dir()

-    # Regenerate dashboard and viewer with latest code before serving
-    if not args.no_regenerate:
-        print("Regenerating dashboard and viewer...")
-        try:
-            regenerate_local_dashboard(str(current_dir))
-            # Also regenerate viewer if comparison data exists
-            _regenerate_viewer_if_possible(current_dir)
-        except Exception as e:
-            print(f"Warning: Could not regenerate: {e}")
+        if not serve_dir.exists():
+            print(f"Error: {serve_dir} not found. Run training first.")
+            return 1
+
+        # Regenerate dashboard and viewer with latest code before serving
+        if not args.no_regenerate:
+            print("Regenerating dashboard and viewer...")
+            try:
+                regenerate_local_dashboard(str(serve_dir))
+                # Also regenerate viewer if comparison data exists
+                _regenerate_viewer_if_possible(serve_dir)
+            except Exception as e:
+                print(f"Warning: Could not regenerate: {e}")

-    # Serve from the current job directory
-    os.chdir(current_dir)
+        start_page = "dashboard.html"
+
+    # Serve from the specified directory
+    os.chdir(serve_dir)

     # Custom handler with /api/stop support
     quiet_mode = args.quiet
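With this hunk, `cmd_serve` can point the local HTTP server at either the current training output or a benchmark results directory. A hedged usage sketch of the new flag (the `benchmark_results/test_run` path is illustrative; `--open` and `--no-regenerate` already exist on the `serve` subcommand):

    # Serve a benchmark directory and open benchmark.html in the browser
    uv run python -m openadapt_ml.cloud.local serve --benchmark benchmark_results/test_run --open

    # Serve existing files without regenerating the benchmark viewer
    uv run python -m openadapt_ml.cloud.local serve --benchmark benchmark_results/test_run --no-regenerate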
@@ -324,7 +345,7 @@ def log_message(self, format, *log_args):
         def do_POST(self):
             if self.path == '/api/stop':
                 # Create stop signal file
-                stop_file = current_dir / "STOP_TRAINING"
+                stop_file = serve_dir / "STOP_TRAINING"
                 stop_file.touch()
                 self.send_response(200)
                 self.send_header('Content-Type', 'application/json')
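The `/api/stop` handler only touches a `STOP_TRAINING` marker file in whichever directory is being served; presumably the training loop watches for that file. A minimal sketch of triggering it by hand, assuming the server is listening on port 8000 (the default port is not shown in this diff):

    # POST to the stop endpoint; the handler responds 200 and creates <serve_dir>/STOP_TRAINING
    curl -X POST http://localhost:8000/api/stop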
@@ -344,9 +365,9 @@ def do_OPTIONS(self):
             self.end_headers()

     with socketserver.TCPServer(("", port), StopHandler) as httpd:
-        url = f"http://localhost:{port}/dashboard.html"
-        print(f"\nServing training output at: {url}")
-        print(f"Job directory: {current_dir}")
+        url = f"http://localhost:{port}/{start_page}"
+        print(f"\nServing at: {url}")
+        print(f"Directory: {serve_dir}")
         print("Press Ctrl+C to stop\n")

         if args.open:
@@ -413,6 +434,36 @@ def cmd_viewer(args: argparse.Namespace) -> int:
     return 0


+def cmd_benchmark_viewer(args: argparse.Namespace) -> int:
+    """Generate benchmark viewer from benchmark results."""
+    from openadapt_ml.training.benchmark_viewer import generate_benchmark_viewer
+
+    benchmark_dir = Path(args.benchmark_dir).expanduser().resolve()
+    if not benchmark_dir.exists():
+        print(f"Error: Benchmark directory not found: {benchmark_dir}")
+        return 1
+
+    print(f"\n{'='*50}")
+    print("GENERATING BENCHMARK VIEWER")
+    print(f"{'='*50}")
+    print(f"Benchmark dir: {benchmark_dir}")
+    print()
+
+    try:
+        viewer_path = generate_benchmark_viewer(benchmark_dir)
+        print(f"\nSuccess! Benchmark viewer generated at: {viewer_path}")
+
+        if args.open:
+            webbrowser.open(str(viewer_path))
+
+        return 0
+    except Exception as e:
+        print(f"Error generating benchmark viewer: {e}")
+        import traceback
+        traceback.print_exc()
+        return 1
+
+
 def cmd_compare(args: argparse.Namespace) -> int:
     """Run human vs AI comparison on local checkpoint."""
    capture_path = Path(args.capture).expanduser().resolve()
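The new subcommand mirrors the existing `viewer` command: it resolves the results directory, regenerates the viewer (presumably `benchmark.html`, given the serve path above), and returns a non-zero exit code on failure. A hedged example invocation, with the directory name taken from the usage text added later in this commit:

    uv run python -m openadapt_ml.cloud.local benchmark-viewer benchmark_results/test_run --open
    echo $?   # 0 on success, 1 if the directory is missing or generation fails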
@@ -468,6 +519,9 @@ def main():
     # Regenerate viewer
     uv run python -m openadapt_ml.cloud.local viewer --open

+    # Generate benchmark viewer
+    uv run python -m openadapt_ml.cloud.local benchmark-viewer benchmark_results/test_run --open
+
     # Run comparison
     uv run python -m openadapt_ml.cloud.local compare --capture ~/captures/my-workflow --checkpoint checkpoints/model
     """
@@ -498,13 +552,20 @@ def main():
     p_serve.add_argument("--quiet", "-q", action="store_true", help="Suppress request logging")
     p_serve.add_argument("--no-regenerate", action="store_true",
                          help="Skip regenerating dashboard/viewer (serve existing files)")
+    p_serve.add_argument("--benchmark", help="Serve benchmark results directory instead of training output")
     p_serve.set_defaults(func=cmd_serve)

     # viewer
     p_viewer = subparsers.add_parser("viewer", help="Regenerate viewer")
     p_viewer.add_argument("--open", action="store_true", help="Open in browser")
     p_viewer.set_defaults(func=cmd_viewer)

+    # benchmark_viewer
+    p_benchmark = subparsers.add_parser("benchmark-viewer", help="Generate benchmark viewer")
+    p_benchmark.add_argument("benchmark_dir", help="Path to benchmark results directory")
+    p_benchmark.add_argument("--open", action="store_true", help="Open viewer in browser")
+    p_benchmark.set_defaults(func=cmd_benchmark_viewer)
+
     # compare
     p_compare = subparsers.add_parser("compare", help="Run human vs AI comparison")
     p_compare.add_argument("--capture", required=True, help="Path to capture directory")
