Skip to content

Commit c707e83

Browse files
committed
fix plot issue
Signed-off-by: Huamin Chen <[email protected]>
1 parent a7122f5 commit c707e83

File tree

2 files changed

+65
-16
lines changed

2 files changed

+65
-16
lines changed

bench/comprehensive_bench.sh

Lines changed: 19 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -215,10 +215,10 @@ run_dataset_benchmark() {
215215
--router-models "$ROUTER_MODEL" \
216216
--output-dir "$OUTPUT_BASE/router_$dataset" \
217217
--seed 42
218-
218+
219219
# Extract and save router metrics immediately
220220
extract_and_save_metrics "$dataset" "Router" "$OUTPUT_BASE/router_$dataset"
221-
221+
222222
# vLLM benchmark
223223
echo -e "${YELLOW} ⚡ Running vLLM evaluation...${NC}"
224224
python3 -m vllm_semantic_router_bench.router_reason_bench_multi_dataset \
@@ -245,13 +245,24 @@ generate_plots() {
245245
for dataset in "${!DATASET_CONFIGS[@]}"; do
246246
echo -e "${YELLOW} 📊 Plotting $dataset results...${NC}"
247247

248-
python3 -m vllm_semantic_router_bench.bench_plot \
249-
--router-dir "$OUTPUT_BASE/router_$dataset" \
250-
--vllm-dir "$OUTPUT_BASE/vllm_$dataset" \
251-
--output-dir "$OUTPUT_BASE/plots_$dataset" \
252-
--dataset-name "$dataset"
248+
# Find the summary.json files
249+
ROUTER_SUMMARY=$(find "$OUTPUT_BASE/router_$dataset" -name "summary.json" -type f | head -1)
250+
VLLM_SUMMARY=$(find "$OUTPUT_BASE/vllm_$dataset" -name "summary.json" -type f | head -1)
251+
252+
if [[ -f "$VLLM_SUMMARY" ]]; then
253+
PLOT_CMD="python3 -m vllm_semantic_router_bench.bench_plot --summary \"$VLLM_SUMMARY\" --out-dir \"$OUTPUT_BASE/plots_$dataset\""
254+
255+
if [[ -f "$ROUTER_SUMMARY" ]]; then
256+
PLOT_CMD="$PLOT_CMD --router-summary \"$ROUTER_SUMMARY\""
257+
fi
258+
259+
echo -e "${BLUE} Running: $PLOT_CMD${NC}"
260+
eval $PLOT_CMD
261+
else
262+
echo -e "${RED} ⚠️ No vLLM summary.json found for $dataset, skipping plots${NC}"
263+
fi
253264
done
254-
265+
255266
echo -e "${GREEN} ✅ All plots generated${NC}"
256267
echo ""
257268
}

bench/vllm_semantic_router_bench/router_reason_bench_multi_dataset.py

Lines changed: 46 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -452,10 +452,31 @@ def evaluate_model_router_transparent(
452452
)
453453
)
454454

455-
for future in tqdm(
456-
futures, total=len(futures), desc=f"Evaluating {model} (Router-Transparent)"
457-
):
458-
results.append(future.result())
455+
try:
456+
for future in tqdm(
457+
futures,
458+
total=len(futures),
459+
desc=f"Evaluating {model} (Router-Transparent)",
460+
):
461+
results.append(future.result())
462+
except KeyboardInterrupt:
463+
print(
464+
"\n⚠️ Router evaluation interrupted by user. Saving partial results..."
465+
)
466+
# Cancel remaining futures
467+
for future in futures:
468+
future.cancel()
469+
# Collect results from completed futures
470+
for future in futures:
471+
if future.done() and not future.cancelled():
472+
try:
473+
results.append(future.result())
474+
except Exception:
475+
pass # Skip failed results
476+
if not results:
477+
print("❌ No router results to save.")
478+
raise
479+
print(f"✅ Saved {len(results)} partial router results.")
459480

460481
return pd.DataFrame(results)
461482

@@ -558,10 +579,27 @@ def run_variants(q: Question) -> List[Dict[str, Any]]:
558579

559580
with ThreadPoolExecutor(max_workers=concurrent_requests) as executor:
560581
futures = [executor.submit(run_variants, q) for q in questions]
561-
for future in tqdm(
562-
futures, total=len(futures), desc=f"Evaluating {model} (vLLM modes)"
563-
):
564-
results.extend(future.result())
582+
try:
583+
for future in tqdm(
584+
futures, total=len(futures), desc=f"Evaluating {model} (vLLM modes)"
585+
):
586+
results.extend(future.result())
587+
except KeyboardInterrupt:
588+
print("\n⚠️ Benchmark interrupted by user. Saving partial results...")
589+
# Cancel remaining futures
590+
for future in futures:
591+
future.cancel()
592+
# Collect results from completed futures
593+
for future in futures:
594+
if future.done() and not future.cancelled():
595+
try:
596+
results.extend(future.result())
597+
except Exception:
598+
pass # Skip failed results
599+
if not results:
600+
print("❌ No results to save.")
601+
raise
602+
print(f"✅ Saved {len(results)} partial results.")
565603

566604
return pd.DataFrame(results)
567605

0 commit comments

Comments
 (0)