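Fix plot issue: call bench_plot with the discovered summary.json paths and keep partial benchmark results on KeyboardInterrupt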

rootfs · rootfs · commit c707e8394b10 · 2025-09-13T13:21:33.000Z
Signed-off-by: Huamin Chen <hchen@redhat.com>
diff --git a/bench/comprehensive_bench.sh b/bench/comprehensive_bench.sh
@@ -215,10 +215,10 @@ run_dataset_benchmark() {
         --router-models "$ROUTER_MODEL" \
         --output-dir "$OUTPUT_BASE/router_$dataset" \
         --seed 42
-    
+
     # Extract and save router metrics immediately
     extract_and_save_metrics "$dataset" "Router" "$OUTPUT_BASE/router_$dataset"
-    
+
     # vLLM benchmark  
     echo -e "${YELLOW}  ⚡ Running vLLM evaluation...${NC}"
     python3 -m vllm_semantic_router_bench.router_reason_bench_multi_dataset \
@@ -245,13 +245,24 @@ generate_plots() {
     for dataset in "${!DATASET_CONFIGS[@]}"; do
         echo -e "${YELLOW}  📊 Plotting $dataset results...${NC}"
         
-        python3 -m vllm_semantic_router_bench.bench_plot \
-            --router-dir "$OUTPUT_BASE/router_$dataset" \
-            --vllm-dir "$OUTPUT_BASE/vllm_$dataset" \
-            --output-dir "$OUTPUT_BASE/plots_$dataset" \
-            --dataset-name "$dataset"
+        # Find the summary.json files
+        ROUTER_SUMMARY=$(find "$OUTPUT_BASE/router_$dataset" -name "summary.json" -type f | head -1)
+        VLLM_SUMMARY=$(find "$OUTPUT_BASE/vllm_$dataset" -name "summary.json" -type f | head -1)
+
+        if [[ -f "$VLLM_SUMMARY" ]]; then
+            PLOT_CMD="python3 -m vllm_semantic_router_bench.bench_plot --summary \"$VLLM_SUMMARY\" --out-dir \"$OUTPUT_BASE/plots_$dataset\""
+
+            if [[ -f "$ROUTER_SUMMARY" ]]; then
+                PLOT_CMD="$PLOT_CMD --router-summary \"$ROUTER_SUMMARY\""
+            fi
+
+            echo -e "${BLUE}    Running: $PLOT_CMD${NC}"
+            eval $PLOT_CMD
+        else
+            echo -e "${RED}    ⚠️  No vLLM summary.json found for $dataset, skipping plots${NC}"
+        fi
     done
-    
+
     echo -e "${GREEN}  ✅ All plots generated${NC}"
     echo ""
 }
diff --git a/bench/vllm_semantic_router_bench/router_reason_bench_multi_dataset.py b/bench/vllm_semantic_router_bench/router_reason_bench_multi_dataset.py
@@ -452,10 +452,31 @@ def evaluate_model_router_transparent(
                 )
             )
 
-        for future in tqdm(
-            futures, total=len(futures), desc=f"Evaluating {model} (Router-Transparent)"
-        ):
-            results.append(future.result())
+        try:
+            for future in tqdm(
+                futures,
+                total=len(futures),
+                desc=f"Evaluating {model} (Router-Transparent)",
+            ):
+                results.append(future.result())
+        except KeyboardInterrupt:
+            print(
+                "\n⚠️  Router evaluation interrupted by user. Saving partial results..."
+            )
+            # Cancel remaining futures
+            for future in futures:
+                future.cancel()
+            # Collect results from completed futures
+            for future in futures:
+                if future.done() and not future.cancelled():
+                    try:
+                        results.append(future.result())
+                    except Exception:
+                        pass  # Skip failed results
+            if not results:
+                print("❌ No router results to save.")
+                raise
+            print(f"✅ Saved {len(results)} partial router results.")
 
     return pd.DataFrame(results)
 
@@ -558,10 +579,27 @@ def run_variants(q: Question) -> List[Dict[str, Any]]:
 
     with ThreadPoolExecutor(max_workers=concurrent_requests) as executor:
         futures = [executor.submit(run_variants, q) for q in questions]
-        for future in tqdm(
-            futures, total=len(futures), desc=f"Evaluating {model} (vLLM modes)"
-        ):
-            results.extend(future.result())
+        try:
+            for future in tqdm(
+                futures, total=len(futures), desc=f"Evaluating {model} (vLLM modes)"
+            ):
+                results.extend(future.result())
+        except KeyboardInterrupt:
+            print("\n⚠️  Benchmark interrupted by user. Saving partial results...")
+            # Cancel remaining futures
+            for future in futures:
+                future.cancel()
+            # Collect results from completed futures
+            for future in futures:
+                if future.done() and not future.cancelled():
+                    try:
+                        results.extend(future.result())
+                    except Exception:
+                        pass  # Skip failed results
+            if not results:
+                print("❌ No results to save.")
+                raise
+            print(f"✅ Saved {len(results)} partial results.")
 
     return pd.DataFrame(results)