diff --git a/bench/bench_plot.py b/bench/bench_plot.py index ff60bda3..fdab467c 100644 --- a/bench/bench_plot.py +++ b/bench/bench_plot.py @@ -178,11 +178,16 @@ def plot_metric(metric: str, out_path: Path): # Determine modes to plot, optionally limiting to top-N by mean of metric all_modes = sorted({m for c in cats for m in cat_by_mode.get(c, {}).keys()}) if len(all_modes) > 0: + def _mean(values): vals = [v for v in values if v is not None] return sum(vals) / len(vals) if vals else float("nan") - if args.max_modes is not None and args.max_modes > 0 and len(all_modes) > args.max_modes: + if ( + args.max_modes is not None + and args.max_modes > 0 + and len(all_modes) > args.max_modes + ): mode_means = [] for mode in all_modes: vals = [cat_by_mode.get(c, {}).get(mode, {}).get(metric) for c in cats] @@ -279,7 +284,9 @@ def _mean(values): ax.set_xlim(left_xlim, right_xlim) ylabel = metric.replace("_", " ") ax.set_ylabel(ylabel, fontsize=int(18 * args.font_scale)) - ax.set_title(f"Per-category {ylabel} per-mode values", fontsize=int(22 * args.font_scale)) + ax.set_title( + f"Per-category {ylabel} per-mode values", fontsize=int(22 * args.font_scale) + ) ax.tick_params(axis="both", which="major", labelsize=int(14 * args.font_scale)) # Build a figure-level legend below the axes and reserve space to prevent overlap @@ -290,7 +297,7 @@ def _mean(values): legend_rows = 2 legend_ncol = max(1, (num_series + legend_rows - 1) // legend_rows) num_rows = legend_rows - scale = (args.font_scale / 1.6) + scale = args.font_scale / 1.6 # Reserve generous space for long rotated tick labels and multi-row legend bottom_reserved = (0.28 * scale) + (0.12 * num_rows * scale) bottom_reserved = max(0.24, min(0.60, bottom_reserved)) diff --git a/bench/router_reason_bench.py b/bench/router_reason_bench.py index 3db353af..1bf666a4 100644 --- a/bench/router_reason_bench.py +++ b/bench/router_reason_bench.py @@ -355,7 +355,9 @@ def build_extra_body_for_model( # reasoning: True -> ON, False -> OFF, None -> base lower = model_name.lower() - if (("ds" in lower) or ("deepseek" in lower)) and ("v31" in lower or "v3.1" in lower or "v3" in lower): + if (("ds" in lower) or ("deepseek" in lower)) and ( + "v31" in lower or "v3.1" in lower or "v3" in lower + ): if reasoning is True: return {"chat_template_kwargs": {"thinking": True}} if reasoning is None or reasoning is False: @@ -535,7 +537,9 @@ def run_variants(q: Dict[str, Any]) -> List[Dict[str, Any]]: with ThreadPoolExecutor(max_workers=concurrent_requests) as executor: futures = [executor.submit(run_variants, q) for q in questions_data] - for future in tqdm(futures, total=len(futures), desc=f"Evaluating {model} (vLLM modes)"): + for future in tqdm( + futures, total=len(futures), desc=f"Evaluating {model} (vLLM modes)" + ): results.extend(future.result()) return pd.DataFrame(results) @@ -584,7 +588,9 @@ def run_all_modes(q: Dict[str, Any]) -> List[Dict[str, Any]]: with ThreadPoolExecutor(max_workers=concurrent_requests) as executor: futures = [executor.submit(run_all_modes, q) for q in questions] - for future in tqdm(futures, total=len(futures), desc=f"Evaluating {model} (policies)"): + for future in tqdm( + futures, total=len(futures), desc=f"Evaluating {model} (policies)" + ): per_call_records.extend(future.result()) calls_df = pd.DataFrame(per_call_records) diff --git a/src/semantic-router/pkg/extproc/request_handler.go b/src/semantic-router/pkg/extproc/request_handler.go index f0353bfa..440189dc 100644 --- a/src/semantic-router/pkg/extproc/request_handler.go +++ b/src/semantic-router/pkg/extproc/request_handler.go @@ -445,7 +445,6 @@ func (r *OpenAIRouter) handleModelRouting(openAIRequest *openai.ChatCompletionNe selectedEndpoint = endpointAddress log.Printf("Selected endpoint address: %s for model: %s", selectedEndpoint, originalModel) } else { - // TOOD(Xunzhuo): pick a random endpoint from the list of all available endpoints log.Printf("Warning: No endpoint found for model %s, using fallback", originalModel) } setHeaders := []*core.HeaderValueOption{}