Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions bench/bench_plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,11 +178,16 @@ def plot_metric(metric: str, out_path: Path):
# Determine modes to plot, optionally limiting to top-N by mean of metric
all_modes = sorted({m for c in cats for m in cat_by_mode.get(c, {}).keys()})
if len(all_modes) > 0:

def _mean(values):
vals = [v for v in values if v is not None]
return sum(vals) / len(vals) if vals else float("nan")

if args.max_modes is not None and args.max_modes > 0 and len(all_modes) > args.max_modes:
if (
args.max_modes is not None
and args.max_modes > 0
and len(all_modes) > args.max_modes
):
mode_means = []
for mode in all_modes:
vals = [cat_by_mode.get(c, {}).get(mode, {}).get(metric) for c in cats]
Expand Down Expand Up @@ -279,7 +284,9 @@ def _mean(values):
ax.set_xlim(left_xlim, right_xlim)
ylabel = metric.replace("_", " ")
ax.set_ylabel(ylabel, fontsize=int(18 * args.font_scale))
ax.set_title(f"Per-category {ylabel} per-mode values", fontsize=int(22 * args.font_scale))
ax.set_title(
f"Per-category {ylabel} per-mode values", fontsize=int(22 * args.font_scale)
)
ax.tick_params(axis="both", which="major", labelsize=int(14 * args.font_scale))

# Build a figure-level legend below the axes and reserve space to prevent overlap
Expand All @@ -290,7 +297,7 @@ def _mean(values):
legend_rows = 2
legend_ncol = max(1, (num_series + legend_rows - 1) // legend_rows)
num_rows = legend_rows
scale = (args.font_scale / 1.6)
scale = args.font_scale / 1.6
# Reserve generous space for long rotated tick labels and multi-row legend
bottom_reserved = (0.28 * scale) + (0.12 * num_rows * scale)
bottom_reserved = max(0.24, min(0.60, bottom_reserved))
Expand Down
12 changes: 9 additions & 3 deletions bench/router_reason_bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,9 @@ def build_extra_body_for_model(
# reasoning: True -> ON, False -> OFF, None -> base

lower = model_name.lower()
if (("ds" in lower) or ("deepseek" in lower)) and ("v31" in lower or "v3.1" in lower or "v3" in lower):
if (("ds" in lower) or ("deepseek" in lower)) and (
"v31" in lower or "v3.1" in lower or "v3" in lower
):
if reasoning is True:
return {"chat_template_kwargs": {"thinking": True}}
if reasoning is None or reasoning is False:
Expand Down Expand Up @@ -535,7 +537,9 @@ def run_variants(q: Dict[str, Any]) -> List[Dict[str, Any]]:

with ThreadPoolExecutor(max_workers=concurrent_requests) as executor:
futures = [executor.submit(run_variants, q) for q in questions_data]
for future in tqdm(futures, total=len(futures), desc=f"Evaluating {model} (vLLM modes)"):
for future in tqdm(
futures, total=len(futures), desc=f"Evaluating {model} (vLLM modes)"
):
results.extend(future.result())

return pd.DataFrame(results)
Expand Down Expand Up @@ -584,7 +588,9 @@ def run_all_modes(q: Dict[str, Any]) -> List[Dict[str, Any]]:

with ThreadPoolExecutor(max_workers=concurrent_requests) as executor:
futures = [executor.submit(run_all_modes, q) for q in questions]
for future in tqdm(futures, total=len(futures), desc=f"Evaluating {model} (policies)"):
for future in tqdm(
futures, total=len(futures), desc=f"Evaluating {model} (policies)"
):
per_call_records.extend(future.result())

calls_df = pd.DataFrame(per_call_records)
Expand Down
1 change: 0 additions & 1 deletion src/semantic-router/pkg/extproc/request_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -445,7 +445,6 @@ func (r *OpenAIRouter) handleModelRouting(openAIRequest *openai.ChatCompletionNe
selectedEndpoint = endpointAddress
log.Printf("Selected endpoint address: %s for model: %s", selectedEndpoint, originalModel)
} else {
// TODO(Xunzhuo): pick a random endpoint from the list of all available endpoints
log.Printf("Warning: No endpoint found for model %s, using fallback", originalModel)
}
setHeaders := []*core.HeaderValueOption{}
Expand Down
Loading