Skip to content

Commit 8ad0c46

Browse files
authored
Feat: Support Path Suffix for LLM Endpoints (#949)
* Feat: Support Path Suffix for LLM Endpoints

  Signed-off-by: bitliu <bitliu@tencent.com>

* Feat: Support Path Suffix for LLM Endpoints

  Signed-off-by: bitliu <bitliu@tencent.com>

---------

Signed-off-by: bitliu <bitliu@tencent.com>
1 parent ac423d7 commit 8ad0c46

File tree

4 files changed

+64
-21
lines changed

4 files changed

+64
-21
lines changed

src/semantic-router/cmd/main.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -154,12 +154,12 @@ func main() {
154154
// Initialize embedding models BEFORE creating server, this ensures Qwen3/Gemma models are ready when semantic cache is initialized
155155
// Use the already loaded config instead of calling config.Load() again
156156
if cfg.Qwen3ModelPath != "" || cfg.GemmaModelPath != "" {
157-
if err := candle_binding.InitEmbeddingModels(
157+
if initErr := candle_binding.InitEmbeddingModels(
158158
cfg.Qwen3ModelPath,
159159
cfg.GemmaModelPath,
160160
cfg.EmbeddingModels.UseCPU,
161-
); err != nil {
162-
logging.Errorf("Failed to initialize embedding models: %v", err)
161+
); initErr != nil {
162+
logging.Errorf("Failed to initialize embedding models: %v", initErr)
163163
logging.Warnf("Embedding API endpoints will return placeholder embeddings")
164164
} else {
165165
logging.Infof("Embedding models initialized successfully")

src/vllm-sr/cli/config_generator.py

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -92,12 +92,23 @@ def generate_envoy_config_from_user_config(
9292
uses_dns = False
9393

9494
for endpoint in model.endpoints:
95-
# Parse endpoint (host:port or just host)
96-
if ":" in endpoint.endpoint:
97-
host, port = endpoint.endpoint.split(":", 1)
95+
# Parse endpoint: can be "host", "host:port", or "host/path" or "host:port/path"
96+
endpoint_str = endpoint.endpoint
97+
path = ""
98+
99+
# Extract path if present (e.g., "host/path" or "host:port/path")
100+
if "/" in endpoint_str:
101+
# Split by first "/" to separate host[:port] from path
102+
parts = endpoint_str.split("/", 1)
103+
endpoint_str = parts[0] # host or host:port
104+
path = "/" + parts[1] # /path
105+
106+
# Parse host and port
107+
if ":" in endpoint_str:
108+
host, port = endpoint_str.split(":", 1)
98109
port = int(port)
99110
else:
100-
host = endpoint.endpoint
111+
host = endpoint_str
101112
# Default port based on protocol
102113
port = 443 if endpoint.protocol == "https" else 80
103114

@@ -117,6 +128,7 @@ def generate_envoy_config_from_user_config(
117128
"name": endpoint.name,
118129
"address": host,
119130
"port": int(port),
131+
"path": path,
120132
"weight": endpoint.weight,
121133
"protocol": endpoint.protocol,
122134
"is_https": is_https,
@@ -131,13 +143,21 @@ def generate_envoy_config_from_user_config(
131143
# Domain names → LOGICAL_DNS, IP addresses → STATIC
132144
cluster_type = "LOGICAL_DNS" if uses_dns else "STATIC"
133145

146+
# Determine path prefix - use the first endpoint's path if all endpoints have the same path
147+
path_prefix = ""
148+
if endpoints:
149+
first_path = endpoints[0].get("path", "")
150+
if first_path and all(ep.get("path", "") == first_path for ep in endpoints):
151+
path_prefix = first_path
152+
134153
models.append(
135154
{
136155
"name": model.name,
137156
"cluster_name": cluster_name,
138157
"endpoints": endpoints,
139158
"cluster_type": cluster_type,
140159
"has_https": has_https,
160+
"path_prefix": path_prefix,
141161
}
142162
)
143163

src/vllm-sr/cli/merger.py

Lines changed: 29 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -226,25 +226,40 @@ def translate_providers_to_router_format(providers) -> Dict[str, Any]:
226226

227227
# Add endpoints for this model
228228
for endpoint in model.endpoints:
229-
# Parse endpoint (host:port or just host)
230-
if ":" in endpoint.endpoint:
231-
host, port = endpoint.endpoint.split(":", 1)
229+
# Parse endpoint: can be "host", "host:port", or "host/path" or "host:port/path"
230+
endpoint_str = endpoint.endpoint
231+
path = ""
232+
233+
# Extract path if present (e.g., "host/path" or "host:port/path")
234+
if "/" in endpoint_str:
235+
# Split by first "/" to separate host[:port] from path
236+
parts = endpoint_str.split("/", 1)
237+
endpoint_str = parts[0] # host or host:port
238+
path = "/" + parts[1] # /path
239+
240+
# Parse host and port
241+
if ":" in endpoint_str:
242+
host, port = endpoint_str.split(":", 1)
232243
port = int(port)
233244
else:
234-
host = endpoint.endpoint
245+
host = endpoint_str
235246
# Use default port based on protocol
236247
port = 443 if endpoint.protocol == "https" else 80
237248

238-
vllm_endpoints.append(
239-
{
240-
"name": f"{model.name}_{endpoint.name}",
241-
"address": host,
242-
"port": port,
243-
"weight": endpoint.weight,
244-
"protocol": endpoint.protocol,
245-
"model": model.name,
246-
}
247-
)
249+
endpoint_config = {
250+
"name": f"{model.name}_{endpoint.name}",
251+
"address": host,
252+
"port": port,
253+
"weight": endpoint.weight,
254+
"protocol": endpoint.protocol,
255+
"model": model.name,
256+
}
257+
258+
# Add path if present
259+
if path:
260+
endpoint_config["path"] = path
261+
262+
vllm_endpoints.append(endpoint_config)
248263

249264
return {
250265
"vllm_endpoints": vllm_endpoints,

src/vllm-sr/cli/templates/envoy.template.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,14 @@ static_resources:
3838
cluster: {{ model.cluster_name }}_cluster
3939
timeout: {{ listener.timeout | default('300s') }}
4040
idleTimeout: 300s
41+
{% if model.path_prefix %}
42+
# Prepend path prefix to all requests (e.g., /openapi + /v1/chat/completions = /openapi/v1/chat/completions)
43+
regex_rewrite:
44+
pattern:
45+
google_re2: {}
46+
regex: "^/"
47+
substitution: "{{ model.path_prefix }}/"
48+
{% endif %}
4149
{% endfor %}
4250
# Default route (no x-selected-model header)
4351
- match:

0 commit comments

Comments (0)