Commit 7a3a257

Xunzhuo authored and rootfs committed
feat: add knob for /v1/models to control if respond real models. (vllm-project#476)
Signed-off-by: Huamin Chen <[email protected]>
1 parent 3c080f3 commit 7a3a257

7 files changed: +146 -25 lines

.gitignore

Lines changed: 10 additions & 0 deletions

@@ -143,3 +143,13 @@ dashboard/backend/dashboard-backend.exe
 
 # Keep old HTML backup for reference
 dashboard/frontend/index.html.old
+
+
+* text=auto eol=lf
+*.png binary
+*.jpg binary
+*.jpeg binary
+*.gif binary
+*.pdf binary
+*.zip binary
+

config/config.yaml

Lines changed: 7 additions & 0 deletions

@@ -154,6 +154,13 @@ default_model: "qwen3"
 # Example: auto_model_name: "MoM" # or any other name you prefer
 # auto_model_name: "MoM"
 
+# Include configured models in /v1/models list endpoint (optional, default: false)
+# When false (default): only the auto model name is returned in the /v1/models endpoint
+# When true: all models configured in model_config are also included in the /v1/models endpoint
+# This is useful for clients that need to discover all available models
+# Example: include_config_models_in_list: true
+# include_config_models_in_list: false
+
 # Reasoning family configurations
 reasoning_families:
   deepseek:

src/semantic-router/pkg/api/server.go

Lines changed: 4 additions & 3 deletions

@@ -721,7 +721,8 @@ func (s *ClassificationAPIServer) handleClassifierInfo(w http.ResponseWriter, _
 }
 
 // handleOpenAIModels handles OpenAI-compatible model listing at /v1/models
-// It returns all models discoverable from the router configuration plus the configured auto model name.
+// It returns the configured auto model name and optionally the underlying models from config.
+// Whether to include configured models is controlled by the config's IncludeConfigModelsInList setting (default: false)
 func (s *ClassificationAPIServer) handleOpenAIModels(w http.ResponseWriter, _ *http.Request) {
     now := time.Now().Unix()
 
@@ -749,8 +750,8 @@ func (s *ClassificationAPIServer) handleOpenAIModels(w http.ResponseWriter, _ *h
         })
     }
 
-    // Append underlying models from config (if available)
-    if s.config != nil {
+    // Append underlying models from config (if available and configured to include them)
+    if s.config != nil && s.config.IncludeConfigModelsInList {
         for _, m := range s.config.GetAllModels() {
             // Skip if already added as the configured auto model name (avoid duplicates)
             if m == s.config.GetEffectiveAutoModelName() {
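
To make the new gating concrete, here is a minimal, self-contained sketch of the same pattern outside the server. The routerConfig struct and listModels helper are hypothetical stand-ins for config.RouterConfig and the list-building loop in handleOpenAIModels; only the knob semantics (auto model always listed, configured models appended when enabled, duplicates of the auto name skipped) are taken from the diff.

package main

import "fmt"

// routerConfig is a hypothetical stand-in for config.RouterConfig,
// trimmed to the fields relevant to this change.
type routerConfig struct {
    AutoModelName             string
    IncludeConfigModelsInList bool
    ConfiguredModels          []string
}

// listModels mirrors the gating pattern in handleOpenAIModels: the auto
// model name is always listed; models from config are appended only when
// the knob is enabled, skipping duplicates of the auto model name.
func listModels(cfg *routerConfig) []string {
    if cfg == nil {
        return nil
    }
    models := []string{cfg.AutoModelName}
    if cfg.IncludeConfigModelsInList {
        for _, m := range cfg.ConfiguredModels {
            if m == cfg.AutoModelName {
                continue // avoid duplicates
            }
            models = append(models, m)
        }
    }
    return models
}

func main() {
    cfg := &routerConfig{
        AutoModelName:    "MoM",
        ConfiguredModels: []string{"gpt-4o-mini", "llama-3.1-8b-instruct"},
    }
    fmt.Println(listModels(cfg)) // [MoM] (knob off, the default)

    cfg.IncludeConfigModelsInList = true
    fmt.Println(listModels(cfg)) // [MoM gpt-4o-mini llama-3.1-8b-instruct]
}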

src/semantic-router/pkg/api/server_test.go

Lines changed: 75 additions & 4 deletions

@@ -303,11 +303,12 @@ func TestBatchClassificationConfiguration(t *testing.T) {
 }
 
 func TestOpenAIModelsEndpoint(t *testing.T) {
+    // Test with default config (IncludeConfigModelsInList = false)
     cfg := &config.RouterConfig{
         VLLMEndpoints: []config.VLLMEndpoint{
             {
                 Name:    "primary",
-                Address: "127.0.0.1", // Changed from localhost to IP address
+                Address: "127.0.0.1",
                 Port:    8000,
                 Weight:  1,
             },
@@ -320,6 +321,7 @@ func TestOpenAIModelsEndpoint(t *testing.T) {
                 PreferredEndpoints: []string{"primary"},
             },
         },
+        IncludeConfigModelsInList: false,
     }
 
     apiServer := &ClassificationAPIServer{
@@ -357,13 +359,82 @@
         }
     }
 
-    // Must contain 'auto' and the configured models
-    if !got["auto"] {
-        t.Errorf("expected list to contain 'auto'")
+    // Must contain only 'MoM' (default auto model name) when IncludeConfigModelsInList is false
+    if !got["MoM"] {
+        t.Errorf("expected list to contain 'MoM', got: %v", got)
+    }
+    if len(resp.Data) != 1 {
+        t.Errorf("expected only 1 model (MoM), got %d: %v", len(resp.Data), got)
+    }
+}
+
+func TestOpenAIModelsEndpointWithConfigModels(t *testing.T) {
+    // Test with IncludeConfigModelsInList = true
+    cfg := &config.RouterConfig{
+        VLLMEndpoints: []config.VLLMEndpoint{
+            {
+                Name:    "primary",
+                Address: "127.0.0.1",
+                Port:    8000,
+                Weight:  1,
+            },
+        },
+        ModelConfig: map[string]config.ModelParams{
+            "gpt-4o-mini": {
+                PreferredEndpoints: []string{"primary"},
+            },
+            "llama-3.1-8b-instruct": {
+                PreferredEndpoints: []string{"primary"},
+            },
+        },
+        IncludeConfigModelsInList: true,
+    }
+
+    apiServer := &ClassificationAPIServer{
+        classificationSvc: services.NewPlaceholderClassificationService(),
+        config:            cfg,
+    }
+
+    req := httptest.NewRequest("GET", "/v1/models", nil)
+    rr := httptest.NewRecorder()
+
+    apiServer.handleOpenAIModels(rr, req)
+
+    if rr.Code != http.StatusOK {
+        t.Fatalf("expected 200 OK, got %d", rr.Code)
+    }
+
+    var resp OpenAIModelList
+    if err := json.Unmarshal(rr.Body.Bytes(), &resp); err != nil {
+        t.Fatalf("failed to parse response: %v", err)
+    }
+
+    if resp.Object != "list" {
+        t.Errorf("expected object 'list', got %s", resp.Object)
+    }
+
+    // Build a set for easy lookup
+    got := map[string]bool{}
+    for _, m := range resp.Data {
+        got[m.ID] = true
+        if m.Object != "model" {
+            t.Errorf("expected each item.object to be 'model', got %s", m.Object)
+        }
+        if m.Created == 0 {
+            t.Errorf("expected created timestamp to be non-zero")
+        }
+    }
+
+    // Must contain 'MoM' (default auto model name) and the configured models when IncludeConfigModelsInList is true
+    if !got["MoM"] {
+        t.Errorf("expected list to contain 'MoM', got: %v", got)
     }
     if !got["gpt-4o-mini"] || !got["llama-3.1-8b-instruct"] {
         t.Errorf("expected configured models to be present, got=%v", got)
     }
+    if len(resp.Data) != 3 {
+        t.Errorf("expected 3 models, got %d", len(resp.Data))
+    }
 }
 
 // TestSystemPromptEndpointSecurity tests that system prompt endpoints are only accessible when explicitly enabled

src/semantic-router/pkg/config/config.go

Lines changed: 5 additions & 0 deletions

@@ -58,6 +58,11 @@ type RouterConfig struct {
     // For backward compatibility, "auto" is also accepted and treated as an alias
     AutoModelName string `yaml:"auto_model_name,omitempty"`
 
+    // Include configured models in /v1/models list endpoint (default: false)
+    // When false, only the auto model name is returned
+    // When true, all models configured in model_config are also included
+    IncludeConfigModelsInList bool `yaml:"include_config_models_in_list,omitempty"`
+
     // Default reasoning effort level (low, medium, high) when not specified per category
     DefaultReasoningEffort string `yaml:"default_reasoning_effort,omitempty"`
 
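
As a sanity check on the YAML wiring, the following sketch unmarshals the new key with gopkg.in/yaml.v3 (an assumption based on the struct's yaml tags; the router may use a different YAML library). The routerConfig struct here is a trimmed, hypothetical stand-in for the real RouterConfig.

package main

import (
    "fmt"

    "gopkg.in/yaml.v3"
)

// Trimmed stand-in for config.RouterConfig with just the fields
// touched by this change (the real struct has many more).
type routerConfig struct {
    AutoModelName             string `yaml:"auto_model_name,omitempty"`
    IncludeConfigModelsInList bool   `yaml:"include_config_models_in_list,omitempty"`
}

func main() {
    raw := []byte(`
auto_model_name: "MoM"
include_config_models_in_list: true
`)
    var cfg routerConfig
    if err := yaml.Unmarshal(raw, &cfg); err != nil {
        panic(err)
    }
    // Leaving the key out of the YAML yields Go's zero value false,
    // which is why the knob defaults to "off".
    fmt.Println(cfg.AutoModelName, cfg.IncludeConfigModelsInList) // MoM true
}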

src/semantic-router/pkg/extproc/models_endpoint_test.go

Lines changed: 40 additions & 15 deletions

@@ -30,46 +30,71 @@ func TestHandleModelsRequest(t *testing.T) {
                 PreferredEndpoints: []string{"primary"},
             },
         },
+        IncludeConfigModelsInList: false, // Default: don't include configured models
     }
 
-    router := &OpenAIRouter{
-        Config: cfg,
+    cfgWithModels := &config.RouterConfig{
+        VLLMEndpoints: []config.VLLMEndpoint{
+            {
+                Name:    "primary",
+                Address: "127.0.0.1",
+                Port:    8000,
+                Weight:  1,
+            },
+        },
+        ModelConfig: map[string]config.ModelParams{
+            "gpt-4o-mini": {
+                PreferredEndpoints: []string{"primary"},
+            },
+            "llama-3.1-8b-instruct": {
+                PreferredEndpoints: []string{"primary"},
+            },
+        },
+        IncludeConfigModelsInList: true, // Include configured models
     }
 
     tests := []struct {
         name           string
+        config         *config.RouterConfig
         path           string
         expectedModels []string
         expectedCount  int
     }{
         {
-            name:           "GET /v1/models - all models",
+            name:           "GET /v1/models - only auto model (default)",
+            config:         cfg,
             path:           "/v1/models",
-            expectedModels: []string{"auto", "gpt-4o-mini", "llama-3.1-8b-instruct"},
-            expectedCount:  3,
+            expectedModels: []string{"MoM"},
+            expectedCount:  1,
         },
         {
-            name:           "GET /v1/models?model=auto - all models (no filtering implemented)",
-            path:           "/v1/models?model=auto",
-            expectedModels: []string{"auto", "gpt-4o-mini", "llama-3.1-8b-instruct"},
+            name:           "GET /v1/models - with include_config_models_in_list enabled",
+            config:         cfgWithModels,
+            path:           "/v1/models",
+            expectedModels: []string{"MoM", "gpt-4o-mini", "llama-3.1-8b-instruct"},
             expectedCount:  3,
         },
         {
-            name:           "GET /v1/models?model=gpt-4o-mini - all models (no filtering)",
-            path:           "/v1/models?model=gpt-4o-mini",
-            expectedModels: []string{"auto", "gpt-4o-mini", "llama-3.1-8b-instruct"},
-            expectedCount:  3,
+            name:           "GET /v1/models?model=auto - only auto model (default)",
+            config:         cfg,
+            path:           "/v1/models?model=auto",
+            expectedModels: []string{"MoM"},
+            expectedCount:  1,
         },
         {
-            name:           "GET /v1/models?model= - all models (empty param)",
-            path:           "/v1/models?model=",
-            expectedModels: []string{"auto", "gpt-4o-mini", "llama-3.1-8b-instruct"},
+            name:           "GET /v1/models?model=auto - with include_config_models_in_list enabled",
+            config:         cfgWithModels,
+            path:           "/v1/models?model=auto",
+            expectedModels: []string{"MoM", "gpt-4o-mini", "llama-3.1-8b-instruct"},
             expectedCount:  3,
         },
     }
 
     for _, tt := range tests {
         t.Run(tt.name, func(t *testing.T) {
+            router := &OpenAIRouter{
+                Config: tt.config,
+            }
             response, err := router.handleModelsRequest(tt.path)
             if err != nil {
                 t.Fatalf("handleModelsRequest failed: %v", err)

src/semantic-router/pkg/extproc/request_handler.go

Lines changed: 5 additions & 3 deletions

@@ -941,7 +941,8 @@ func (r *OpenAIRouter) handleModelRouting(openAIRequest *openai.ChatCompletionNe
     }
 
     // Check if route cache should be cleared (only for auto models, non-auto models handle this in their own path)
-    if originalModel == "auto" && r.shouldClearRouteCache() {
+    // isAutoModel already determined at the beginning of this function using IsAutoModelName
+    if isAutoModel && r.shouldClearRouteCache() {
         // Access the CommonResponse that's already created in this function
         if response.GetRequestBody() != nil && response.GetRequestBody().GetResponse() != nil {
             response.GetRequestBody().GetResponse().ClearRouteCache = true
@@ -1142,6 +1143,7 @@ type OpenAIModelList struct {
 }
 
 // handleModelsRequest handles GET /v1/models requests and returns a direct response
+// Whether to include configured models is controlled by the config's IncludeConfigModelsInList setting (default: false)
 func (r *OpenAIRouter) handleModelsRequest(_ string) (*ext_proc.ProcessingResponse, error) {
     now := time.Now().Unix()
 
@@ -1169,8 +1171,8 @@ func (r *OpenAIRouter) handleModelsRequest(_ string) (*ext_proc.ProcessingRespon
         })
     }
 
-    // Append underlying models from config (if available)
-    if r.Config != nil {
+    // Append underlying models from config (if available and configured to include them)
+    if r.Config != nil && r.Config.IncludeConfigModelsInList {
         for _, m := range r.Config.GetAllModels() {
             // Skip if already added as the configured auto model name (avoid duplicates)
             if m == r.Config.GetEffectiveAutoModelName() {
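
For reference, a small sketch of the wire shape clients would see from /v1/models once the knob is enabled. The field set (id, object, created) is inferred from the assertions in the tests above; the real OpenAIModelList may carry additional fields, so treat this as an illustration rather than the exact schema.

package main

import (
    "encoding/json"
    "fmt"
    "time"
)

// Hypothetical shapes inferred from the test assertions
// (resp.Object == "list", item.Object == "model", non-zero Created).
type modelEntry struct {
    ID      string `json:"id"`
    Object  string `json:"object"`
    Created int64  `json:"created"`
}

type modelList struct {
    Object string       `json:"object"`
    Data   []modelEntry `json:"data"`
}

func main() {
    now := time.Now().Unix()
    list := modelList{
        Object: "list",
        Data: []modelEntry{
            {ID: "MoM", Object: "model", Created: now},
            // The entries below appear only when include_config_models_in_list: true
            {ID: "gpt-4o-mini", Object: "model", Created: now},
            {ID: "llama-3.1-8b-instruct", Object: "model", Created: now},
        },
    }
    out, _ := json.MarshalIndent(list, "", "  ")
    fmt.Println(string(out))
}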
