Commit 7a3a257

Xunzhuo authored and rootfs committed
feat: add knob for /v1/models to control if respond real models. (vllm-project#476)
Signed-off-by: Huamin Chen <[email protected]>
1 parent 3c080f3 commit 7a3a257

7 files changed: +146 -25 lines

.gitignore

Lines changed: 10 additions & 0 deletions

@@ -143,3 +143,13 @@ dashboard/backend/dashboard-backend.exe
 
 # Keep old HTML backup for reference
 dashboard/frontend/index.html.old
+
+
+* text=auto eol=lf
+*.png binary
+*.jpg binary
+*.jpeg binary
+*.gif binary
+*.pdf binary
+*.zip binary
+

config/config.yaml

Lines changed: 7 additions & 0 deletions

@@ -154,6 +154,13 @@ default_model: "qwen3"
 # Example: auto_model_name: "MoM" # or any other name you prefer
 # auto_model_name: "MoM"
 
+# Include configured models in /v1/models list endpoint (optional, default: false)
+# When false (default): only the auto model name is returned in the /v1/models endpoint
+# When true: all models configured in model_config are also included in the /v1/models endpoint
+# This is useful for clients that need to discover all available models
+# Example: include_config_models_in_list: true
+# include_config_models_in_list: false
+
 # Reasoning family configurations
 reasoning_families:
   deepseek:

src/semantic-router/pkg/api/server.go

Lines changed: 4 additions & 3 deletions

@@ -721,7 +721,8 @@ func (s *ClassificationAPIServer) handleClassifierInfo(w http.ResponseWriter, _
 }
 
 // handleOpenAIModels handles OpenAI-compatible model listing at /v1/models
-// It returns all models discoverable from the router configuration plus the configured auto model name.
+// It returns the configured auto model name and optionally the underlying models from config.
+// Whether to include configured models is controlled by the config's IncludeConfigModelsInList setting (default: false)
 func (s *ClassificationAPIServer) handleOpenAIModels(w http.ResponseWriter, _ *http.Request) {
     now := time.Now().Unix()
 
@@ -749,8 +750,8 @@ func (s *ClassificationAPIServer) handleOpenAIModels(w http.ResponseWriter, _ *h
         })
     }
 
-    // Append underlying models from config (if available)
-    if s.config != nil {
+    // Append underlying models from config (if available and configured to include them)
+    if s.config != nil && s.config.IncludeConfigModelsInList {
         for _, m := range s.config.GetAllModels() {
             // Skip if already added as the configured auto model name (avoid duplicates)
             if m == s.config.GetEffectiveAutoModelName() {
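
To make the new gating concrete, here is a minimal, self-contained sketch of the same pattern outside the server. The routerConfig struct and listModels helper are hypothetical stand-ins for config.RouterConfig and the list-building loop in handleOpenAIModels; only the knob semantics (auto model always listed, configured models appended when enabled, duplicates of the auto name skipped) are taken from the diff.

package main

import "fmt"

// routerConfig is a hypothetical stand-in for config.RouterConfig,
// trimmed to the fields relevant to this change.
type routerConfig struct {
    AutoModelName             string
    IncludeConfigModelsInList bool
    ConfiguredModels          []string
}

// listModels mirrors the gating pattern in handleOpenAIModels: the auto
// model name is always listed; models from config are appended only when
// the knob is enabled, skipping duplicates of the auto model name.
func listModels(cfg *routerConfig) []string {
    if cfg == nil {
        return nil
    }
    models := []string{cfg.AutoModelName}
    if cfg.IncludeConfigModelsInList {
        for _, m := range cfg.ConfiguredModels {
            if m == cfg.AutoModelName {
                continue // avoid duplicates
            }
            models = append(models, m)
        }
    }
    return models
}

func main() {
    cfg := &routerConfig{
        AutoModelName:    "MoM",
        ConfiguredModels: []string{"gpt-4o-mini", "llama-3.1-8b-instruct"},
    }
    fmt.Println(listModels(cfg)) // [MoM] (knob off, the default)

    cfg.IncludeConfigModelsInList = true
    fmt.Println(listModels(cfg)) // [MoM gpt-4o-mini llama-3.1-8b-instruct]
}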

src/semantic-router/pkg/api/server_test.go

Lines changed: 75 additions & 4 deletions

@@ -303,11 +303,12 @@ func TestBatchClassificationConfiguration(t *testing.T) {
 }
 
 func TestOpenAIModelsEndpoint(t *testing.T) {
+    // Test with default config (IncludeConfigModelsInList = false)
     cfg := &config.RouterConfig{
         VLLMEndpoints: []config.VLLMEndpoint{
             {
                 Name:    "primary",
-                Address: "127.0.0.1", // Changed from localhost to IP address
+                Address: "127.0.0.1",
                 Port:    8000,
                 Weight:  1,
             },
@@ -320,6 +321,7 @@ func TestOpenAIModelsEndpoint(t *testing.T) {
                 PreferredEndpoints: []string{"primary"},
             },
         },
+        IncludeConfigModelsInList: false,
     }
 
     apiServer := &ClassificationAPIServer{
@@ -357,13 +359,82 @@
         }
     }
 
-    // Must contain 'auto' and the configured models
-    if !got["auto"] {
-        t.Errorf("expected list to contain 'auto'")
+    // Must contain only 'MoM' (default auto model name) when IncludeConfigModelsInList is false
+    if !got["MoM"] {
+        t.Errorf("expected list to contain 'MoM', got: %v", got)
+    }
+    if len(resp.Data) != 1 {
+        t.Errorf("expected only 1 model (MoM), got %d: %v", len(resp.Data), got)
+    }
+}
+
+func TestOpenAIModelsEndpointWithConfigModels(t *testing.T) {
+    // Test with IncludeConfigModelsInList = true
+    cfg := &config.RouterConfig{
+        VLLMEndpoints: []config.VLLMEndpoint{
+            {
+                Name:    "primary",
+                Address: "127.0.0.1",
+                Port:    8000,
+                Weight:  1,
+            },
+        },
+        ModelConfig: map[string]config.ModelParams{
+            "gpt-4o-mini": {
+                PreferredEndpoints: []string{"primary"},
+            },
+            "llama-3.1-8b-instruct": {
+                PreferredEndpoints: []string{"primary"},
+            },
+        },
+        IncludeConfigModelsInList: true,
+    }
+
+    apiServer := &ClassificationAPIServer{
+        classificationSvc: services.NewPlaceholderClassificationService(),
+        config:            cfg,
+    }
+
+    req := httptest.NewRequest("GET", "/v1/models", nil)
+    rr := httptest.NewRecorder()
+
+    apiServer.handleOpenAIModels(rr, req)
+
+    if rr.Code != http.StatusOK {
+        t.Fatalf("expected 200 OK, got %d", rr.Code)
+    }
+
+    var resp OpenAIModelList
+    if err := json.Unmarshal(rr.Body.Bytes(), &resp); err != nil {
+        t.Fatalf("failed to parse response: %v", err)
+    }
+
+    if resp.Object != "list" {
+        t.Errorf("expected object 'list', got %s", resp.Object)
+    }
+
+    // Build a set for easy lookup
+    got := map[string]bool{}
+    for _, m := range resp.Data {
+        got[m.ID] = true
+        if m.Object != "model" {
+            t.Errorf("expected each item.object to be 'model', got %s", m.Object)
+        }
+        if m.Created == 0 {
+            t.Errorf("expected created timestamp to be non-zero")
+        }
+    }
+
+    // Must contain 'MoM' (default auto model name) and the configured models when IncludeConfigModelsInList is true
+    if !got["MoM"] {
+        t.Errorf("expected list to contain 'MoM', got: %v", got)
     }
     if !got["gpt-4o-mini"] || !got["llama-3.1-8b-instruct"] {
         t.Errorf("expected configured models to be present, got=%v", got)
     }
+    if len(resp.Data) != 3 {
+        t.Errorf("expected 3 models, got %d", len(resp.Data))
+    }
 }
 
 // TestSystemPromptEndpointSecurity tests that system prompt endpoints are only accessible when explicitly enabled

src/semantic-router/pkg/config/config.go

Lines changed: 5 additions & 0 deletions

@@ -58,6 +58,11 @@ type RouterConfig struct {
     // For backward compatibility, "auto" is also accepted and treated as an alias
     AutoModelName string `yaml:"auto_model_name,omitempty"`
 
+    // Include configured models in /v1/models list endpoint (default: false)
+    // When false, only the auto model name is returned
+    // When true, all models configured in model_config are also included
+    IncludeConfigModelsInList bool `yaml:"include_config_models_in_list,omitempty"`
+
     // Default reasoning effort level (low, medium, high) when not specified per category
     DefaultReasoningEffort string `yaml:"default_reasoning_effort,omitempty"`
 
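
As a sanity check on the YAML wiring, the following sketch unmarshals the new key with gopkg.in/yaml.v3 (an assumption based on the struct's yaml tags; the router may use a different YAML library). The routerConfig struct here is a trimmed, hypothetical stand-in for the real RouterConfig.

package main

import (
    "fmt"

    "gopkg.in/yaml.v3"
)

// Trimmed stand-in for config.RouterConfig with just the fields
// touched by this change (the real struct has many more).
type routerConfig struct {
    AutoModelName             string `yaml:"auto_model_name,omitempty"`
    IncludeConfigModelsInList bool   `yaml:"include_config_models_in_list,omitempty"`
}

func main() {
    raw := []byte(`
auto_model_name: "MoM"
include_config_models_in_list: true
`)
    var cfg routerConfig
    if err := yaml.Unmarshal(raw, &cfg); err != nil {
        panic(err)
    }
    // Leaving the key out of the YAML yields Go's zero value false,
    // which is why the knob defaults to "off".
    fmt.Println(cfg.AutoModelName, cfg.IncludeConfigModelsInList) // MoM true
}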

src/semantic-router/pkg/extproc/models_endpoint_test.go

Lines changed: 40 additions & 15 deletions

@@ -30,46 +30,71 @@ func TestHandleModelsRequest(t *testing.T) {
                 PreferredEndpoints: []string{"primary"},
             },
         },
+        IncludeConfigModelsInList: false, // Default: don't include configured models
     }
 
-    router := &OpenAIRouter{
-        Config: cfg,
+    cfgWithModels := &config.RouterConfig{
+        VLLMEndpoints: []config.VLLMEndpoint{
+            {
+                Name:    "primary",
+                Address: "127.0.0.1",
+                Port:    8000,
+                Weight:  1,
+            },
+        },
+        ModelConfig: map[string]config.ModelParams{
+            "gpt-4o-mini": {
+                PreferredEndpoints: []string{"primary"},
+            },
+            "llama-3.1-8b-instruct": {
+                PreferredEndpoints: []string{"primary"},
+            },
+        },
+        IncludeConfigModelsInList: true, // Include configured models
     }
 
     tests := []struct {
         name           string
+        config         *config.RouterConfig
         path           string
         expectedModels []string
         expectedCount  int
     }{
         {
-            name:           "GET /v1/models - all models",
+            name:           "GET /v1/models - only auto model (default)",
+            config:         cfg,
             path:           "/v1/models",
-            expectedModels: []string{"auto", "gpt-4o-mini", "llama-3.1-8b-instruct"},
-            expectedCount:  3,
+            expectedModels: []string{"MoM"},
+            expectedCount:  1,
         },
         {
-            name:           "GET /v1/models?model=auto - all models (no filtering implemented)",
-            path:           "/v1/models?model=auto",
-            expectedModels: []string{"auto", "gpt-4o-mini", "llama-3.1-8b-instruct"},
+            name:           "GET /v1/models - with include_config_models_in_list enabled",
+            config:         cfgWithModels,
+            path:           "/v1/models",
+            expectedModels: []string{"MoM", "gpt-4o-mini", "llama-3.1-8b-instruct"},
             expectedCount:  3,
         },
         {
-            name:           "GET /v1/models?model=gpt-4o-mini - all models (no filtering)",
-            path:           "/v1/models?model=gpt-4o-mini",
-            expectedModels: []string{"auto", "gpt-4o-mini", "llama-3.1-8b-instruct"},
-            expectedCount:  3,
+            name:           "GET /v1/models?model=auto - only auto model (default)",
+            config:         cfg,
+            path:           "/v1/models?model=auto",
+            expectedModels: []string{"MoM"},
+            expectedCount:  1,
         },
         {
-            name:           "GET /v1/models?model= - all models (empty param)",
-            path:           "/v1/models?model=",
-            expectedModels: []string{"auto", "gpt-4o-mini", "llama-3.1-8b-instruct"},
+            name:           "GET /v1/models?model=auto - with include_config_models_in_list enabled",
+            config:         cfgWithModels,
+            path:           "/v1/models?model=auto",
+            expectedModels: []string{"MoM", "gpt-4o-mini", "llama-3.1-8b-instruct"},
             expectedCount:  3,
         },
     }
 
     for _, tt := range tests {
         t.Run(tt.name, func(t *testing.T) {
+            router := &OpenAIRouter{
+                Config: tt.config,
+            }
             response, err := router.handleModelsRequest(tt.path)
             if err != nil {
                 t.Fatalf("handleModelsRequest failed: %v", err)

src/semantic-router/pkg/extproc/request_handler.go

Lines changed: 5 additions & 3 deletions

@@ -941,7 +941,8 @@ func (r *OpenAIRouter) handleModelRouting(openAIRequest *openai.ChatCompletionNe
     }
 
     // Check if route cache should be cleared (only for auto models, non-auto models handle this in their own path)
-    if originalModel == "auto" && r.shouldClearRouteCache() {
+    // isAutoModel already determined at the beginning of this function using IsAutoModelName
+    if isAutoModel && r.shouldClearRouteCache() {
         // Access the CommonResponse that's already created in this function
         if response.GetRequestBody() != nil && response.GetRequestBody().GetResponse() != nil {
             response.GetRequestBody().GetResponse().ClearRouteCache = true
@@ -1142,6 +1143,7 @@ type OpenAIModelList struct {
 }
 
 // handleModelsRequest handles GET /v1/models requests and returns a direct response
+// Whether to include configured models is controlled by the config's IncludeConfigModelsInList setting (default: false)
 func (r *OpenAIRouter) handleModelsRequest(_ string) (*ext_proc.ProcessingResponse, error) {
     now := time.Now().Unix()
 
@@ -1169,8 +1171,8 @@ func (r *OpenAIRouter) handleModelsRequest(_ string) (*ext_proc.ProcessingRespon
         })
     }
 
-    // Append underlying models from config (if available)
-    if r.Config != nil {
+    // Append underlying models from config (if available and configured to include them)
+    if r.Config != nil && r.Config.IncludeConfigModelsInList {
         for _, m := range r.Config.GetAllModels() {
             // Skip if already added as the configured auto model name (avoid duplicates)
             if m == r.Config.GetEffectiveAutoModelName() {
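
For reference, a small sketch of the wire shape clients would see from /v1/models once the knob is enabled. The field set (id, object, created) is inferred from the assertions in the tests above; the real OpenAIModelList may carry additional fields, so treat this as an illustration rather than the exact schema.

package main

import (
    "encoding/json"
    "fmt"
    "time"
)

// Hypothetical shapes inferred from the test assertions
// (resp.Object == "list", item.Object == "model", non-zero Created).
type modelEntry struct {
    ID      string `json:"id"`
    Object  string `json:"object"`
    Created int64  `json:"created"`
}

type modelList struct {
    Object string       `json:"object"`
    Data   []modelEntry `json:"data"`
}

func main() {
    now := time.Now().Unix()
    list := modelList{
        Object: "list",
        Data: []modelEntry{
            {ID: "MoM", Object: "model", Created: now},
            // The entries below appear only when include_config_models_in_list: true
            {ID: "gpt-4o-mini", Object: "model", Created: now},
            {ID: "llama-3.1-8b-instruct", Object: "model", Created: now},
        },
    }
    out, _ := json.MarshalIndent(list, "", "  ")
    fmt.Println(string(out))
}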
