2 changes: 0 additions & 2 deletions config/config-mcp-classifier-example.yaml
@@ -75,8 +75,6 @@ vllm_endpoints:
   - name: endpoint1
     address: 127.0.0.1
     port: 8000
-    models:
-      - openai/gpt-oss-20b
     weight: 1
     health_check_path: /health

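The same edit repeats across every configuration file below: the per-endpoint `models` list is dropped, and the model-to-endpoint mapping now lives solely in `model_config`, via each model's `preferred_endpoints`. As a rough sketch of the resulting shape — using the endpoint and model names from this file, with unrelated keys omitted — a migrated config would look like:

```yaml
vllm_endpoints:
  - name: endpoint1
    address: 127.0.0.1
    port: 8000
    weight: 1
    health_check_path: /health

model_config:
  openai/gpt-oss-20b:
    # Replaces the removed per-endpoint models list; endpoints are
    # referenced by name and resolved via GetEndpointByName.
    preferred_endpoints:
      - endpoint1
```

Note the consequence: a model with no `model_config` entry (or an empty `preferred_endpoints` list) now resolves to no endpoints, so every served model needs an entry after this change.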
2 changes: 0 additions & 2 deletions config/config.development.yaml
@@ -29,8 +29,6 @@ vllm_endpoints:
   - name: "local-endpoint"
     address: "127.0.0.1"
     port: 8000
-    models:
-      - "test-model"
     weight: 1

 model_config:
4 changes: 0 additions & 4 deletions config/config.e2e.yaml
@@ -42,15 +42,11 @@ vllm_endpoints:
   - name: "qwen-endpoint"
     address: "127.0.0.1"
     port: 8000
-    models:
-      - "Model-A"
     weight: 1
     health_check_path: "/health"
   - name: "tinyllama-endpoint"
     address: "127.0.0.1"
     port: 8001
-    models:
-      - "Model-B"
     weight: 1
     health_check_path: "/health"

2 changes: 0 additions & 2 deletions config/config.production.yaml
@@ -34,8 +34,6 @@ vllm_endpoints:
   - name: "endpoint1"
     address: "127.0.0.1"
     port: 8000
-    models:
-      - "openai/gpt-oss-20b"
     weight: 1

 model_config:
2 changes: 0 additions & 2 deletions config/config.recipe-accuracy.yaml
@@ -44,8 +44,6 @@ vllm_endpoints:
   - name: "endpoint1"
     address: "127.0.0.1"
     port: 8000
-    models:
-      - "openai/gpt-oss-20b"
     weight: 1

 model_config:
2 changes: 0 additions & 2 deletions config/config.recipe-latency.yaml
@@ -39,8 +39,6 @@ vllm_endpoints:
   - name: "endpoint1"
     address: "127.0.0.1"
     port: 8000
-    models:
-      - "openai/gpt-oss-20b"
     weight: 1

 model_config:
2 changes: 0 additions & 2 deletions config/config.recipe-token-efficiency.yaml
@@ -44,8 +44,6 @@ vllm_endpoints:
   - name: "endpoint1"
     address: "127.0.0.1"
     port: 8000
-    models:
-      - "openai/gpt-oss-20b"
     weight: 1

 model_config:
2 changes: 0 additions & 2 deletions config/config.testing.yaml
@@ -30,8 +30,6 @@ vllm_endpoints:
   - name: "mock"
     address: "172.28.0.10"
     port: 8000
-    models:
-      - "openai/gpt-oss-20b"
     weight: 1
     health_check_path: "/health"

2 changes: 0 additions & 2 deletions config/config.yaml
@@ -34,8 +34,6 @@ vllm_endpoints:
   - name: "endpoint1"
     address: "127.0.0.1" # IPv4 address - REQUIRED format
     port: 8000
-    models:
-      - "openai/gpt-oss-20b"
     weight: 1

 model_config:
6 changes: 3 additions & 3 deletions config/examples/system_prompt_example.yaml
@@ -100,9 +100,9 @@ default_reasoning_effort: medium
 # vLLM endpoints configuration
 vllm_endpoints:
   - name: "mock"
-    address: "http://127.0.0.1:8000"
-    models:
-      - "openai/gpt-oss-20b"
+    address: "127.0.0.1"
+    port: 8000
+    weight: 1

 # Usage Notes:
 # 1. System prompts are automatically injected based on query classification
1 change: 0 additions & 1 deletion dashboard/frontend/src/pages/ConfigPage.tsx
@@ -7,7 +7,6 @@ interface VLLMEndpoint {
   name: string
   address: string
   port: number
-  models: string[]
   weight: number
   health_check_path: string
 }
2 changes: 0 additions & 2 deletions deploy/kubernetes/config.yaml
@@ -34,8 +34,6 @@ vllm_endpoints:
   - name: "endpoint1"
     address: "127.0.0.1" # IPv4 address - REQUIRED format
     port: 8000
-    models:
-      - "openai/gpt-oss-20b"
     weight: 1

 model_config:
4 changes: 0 additions & 4 deletions deploy/kubernetes/istio/config.yaml
@@ -34,14 +34,10 @@ vllm_endpoints:
   - name: "endpoint1"
     address: "10.104.192.205" # IPv4 address - REQUIRED format
     port: 80
-    models:
-      - "llama3-8b"
     weight: 1
   - name: "endpoint2"
     address: "10.99.27.202" # IPv4 address - REQUIRED format
     port: 80
-    models:
-      - "phi4-mini"
     weight: 1

 model_config:
4 changes: 0 additions & 4 deletions deploy/openshift/config-openshift.yaml
@@ -32,14 +32,10 @@ vllm_endpoints:
   - name: "model-a-endpoint"
     address: "127.0.0.1" # localhost in same pod
     port: 8000
-    models:
-      - "Model-A"
     weight: 1
   - name: "model-b-endpoint"
     address: "127.0.0.1" # localhost in same pod
     port: 8001
-    models:
-      - "Model-B"
     weight: 1

 model_config:
9 changes: 8 additions & 1 deletion src/semantic-router/pkg/api/server_test.go
@@ -309,10 +309,17 @@ func TestOpenAIModelsEndpoint(t *testing.T) {
 				Name:    "primary",
 				Address: "127.0.0.1", // Changed from localhost to IP address
 				Port:    8000,
-				Models:  []string{"gpt-4o-mini", "llama-3.1-8b-instruct"},
 				Weight:  1,
 			},
 		},
+		ModelConfig: map[string]config.ModelParams{
+			"gpt-4o-mini": {
+				PreferredEndpoints: []string{"primary"},
+			},
+			"llama-3.1-8b-instruct": {
+				PreferredEndpoints: []string{"primary"},
+			},
+		},
 	}

 	apiServer := &ClassificationAPIServer{
40 changes: 10 additions & 30 deletions src/semantic-router/pkg/config/config.go
@@ -253,9 +253,6 @@ type VLLMEndpoint struct {
 	// Port of the vLLM endpoint
 	Port int `yaml:"port"`

-	// List of models served by this endpoint
-	Models []string `yaml:"models"`
-
 	// Load balancing weight for this endpoint
 	Weight int `yaml:"weight,omitempty"`
 }
@@ -604,32 +601,21 @@ func (c *RouterConfig) IsPromptGuardEnabled() bool {
 }

 // GetEndpointsForModel returns all endpoints that can serve the specified model
-// If the model has preferred endpoints configured, returns only those endpoints that are available
-// Otherwise, returns all endpoints that list the model in their Models array
+// Returns endpoints based on the model's preferred_endpoints configuration in model_config
 func (c *RouterConfig) GetEndpointsForModel(modelName string) []VLLMEndpoint {
-	var availableEndpoints []VLLMEndpoint
-
-	// First, find all endpoints that can serve this model
-	for _, endpoint := range c.VLLMEndpoints {
-		if slices.Contains(endpoint.Models, modelName) {
-			availableEndpoints = append(availableEndpoints, endpoint)
-		}
-	}
+	var endpoints []VLLMEndpoint

 	// Check if model has preferred endpoints configured
 	if modelConfig, ok := c.ModelConfig[modelName]; ok && len(modelConfig.PreferredEndpoints) > 0 {
-		var preferredEndpoints []VLLMEndpoint
-		for _, endpoint := range availableEndpoints {
-			if slices.Contains(modelConfig.PreferredEndpoints, endpoint.Name) {
-				preferredEndpoints = append(preferredEndpoints, endpoint)
+		// Return only the preferred endpoints
+		for _, endpointName := range modelConfig.PreferredEndpoints {
+			if endpoint, found := c.GetEndpointByName(endpointName); found {
+				endpoints = append(endpoints, *endpoint)
 			}
 		}
-		if len(preferredEndpoints) > 0 {
-			return preferredEndpoints
-		}
 	}

-	return availableEndpoints
+	return endpoints
 }

 // GetEndpointByName returns the endpoint with the specified name
@@ -642,18 +628,12 @@ func (c *RouterConfig) GetEndpointByName(name string) (*VLLMEndpoint, bool) {
 	return nil, false
 }

-// GetAllModels returns a list of all models available across all endpoints
+// GetAllModels returns a list of all models configured in model_config
 func (c *RouterConfig) GetAllModels() []string {
-	modelSet := make(map[string]bool)
 	var models []string

-	for _, endpoint := range c.VLLMEndpoints {
-		for _, model := range endpoint.Models {
-			if !modelSet[model] {
-				modelSet[model] = true
-				models = append(models, model)
-			}
-		}
+	for modelName := range c.ModelConfig {
+		models = append(models, modelName)
 	}

 	return models
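Net effect of the config.go changes: `model_config` becomes the single source of truth. `GetEndpointsForModel` resolves a model's `preferred_endpoints` by name, and `GetAllModels` simply enumerates the `model_config` keys. A minimal usage sketch of the new behavior — the import path is an assumption based on the repo layout, and the model/endpoint names mirror the updated test above:

```go
package main

import (
	"fmt"

	// Assumed module path; adjust to the actual go.mod module name.
	"github.com/vllm-project/semantic-router/src/semantic-router/pkg/config"
)

func main() {
	cfg := &config.RouterConfig{
		VLLMEndpoints: []config.VLLMEndpoint{
			{Name: "primary", Address: "127.0.0.1", Port: 8000, Weight: 1},
		},
		ModelConfig: map[string]config.ModelParams{
			"gpt-4o-mini": {PreferredEndpoints: []string{"primary"}},
		},
	}

	// Enumerates model_config keys (map iteration, so unordered in general).
	fmt.Println(cfg.GetAllModels()) // [gpt-4o-mini]

	// Preferred endpoints are resolved by name via GetEndpointByName.
	for _, ep := range cfg.GetEndpointsForModel("gpt-4o-mini") {
		fmt.Printf("gpt-4o-mini -> %s:%d\n", ep.Address, ep.Port) // 127.0.0.1:8000
	}

	// A model absent from model_config resolves to zero endpoints, where it
	// previously matched any endpoint whose models list contained it.
	fmt.Println(len(cfg.GetEndpointsForModel("unknown-model"))) // 0
}
```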