2 changes: 0 additions & 2 deletions config/config-mcp-classifier-example.yaml
@@ -75,8 +75,6 @@ vllm_endpoints:
   - name: endpoint1
     address: 127.0.0.1
     port: 8000
-    models:
-      - openai/gpt-oss-20b
     weight: 1
     health_check_path: /health

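The same edit repeats across every configuration file below: the per-endpoint `models` list is dropped, and the model-to-endpoint mapping now lives solely in `model_config`, via each model's `preferred_endpoints`. As a rough sketch of the resulting shape — using the endpoint and model names from this file, with unrelated keys omitted — a migrated config would look like:

```yaml
vllm_endpoints:
  - name: endpoint1
    address: 127.0.0.1
    port: 8000
    weight: 1
    health_check_path: /health

model_config:
  openai/gpt-oss-20b:
    # Replaces the removed per-endpoint models list; endpoints are
    # referenced by name and resolved via GetEndpointByName.
    preferred_endpoints:
      - endpoint1
```

Note the consequence: a model with no `model_config` entry (or an empty `preferred_endpoints` list) now resolves to no endpoints, so every served model needs an entry after this change.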
2 changes: 0 additions & 2 deletions config/config.development.yaml
@@ -29,8 +29,6 @@ vllm_endpoints:
   - name: "local-endpoint"
     address: "127.0.0.1"
     port: 8000
-    models:
-      - "test-model"
     weight: 1

 model_config:
4 changes: 0 additions & 4 deletions config/config.e2e.yaml
@@ -42,15 +42,11 @@ vllm_endpoints:
   - name: "qwen-endpoint"
     address: "127.0.0.1"
     port: 8000
-    models:
-      - "Model-A"
     weight: 1
     health_check_path: "/health"
   - name: "tinyllama-endpoint"
     address: "127.0.0.1"
     port: 8001
-    models:
-      - "Model-B"
     weight: 1
     health_check_path: "/health"

2 changes: 0 additions & 2 deletions config/config.production.yaml
@@ -34,8 +34,6 @@ vllm_endpoints:
   - name: "endpoint1"
     address: "127.0.0.1"
     port: 8000
-    models:
-      - "openai/gpt-oss-20b"
     weight: 1

 model_config:
2 changes: 0 additions & 2 deletions config/config.recipe-accuracy.yaml
@@ -44,8 +44,6 @@ vllm_endpoints:
   - name: "endpoint1"
     address: "127.0.0.1"
     port: 8000
-    models:
-      - "openai/gpt-oss-20b"
     weight: 1

 model_config:
2 changes: 0 additions & 2 deletions config/config.recipe-latency.yaml
@@ -39,8 +39,6 @@ vllm_endpoints:
   - name: "endpoint1"
     address: "127.0.0.1"
     port: 8000
-    models:
-      - "openai/gpt-oss-20b"
     weight: 1

 model_config:
2 changes: 0 additions & 2 deletions config/config.recipe-token-efficiency.yaml
@@ -44,8 +44,6 @@ vllm_endpoints:
   - name: "endpoint1"
     address: "127.0.0.1"
     port: 8000
-    models:
-      - "openai/gpt-oss-20b"
     weight: 1

 model_config:
2 changes: 0 additions & 2 deletions config/config.testing.yaml
@@ -30,8 +30,6 @@ vllm_endpoints:
   - name: "mock"
     address: "172.28.0.10"
     port: 8000
-    models:
-      - "openai/gpt-oss-20b"
     weight: 1
     health_check_path: "/health"

2 changes: 0 additions & 2 deletions config/config.yaml
@@ -34,8 +34,6 @@ vllm_endpoints:
   - name: "endpoint1"
     address: "127.0.0.1" # IPv4 address - REQUIRED format
     port: 8000
-    models:
-      - "openai/gpt-oss-20b"
     weight: 1

 model_config:
6 changes: 3 additions & 3 deletions config/examples/system_prompt_example.yaml
@@ -100,9 +100,9 @@ default_reasoning_effort: medium
 # vLLM endpoints configuration
 vllm_endpoints:
   - name: "mock"
-    address: "http://127.0.0.1:8000"
-    models:
-      - "openai/gpt-oss-20b"
+    address: "127.0.0.1"
+    port: 8000
+    weight: 1

 # Usage Notes:
 # 1. System prompts are automatically injected based on query classification
1 change: 0 additions & 1 deletion dashboard/frontend/src/pages/ConfigPage.tsx
@@ -7,7 +7,6 @@ interface VLLMEndpoint {
   name: string
   address: string
   port: number
-  models: string[]
   weight: number
   health_check_path: string
 }
2 changes: 0 additions & 2 deletions deploy/kubernetes/config.yaml
@@ -34,8 +34,6 @@ vllm_endpoints:
   - name: "endpoint1"
     address: "127.0.0.1" # IPv4 address - REQUIRED format
     port: 8000
-    models:
-      - "openai/gpt-oss-20b"
     weight: 1

 model_config:
4 changes: 0 additions & 4 deletions deploy/kubernetes/istio/config.yaml
@@ -34,14 +34,10 @@ vllm_endpoints:
   - name: "endpoint1"
     address: "10.104.192.205" # IPv4 address - REQUIRED format
     port: 80
-    models:
-      - "llama3-8b"
     weight: 1
   - name: "endpoint2"
     address: "10.99.27.202" # IPv4 address - REQUIRED format
     port: 80
-    models:
-      - "phi4-mini"
     weight: 1

 model_config:
4 changes: 0 additions & 4 deletions deploy/openshift/config-openshift.yaml
@@ -32,14 +32,10 @@ vllm_endpoints:
   - name: "model-a-endpoint"
     address: "127.0.0.1" # localhost in same pod
     port: 8000
-    models:
-      - "Model-A"
     weight: 1
   - name: "model-b-endpoint"
     address: "127.0.0.1" # localhost in same pod
     port: 8001
-    models:
-      - "Model-B"
     weight: 1

 model_config:
9 changes: 8 additions & 1 deletion src/semantic-router/pkg/api/server_test.go
@@ -309,10 +309,17 @@ func TestOpenAIModelsEndpoint(t *testing.T) {
 				Name:    "primary",
 				Address: "127.0.0.1", // Changed from localhost to IP address
 				Port:    8000,
-				Models:  []string{"gpt-4o-mini", "llama-3.1-8b-instruct"},
 				Weight:  1,
 			},
 		},
+		ModelConfig: map[string]config.ModelParams{
+			"gpt-4o-mini": {
+				PreferredEndpoints: []string{"primary"},
+			},
+			"llama-3.1-8b-instruct": {
+				PreferredEndpoints: []string{"primary"},
+			},
+		},
 	}

 	apiServer := &ClassificationAPIServer{
40 changes: 10 additions & 30 deletions src/semantic-router/pkg/config/config.go
@@ -253,9 +253,6 @@ type VLLMEndpoint struct {
 	// Port of the vLLM endpoint
 	Port int `yaml:"port"`

-	// List of models served by this endpoint
-	Models []string `yaml:"models"`
-
 	// Load balancing weight for this endpoint
 	Weight int `yaml:"weight,omitempty"`
 }
@@ -604,32 +601,21 @@ func (c *RouterConfig) IsPromptGuardEnabled() bool {
 }

 // GetEndpointsForModel returns all endpoints that can serve the specified model
-// If the model has preferred endpoints configured, returns only those endpoints that are available
-// Otherwise, returns all endpoints that list the model in their Models array
+// Returns endpoints based on the model's preferred_endpoints configuration in model_config
 func (c *RouterConfig) GetEndpointsForModel(modelName string) []VLLMEndpoint {
-	var availableEndpoints []VLLMEndpoint
-
-	// First, find all endpoints that can serve this model
-	for _, endpoint := range c.VLLMEndpoints {
-		if slices.Contains(endpoint.Models, modelName) {
-			availableEndpoints = append(availableEndpoints, endpoint)
-		}
-	}
+	var endpoints []VLLMEndpoint

 	// Check if model has preferred endpoints configured
 	if modelConfig, ok := c.ModelConfig[modelName]; ok && len(modelConfig.PreferredEndpoints) > 0 {
-		var preferredEndpoints []VLLMEndpoint
-		for _, endpoint := range availableEndpoints {
-			if slices.Contains(modelConfig.PreferredEndpoints, endpoint.Name) {
-				preferredEndpoints = append(preferredEndpoints, endpoint)
+		// Return only the preferred endpoints
+		for _, endpointName := range modelConfig.PreferredEndpoints {
+			if endpoint, found := c.GetEndpointByName(endpointName); found {
+				endpoints = append(endpoints, *endpoint)
 			}
 		}
-		if len(preferredEndpoints) > 0 {
-			return preferredEndpoints
-		}
 	}

-	return availableEndpoints
+	return endpoints
 }

 // GetEndpointByName returns the endpoint with the specified name
@@ -642,18 +628,12 @@ func (c *RouterConfig) GetEndpointByName(name string) (*VLLMEndpoint, bool) {
 	return nil, false
 }

-// GetAllModels returns a list of all models available across all endpoints
+// GetAllModels returns a list of all models configured in model_config
 func (c *RouterConfig) GetAllModels() []string {
-	modelSet := make(map[string]bool)
 	var models []string

-	for _, endpoint := range c.VLLMEndpoints {
-		for _, model := range endpoint.Models {
-			if !modelSet[model] {
-				modelSet[model] = true
-				models = append(models, model)
-			}
-		}
+	for modelName := range c.ModelConfig {
+		models = append(models, modelName)
 	}

 	return models
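Net effect of the config.go changes: `model_config` becomes the single source of truth. `GetEndpointsForModel` resolves a model's `preferred_endpoints` by name, and `GetAllModels` simply enumerates the `model_config` keys. A minimal usage sketch of the new behavior — the import path is an assumption based on the repo layout, and the model/endpoint names mirror the updated test above:

```go
package main

import (
	"fmt"

	// Assumed module path; adjust to the actual go.mod module name.
	"github.com/vllm-project/semantic-router/src/semantic-router/pkg/config"
)

func main() {
	cfg := &config.RouterConfig{
		VLLMEndpoints: []config.VLLMEndpoint{
			{Name: "primary", Address: "127.0.0.1", Port: 8000, Weight: 1},
		},
		ModelConfig: map[string]config.ModelParams{
			"gpt-4o-mini": {PreferredEndpoints: []string{"primary"}},
		},
	}

	// Enumerates model_config keys (map iteration, so unordered in general).
	fmt.Println(cfg.GetAllModels()) // [gpt-4o-mini]

	// Preferred endpoints are resolved by name via GetEndpointByName.
	for _, ep := range cfg.GetEndpointsForModel("gpt-4o-mini") {
		fmt.Printf("gpt-4o-mini -> %s:%d\n", ep.Address, ep.Port) // 127.0.0.1:8000
	}

	// A model absent from model_config resolves to zero endpoints, where it
	// previously matched any endpoint whose models list contained it.
	fmt.Println(len(cfg.GetEndpointsForModel("unknown-model"))) // 0
}
```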