
Commit 8a2c551

refactor(config): reorganize configuration structure with hierarchical grouping (#574)
1 parent 21b6d6c commit 8a2c551

20 files changed: +2346 additions, −1744 deletions


src/semantic-router/cmd/main.go

Lines changed: 7 additions & 7 deletions
@@ -46,7 +46,7 @@ func main() {
     }
 
     // Load configuration to initialize tracing
-    cfg, err := config.ParseConfigFile(*configPath)
+    cfg, err := config.Parse(*configPath)
     if err != nil {
         logging.Fatalf("Failed to load config: %v", err)
     }
@@ -114,18 +114,18 @@ func main() {
     logging.Infof("Starting vLLM Semantic Router ExtProc with config: %s", *configPath)
 
     // Initialize embedding models if configured (Long-context support)
-    cfg, err = config.LoadConfig(*configPath)
+    cfg, err = config.Load(*configPath)
     if err != nil {
         logging.Warnf("Failed to load config for embedding models: %v", err)
-    } else if cfg.EmbeddingModels.Qwen3ModelPath != "" || cfg.EmbeddingModels.GemmaModelPath != "" {
+    } else if cfg.Qwen3ModelPath != "" || cfg.GemmaModelPath != "" {
         logging.Infof("Initializing embedding models...")
-        logging.Infof("  Qwen3 model: %s", cfg.EmbeddingModels.Qwen3ModelPath)
-        logging.Infof("  Gemma model: %s", cfg.EmbeddingModels.GemmaModelPath)
+        logging.Infof("  Qwen3 model: %s", cfg.Qwen3ModelPath)
+        logging.Infof("  Gemma model: %s", cfg.GemmaModelPath)
         logging.Infof("  Use CPU: %v", cfg.EmbeddingModels.UseCPU)
 
         if err := candle_binding.InitEmbeddingModels(
-            cfg.EmbeddingModels.Qwen3ModelPath,
-            cfg.EmbeddingModels.GemmaModelPath,
+            cfg.Qwen3ModelPath,
+            cfg.GemmaModelPath,
             cfg.EmbeddingModels.UseCPU,
         ); err != nil {
             logging.Errorf("Failed to initialize embedding models: %v", err)
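
A note on why this compiles: the new code reads cfg.Qwen3ModelPath directly while still using cfg.EmbeddingModels.UseCPU, which is consistent with EmbeddingModels now being an embedded (anonymous) field of the router config, so its fields are promoted to the top level. The sketch below only illustrates that promotion pattern; the type names, fields, and values are assumptions rather than the package's actual definitions, and config.Parse / config.Load are simply the renamed entry points shown in this diff.

package main

import "fmt"

// Hypothetical shapes for illustration; the real config package may differ.
type EmbeddingModels struct {
    Qwen3ModelPath string
    GemmaModelPath string
    UseCPU         bool
}

type RouterConfig struct {
    EmbeddingModels // embedded, so its fields are promoted onto RouterConfig
}

func main() {
    cfg := RouterConfig{EmbeddingModels: EmbeddingModels{
        Qwen3ModelPath: "/path/to/qwen3", // placeholder path
        UseCPU:         true,
    }}
    fmt.Println(cfg.Qwen3ModelPath)         // promoted access, as in the new main.go
    fmt.Println(cfg.EmbeddingModels.UseCPU) // explicit access still works, as the diff keeps it
}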

src/semantic-router/go.mod

Lines changed: 2 additions & 1 deletion
@@ -21,6 +21,7 @@ require (
     github.com/openai/openai-go v1.12.0
     github.com/prometheus/client_golang v1.23.0
     github.com/prometheus/client_model v0.6.2
+    github.com/samber/lo v1.52.0
     github.com/stretchr/testify v1.11.1
     github.com/vllm-project/semantic-router/candle-binding v0.0.0-00010101000000-000000000000
     go.opentelemetry.io/otel v1.38.0
@@ -31,6 +32,7 @@ require (
     go.uber.org/zap v1.27.0
     golang.org/x/sys v0.37.0
     google.golang.org/grpc v1.75.0
+    gopkg.in/yaml.v2 v2.4.0
     gopkg.in/yaml.v3 v3.0.1
     k8s.io/apimachinery v0.31.4
     sigs.k8s.io/yaml v1.6.0
@@ -100,7 +102,6 @@ require (
     google.golang.org/genproto/googleapis/rpc v0.0.0-20250922171735-9219d122eba9 // indirect
     google.golang.org/protobuf v1.36.9 // indirect
     gopkg.in/inf.v0 v0.9.1 // indirect
-    gopkg.in/yaml.v2 v2.4.0 // indirect
     k8s.io/klog/v2 v2.130.1 // indirect
     k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 // indirect
     sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect
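
Here github.com/samber/lo becomes a direct dependency, and gopkg.in/yaml.v2 is promoted from indirect to direct. The commit does not show the new call sites, so the snippet below is only a hedged illustration of the kind of generic slice helper lo provides (lo.Map), not code from this repository.

package main

import (
    "fmt"

    "github.com/samber/lo"
)

func main() {
    // lo.Map transforms each element of a slice; purely illustrative usage.
    names := lo.Map([]int{1, 2, 3}, func(n int, _ int) string {
        return fmt.Sprintf("endpoint-%d", n)
    })
    fmt.Println(names) // [endpoint-1 endpoint-2 endpoint-3]
}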

src/semantic-router/go.sum

Lines changed: 2 additions & 0 deletions
@@ -259,6 +259,8 @@ github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR
 github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o=
 github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g=
 github.com/ryanuber/columnize v2.1.0+incompatible/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts=
+github.com/samber/lo v1.52.0 h1:Rvi+3BFHES3A8meP33VPAxiBZX/Aws5RxrschYGjomw=
+github.com/samber/lo v1.52.0/go.mod h1:4+MXEGsJzbKGaUEQFKBq2xtfuznW9oz/WrgyzMzRoM0=
 github.com/schollz/closestmatch v2.1.0+incompatible/go.mod h1:RtP1ddjLong6gTkbtmuhtR2uUrrJOpYzYRvbcPAid+g=
 github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo=
 github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=

src/semantic-router/pkg/apiserver/route_model_info.go

Lines changed: 11 additions & 11 deletions
@@ -80,7 +80,7 @@ func (s *ClassificationAPIServer) getLoadedModelsInfo() []ModelInfo {
     }
 
     // Category classifier model
-    if s.config.Classifier.CategoryModel.CategoryMappingPath != "" {
+    if s.config.CategoryMappingPath != "" {
         categories := []string{}
         // Extract category names from config.Categories
         for _, cat := range s.config.Categories {
@@ -91,27 +91,27 @@ func (s *ClassificationAPIServer) getLoadedModelsInfo() []ModelInfo {
             Name:       "category_classifier",
             Type:       "intent_classification",
             Loaded:     true,
-            ModelPath:  s.config.Classifier.CategoryModel.ModelID,
+            ModelPath:  s.config.CategoryModel.ModelID,
             Categories: categories,
             Metadata: map[string]string{
-                "mapping_path": s.config.Classifier.CategoryModel.CategoryMappingPath,
+                "mapping_path": s.config.CategoryMappingPath,
                 "model_type":   "modernbert",
-                "threshold":    fmt.Sprintf("%.2f", s.config.Classifier.CategoryModel.Threshold),
+                "threshold":    fmt.Sprintf("%.2f", s.config.CategoryModel.Threshold),
             },
         })
     }
 
     // PII classifier model
-    if s.config.Classifier.PIIModel.PIIMappingPath != "" {
+    if s.config.PIIMappingPath != "" {
         models = append(models, ModelInfo{
            Name:      "pii_classifier",
            Type:      "pii_detection",
            Loaded:    true,
-           ModelPath: s.config.Classifier.PIIModel.ModelID,
+           ModelPath: s.config.PIIModel.ModelID,
            Metadata: map[string]string{
-               "mapping_path": s.config.Classifier.PIIModel.PIIMappingPath,
+               "mapping_path": s.config.PIIMappingPath,
                "model_type":   "modernbert_token",
-               "threshold":    fmt.Sprintf("%.2f", s.config.Classifier.PIIModel.Threshold),
+               "threshold":    fmt.Sprintf("%.2f", s.config.PIIModel.Threshold),
            },
        })
    }
@@ -130,15 +130,15 @@ func (s *ClassificationAPIServer) getLoadedModelsInfo() []ModelInfo {
    }
 
    // BERT similarity model
-   if s.config.BertModel.ModelID != "" {
+   if s.config.ModelID != "" {
        models = append(models, ModelInfo{
            Name:      "bert_similarity_model",
            Type:      "similarity",
            Loaded:    true,
-           ModelPath: s.config.BertModel.ModelID,
+           ModelPath: s.config.ModelID,
            Metadata: map[string]string{
                "model_type": "sentence_transformer",
-               "threshold":  fmt.Sprintf("%.2f", s.config.BertModel.Threshold),
+               "threshold":  fmt.Sprintf("%.2f", s.config.Threshold),
                "use_cpu":    fmt.Sprintf("%t", s.config.BertModel.UseCPU),
            },
        })
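
The same promotion idea explains the shorter accessors here, but chained through two levels: if the classifier group is embedded in the router config and the per-model groups are embedded in the classifier group, then the old fully qualified accessors and the new shorter ones can refer to the same underlying fields. The struct layout below is an assumption for illustration only; the names mirror the diff, but the real grouping may differ.

package main

import "fmt"

// Assumed grouping for illustration; not the repository's actual definitions.
type CategoryModel struct {
    ModelID             string
    CategoryMappingPath string
}

type Classifier struct {
    CategoryModel // embedded group
}

type RouterConfig struct {
    Classifier // embedded group
}

func main() {
    cfg := RouterConfig{Classifier: Classifier{CategoryModel: CategoryModel{
        ModelID:             "example-model", // placeholder values
        CategoryMappingPath: "example-mapping.json",
    }}}
    // Promotion chains through nested embedded structs, so all of these resolve:
    fmt.Println(cfg.Classifier.CategoryModel.CategoryMappingPath) // old, fully qualified
    fmt.Println(cfg.CategoryModel.ModelID)                        // partially promoted, as in the new code
    fmt.Println(cfg.CategoryMappingPath)                          // fully promoted, as in the new code
}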

src/semantic-router/pkg/apiserver/server.go

Lines changed: 1 addition & 1 deletion
@@ -18,7 +18,7 @@ import (
 // Init starts the API server
 func Init(configPath string, port int, enableSystemPromptAPI bool) error {
     // Load configuration
-    cfg, err := config.LoadConfig(configPath)
+    cfg, err := config.Load(configPath)
     if err != nil {
         return fmt.Errorf("failed to load config: %w", err)
     }

src/semantic-router/pkg/apiserver/server_test.go

Lines changed: 102 additions & 62 deletions
@@ -214,12 +214,14 @@ func TestBatchClassificationConfiguration(t *testing.T) {
        {
            name: "Custom max batch size",
            config: &config.RouterConfig{
-               API: config.APIConfig{
-                   BatchClassification: struct {
-                       Metrics config.BatchClassificationMetricsConfig `yaml:"metrics,omitempty"`
-                   }{
-                       Metrics: config.BatchClassificationMetricsConfig{
-                           Enabled: true,
+               APIServer: config.APIServer{
+                   API: config.APIConfig{
+                       BatchClassification: struct {
+                           Metrics config.BatchClassificationMetricsConfig `yaml:"metrics,omitempty"`
+                       }{
+                           Metrics: config.BatchClassificationMetricsConfig{
+                               Enabled: true,
+                           },
                        },
                    },
                },
@@ -248,12 +250,14 @@ func TestBatchClassificationConfiguration(t *testing.T) {
        {
            name: "Valid request within custom limits",
            config: &config.RouterConfig{
-               API: config.APIConfig{
-                   BatchClassification: struct {
-                       Metrics config.BatchClassificationMetricsConfig `yaml:"metrics,omitempty"`
-                   }{
-                       Metrics: config.BatchClassificationMetricsConfig{
-                           Enabled: true,
+               APIServer: config.APIServer{
+                   API: config.APIConfig{
+                       BatchClassification: struct {
+                           Metrics config.BatchClassificationMetricsConfig `yaml:"metrics,omitempty"`
+                       }{
+                           Metrics: config.BatchClassificationMetricsConfig{
+                               Enabled: true,
+                           },
                        },
                    },
                },
@@ -305,23 +309,27 @@ func TestBatchClassificationConfiguration(t *testing.T) {
 func TestOpenAIModelsEndpoint(t *testing.T) {
     // Test with default config (IncludeConfigModelsInList = false)
     cfg := &config.RouterConfig{
-       VLLMEndpoints: []config.VLLMEndpoint{
-           {
-               Name:    "primary",
-               Address: "127.0.0.1",
-               Port:    8000,
-               Weight:  1,
-           },
-       },
-       ModelConfig: map[string]config.ModelParams{
-           "gpt-4o-mini": {
-               PreferredEndpoints: []string{"primary"},
+       BackendModels: config.BackendModels{
+           VLLMEndpoints: []config.VLLMEndpoint{
+               {
+                   Name:    "primary",
+                   Address: "127.0.0.1",
+                   Port:    8000,
+                   Weight:  1,
+               },
            },
-           "llama-3.1-8b-instruct": {
-               PreferredEndpoints: []string{"primary"},
+           ModelConfig: map[string]config.ModelParams{
+               "gpt-4o-mini": {
+                   PreferredEndpoints: []string{"primary"},
+               },
+               "llama-3.1-8b-instruct": {
+                   PreferredEndpoints: []string{"primary"},
+               },
            },
        },
-       IncludeConfigModelsInList: false,
+       RouterOptions: config.RouterOptions{
+           IncludeConfigModelsInList: false,
+       },
    }
 
    apiServer := &ClassificationAPIServer{
@@ -371,23 +379,27 @@ func TestOpenAIModelsEndpoint(t *testing.T) {
 func TestOpenAIModelsEndpointWithConfigModels(t *testing.T) {
     // Test with IncludeConfigModelsInList = true
     cfg := &config.RouterConfig{
-       VLLMEndpoints: []config.VLLMEndpoint{
-           {
-               Name:    "primary",
-               Address: "127.0.0.1",
-               Port:    8000,
-               Weight:  1,
-           },
-       },
-       ModelConfig: map[string]config.ModelParams{
-           "gpt-4o-mini": {
-               PreferredEndpoints: []string{"primary"},
+       BackendModels: config.BackendModels{
+           VLLMEndpoints: []config.VLLMEndpoint{
+               {
+                   Name:    "primary",
+                   Address: "127.0.0.1",
+                   Port:    8000,
+                   Weight:  1,
+               },
            },
-           "llama-3.1-8b-instruct": {
-               PreferredEndpoints: []string{"primary"},
+           ModelConfig: map[string]config.ModelParams{
+               "gpt-4o-mini": {
+                   PreferredEndpoints: []string{"primary"},
+               },
+               "llama-3.1-8b-instruct": {
+                   PreferredEndpoints: []string{"primary"},
+               },
            },
        },
-       IncludeConfigModelsInList: true,
+       RouterOptions: config.RouterOptions{
+           IncludeConfigModelsInList: true,
+       },
    }
 
    apiServer := &ClassificationAPIServer{
@@ -441,18 +453,32 @@ func TestOpenAIModelsEndpointWithConfigModels(t *testing.T) {
 func TestSystemPromptEndpointSecurity(t *testing.T) {
     // Create test configuration with categories that have system prompts
     cfg := &config.RouterConfig{
-       Categories: []config.Category{
-           {
-               Name:                "math",
-               SystemPrompt:        "You are a math expert.",
-               SystemPromptEnabled: &[]bool{true}[0], // Pointer to true
-               SystemPromptMode:    "replace",
-           },
-           {
-               Name:                "coding",
-               SystemPrompt:        "You are a coding assistant.",
-               SystemPromptEnabled: &[]bool{false}[0], // Pointer to false
-               SystemPromptMode:    "insert",
+       IntelligentRouting: config.IntelligentRouting{
+           Categories: []config.Category{
+               {
+                   CategoryMetadata: config.CategoryMetadata{
+                       Name: "math",
+                   },
+                   DomainAwarePolicies: config.DomainAwarePolicies{
+                       SystemPromptPolicy: config.SystemPromptPolicy{
+                           SystemPrompt:        "You are a math expert.",
+                           SystemPromptEnabled: &[]bool{true}[0], // Pointer to true
+                           SystemPromptMode:    "replace",
+                       },
+                   },
+               },
+               {
+                   CategoryMetadata: config.CategoryMetadata{
+                       Name: "coding",
+                   },
+                   DomainAwarePolicies: config.DomainAwarePolicies{
+                       SystemPromptPolicy: config.SystemPromptPolicy{
+                           SystemPrompt:        "You are a coding assistant.",
+                           SystemPromptEnabled: &[]bool{false}[0], // Pointer to false
+                           SystemPromptMode:    "insert",
+                       },
+                   },
+               },
            },
        },
    }
@@ -633,16 +659,30 @@ func TestSystemPromptEndpointSecurity(t *testing.T) {
 func TestSystemPromptEndpointFunctionality(t *testing.T) {
     // Create test configuration
     cfg := &config.RouterConfig{
-       Categories: []config.Category{
-           {
-               Name:                "math",
-               SystemPrompt:        "You are a math expert.",
-               SystemPromptEnabled: &[]bool{true}[0],
-               SystemPromptMode:    "replace",
-           },
-           {
-               Name:         "no-prompt",
-               SystemPrompt: "", // No system prompt
+       IntelligentRouting: config.IntelligentRouting{
+           Categories: []config.Category{
+               {
+                   CategoryMetadata: config.CategoryMetadata{
+                       Name: "math",
+                   },
+                   DomainAwarePolicies: config.DomainAwarePolicies{
+                       SystemPromptPolicy: config.SystemPromptPolicy{
+                           SystemPrompt:        "You are a math expert.",
+                           SystemPromptEnabled: &[]bool{true}[0],
+                           SystemPromptMode:    "replace",
+                       },
+                   },
+               },
+               {
+                   CategoryMetadata: config.CategoryMetadata{
+                       Name: "no-prompt",
+                   },
+                   DomainAwarePolicies: config.DomainAwarePolicies{
+                       SystemPromptPolicy: config.SystemPromptPolicy{
+                           SystemPrompt: "", // No system prompt
+                       },
+                   },
+               },
            },
        },
    }
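
The test updates above show callers constructing the new grouped structs (APIServer, BackendModels, RouterOptions, IntelligentRouting) instead of flat RouterConfig fields. One way such a regrouping can stay compatible with existing flat YAML files is to inline the embedded groups when unmarshalling; whether this commit does so is not visible here, so the sketch below is an assumption that only demonstrates the standard gopkg.in/yaml.v3 ",inline" tag with invented field names.

package main

import (
    "fmt"

    "gopkg.in/yaml.v3"
)

// Hypothetical grouped config; type and field names are assumptions.
type RouterOptions struct {
    IncludeConfigModelsInList bool `yaml:"include_config_models_in_list"`
}

type RouterConfig struct {
    RouterOptions `yaml:",inline"` // inline keeps the YAML keys at the top level
}

func main() {
    // A flat YAML document still unmarshals into the grouped Go struct.
    doc := []byte("include_config_models_in_list: true\n")
    var cfg RouterConfig
    if err := yaml.Unmarshal(doc, &cfg); err != nil {
        panic(err)
    }
    fmt.Println(cfg.IncludeConfigModelsInList) // true
}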
