Skip to content

Commit 6be7545

Browse files
authored
Merge branch 'main' into 0909-yuluo/docs
2 parents e532270 + fe0b5b5 commit 6be7545

File tree

15 files changed

+708
-330
lines changed

15 files changed

+708
-330
lines changed

.github/workflows/test-and-build.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
name: Run Test
1+
name: Test And Build
22

33
on:
44
schedule:

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
[![Hugging Face](https://img.shields.io/badge/🤗%20Hugging%20Face-Community-yellow)](https://huggingface.co/LLM-Semantic-Router)
77
[![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](LICENSE)
88
[![Crates.io](https://img.shields.io/crates/v/candle-semantic-router.svg)](https://crates.io/crates/candle-semantic-router)
9+
![Test And Build](https://github.com/vllm-project/semantic-router/workflows/Test%20And%20Build/badge.svg)
910

1011
**📚 [Complete Documentation](https://vllm-semantic-router.com) | 🚀 [Quick Start](https://vllm-semantic-router.com/docs/getting-started/installation) | 📣 [Blog](https://vllm-semantic-router.com/blog/) | 📖 [API Reference](https://vllm-semantic-router.com/docs/api/router/)**
1112

config/config.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,6 @@ classifier:
112112
threshold: 0.7
113113
use_cpu: true
114114
pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"
115-
load_aware: false
116115
categories:
117116
- name: business
118117
use_reasoning: false

deploy/kubernetes/config.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,6 @@ classifier:
7878
threshold: 0.7
7979
use_cpu: true
8080
pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"
81-
load_aware: false
8281
categories:
8382
- name: business
8483
model_scores:

src/semantic-router/pkg/config/config.go

Lines changed: 6 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"fmt"
55
"os"
66
"path/filepath"
7+
"slices"
78
"sync"
89

910
"gopkg.in/yaml.v3"
@@ -33,7 +34,6 @@ type RouterConfig struct {
3334
UseCPU bool `yaml:"use_cpu"`
3435
PIIMappingPath string `yaml:"pii_mapping_path"`
3536
} `yaml:"pii_model"`
36-
LoadAware bool `yaml:"load_aware"`
3737
} `yaml:"classifier"`
3838

3939
// Categories for routing queries
@@ -439,14 +439,7 @@ func (c *RouterConfig) IsModelAllowedForPIIType(modelName string, piiType string
439439
}
440440

441441
// If allow_by_default is false, only explicitly allowed PII types are permitted
442-
for _, allowedPII := range policy.PIITypes {
443-
if allowedPII == piiType {
444-
return true
445-
}
446-
}
447-
448-
// PII type not found in allowed list and allow_by_default is false
449-
return false
442+
return slices.Contains(policy.PIITypes, piiType)
450443
}
451444

452445
// IsModelAllowedForPIITypes checks if a model is allowed to process any of the given PII types
@@ -487,23 +480,17 @@ func (c *RouterConfig) GetEndpointsForModel(modelName string) []VLLMEndpoint {
487480

488481
// First, find all endpoints that can serve this model
489482
for _, endpoint := range c.VLLMEndpoints {
490-
for _, model := range endpoint.Models {
491-
if model == modelName {
492-
availableEndpoints = append(availableEndpoints, endpoint)
493-
break
494-
}
483+
if slices.Contains(endpoint.Models, modelName) {
484+
availableEndpoints = append(availableEndpoints, endpoint)
495485
}
496486
}
497487

498488
// Check if model has preferred endpoints configured
499489
if modelConfig, ok := c.ModelConfig[modelName]; ok && len(modelConfig.PreferredEndpoints) > 0 {
500490
var preferredEndpoints []VLLMEndpoint
501491
for _, endpoint := range availableEndpoints {
502-
for _, preferredName := range modelConfig.PreferredEndpoints {
503-
if endpoint.Name == preferredName {
504-
preferredEndpoints = append(preferredEndpoints, endpoint)
505-
break
506-
}
492+
if slices.Contains(modelConfig.PreferredEndpoints, endpoint.Name) {
493+
preferredEndpoints = append(preferredEndpoints, endpoint)
507494
}
508495
}
509496
if len(preferredEndpoints) > 0 {

src/semantic-router/pkg/config/config_test.go

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,6 @@ classifier:
6060
use_cpu: true
6161
use_modernbert: false
6262
pii_mapping_path: "/path/to/pii.json"
63-
load_aware: true
6463
6564
categories:
6665
- name: "general"
@@ -138,7 +137,6 @@ tools:
138137
// Verify classifier config
139138
Expect(cfg.Classifier.CategoryModel.ModelID).To(Equal("test-category-model"))
140139
Expect(cfg.Classifier.CategoryModel.UseModernBERT).To(BeTrue())
141-
Expect(cfg.Classifier.LoadAware).To(BeTrue())
142140

143141
// Verify categories
144142
Expect(cfg.Categories).To(HaveLen(1))

src/semantic-router/pkg/extproc/request_handler.go

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -370,9 +370,6 @@ func (r *OpenAIRouter) handleModelRouting(openAIRequest *openai.ChatCompletionNe
370370
effortForMetrics := r.getReasoningEffort(categoryName)
371371
metrics.RecordReasoningDecision(categoryName, matchedModel, useReasoning, effortForMetrics)
372372

373-
// Track the model load for the selected model
374-
r.Classifier.IncrementModelLoad(matchedModel)
375-
376373
// Track the model routing change
377374
metrics.RecordModelRouting(originalModel, matchedModel)
378375

src/semantic-router/pkg/extproc/response_handler.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,6 @@ func (r *OpenAIRouter) handleResponseBody(v *ext_proc.ProcessingRequest_Response
5252
float64(completionTokens),
5353
)
5454
metrics.RecordModelCompletionLatency(ctx.RequestModel, completionLatency.Seconds())
55-
r.Classifier.DecrementModelLoad(ctx.RequestModel)
5655

5756
// Compute and record cost if pricing is configured
5857
if r.Config != nil {

src/semantic-router/pkg/extproc/router.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -131,8 +131,7 @@ func NewOpenAIRouter(configPath string) (*OpenAIRouter, error) {
131131

132132
// Create utility components
133133
piiChecker := pii.NewPolicyChecker(cfg, cfg.ModelConfig)
134-
modelTTFT := make(map[string]float64) // Empty TTFT map since load balancing is disabled
135-
classifier := classification.NewClassifier(cfg, categoryMapping, piiMapping, jailbreakMapping, modelTTFT)
134+
classifier := classification.NewClassifier(cfg, categoryMapping, piiMapping, jailbreakMapping)
136135

137136
// Create global classification service for API access
138137
services.NewClassificationService(classifier, cfg)

src/semantic-router/pkg/extproc/security_test.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ var _ = Describe("Security Checks", func() {
5252
},
5353
}
5454
router.PIIChecker = pii.NewPolicyChecker(cfg, cfg.ModelConfig)
55-
router.Classifier = classification.NewClassifier(cfg, router.Classifier.CategoryMapping, router.Classifier.PIIMapping, nil, router.Classifier.ModelTTFT)
55+
router.Classifier = classification.NewClassifier(cfg, router.Classifier.CategoryMapping, router.Classifier.PIIMapping, nil)
5656
})
5757

5858
It("should allow requests with no PII", func() {
@@ -97,7 +97,7 @@ var _ = Describe("Security Checks", func() {
9797
piiMapping, err := classification.LoadPIIMapping(cfg.Classifier.PIIModel.PIIMappingPath)
9898
Expect(err).NotTo(HaveOccurred())
9999

100-
router.Classifier = classification.NewClassifier(cfg, router.Classifier.CategoryMapping, piiMapping, nil, router.Classifier.ModelTTFT)
100+
router.Classifier = classification.NewClassifier(cfg, router.Classifier.CategoryMapping, piiMapping, nil)
101101
})
102102

103103
Describe("ClassifyPII method", func() {
@@ -339,7 +339,7 @@ var _ = Describe("Security Checks", func() {
339339
piiMapping, err := classification.LoadPIIMapping(cfg.Classifier.PIIModel.PIIMappingPath)
340340
Expect(err).NotTo(HaveOccurred())
341341

342-
router.Classifier = classification.NewClassifier(cfg, router.Classifier.CategoryMapping, piiMapping, nil, router.Classifier.ModelTTFT)
342+
router.Classifier = classification.NewClassifier(cfg, router.Classifier.CategoryMapping, piiMapping, nil)
343343
})
344344

345345
Describe("Error handling and edge cases", func() {
@@ -524,7 +524,7 @@ var _ = Describe("Security Checks", func() {
524524
IdxToLabel: map[string]string{"0": "benign", "1": "jailbreak"},
525525
}
526526

527-
router.Classifier = classification.NewClassifier(cfg, router.Classifier.CategoryMapping, router.Classifier.PIIMapping, jailbreakMapping, router.Classifier.ModelTTFT)
527+
router.Classifier = classification.NewClassifier(cfg, router.Classifier.CategoryMapping, router.Classifier.PIIMapping, jailbreakMapping)
528528
})
529529

530530
It("should process potential jailbreak attempts", func() {

0 commit comments

Comments
 (0)