Skip to content

Commit e1b0851

Browse files
authored
Merge branch 'main' into fix_quickstart_script
2 parents 7c14a6c + 9e5c6ce commit e1b0851

File tree

5 files changed

+565
-0
lines changed

5 files changed

+565
-0
lines changed

src/semantic-router/pkg/config/config.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,19 @@ import (
1010
"gopkg.in/yaml.v3"
1111
)
1212

13+
// KeywordRule describes a single keyword-based classification rule as read
// from the YAML configuration.
type KeywordRule struct {
	// Category is the label associated with the rule.
	Category string `yaml:"category"`
	// Operator names how Keywords are combined. The keyword classifier
	// accepts "AND", "OR" and "NOR".
	Operator string `yaml:"operator"`
	// Keywords lists the terms looked for in the classified text.
	Keywords []string `yaml:"keywords"`
	// CaseSensitive selects case-sensitive matching when true and
	// case-insensitive matching when false.
	CaseSensitive bool `yaml:"case_sensitive"`
}
20+
1321
// RouterConfig represents the main configuration for the LLM Router
1422
type RouterConfig struct {
23+
// Keyword-based classification rules
24+
KeywordRules []KeywordRule `yaml:"keyword_rules,omitempty"`
25+
1526
// BERT model configuration for Candle BERT similarity comparison
1627
BertModel struct {
1728
ModelID string `yaml:"model_id"`
Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
package classification
2+
3+
import (
4+
"fmt"
5+
"strings"
6+
"testing"
7+
8+
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/config"
9+
)
10+
11+
// --- Current Regex Implementation ---
12+
// This uses the currently modified keyword_classifier.go with regex matching.
13+
14+
func BenchmarkKeywordClassifierRegex(b *testing.B) {
15+
rulesConfig := []config.KeywordRule{
16+
{Category: "cat-and", Operator: "AND", Keywords: []string{"apple", "banana"}, CaseSensitive: false},
17+
{Category: "cat-or", Operator: "OR", Keywords: []string{"orange", "grape"}, CaseSensitive: true},
18+
{Category: "cat-nor", Operator: "NOR", Keywords: []string{"disallowed"}, CaseSensitive: false},
19+
}
20+
21+
testTextAndMatch := "I like apple and banana"
22+
testTextOrMatch := "I prefer orange juice"
23+
testTextNorMatch := "This text is clean"
24+
testTextNoMatch := "Something else entirely with disallowed words" // To fail all above for final no match
25+
26+
classifierRegex, err := NewKeywordClassifier(rulesConfig)
27+
if err != nil {
28+
b.Fatalf("Failed to initialize KeywordClassifier: %v", err)
29+
}
30+
31+
b.Run("Regex_AND_Match", func(b *testing.B) {
32+
b.ResetTimer()
33+
for i := 0; i < b.N; i++ {
34+
_, _, _ = classifierRegex.Classify(testTextAndMatch)
35+
}
36+
})
37+
b.Run("Regex_OR_Match", func(b *testing.B) {
38+
b.ResetTimer()
39+
for i := 0; i < b.N; i++ {
40+
_, _, _ = classifierRegex.Classify(testTextOrMatch)
41+
}
42+
})
43+
b.Run("Regex_NOR_Match", func(b *testing.B) {
44+
b.ResetTimer()
45+
for i := 0; i < b.N; i++ {
46+
_, _, _ = classifierRegex.Classify(testTextNorMatch)
47+
}
48+
})
49+
b.Run("Regex_No_Match", func(b *testing.B) {
50+
b.ResetTimer()
51+
for i := 0; i < b.N; i++ {
52+
_, _, _ = classifierRegex.Classify(testTextNoMatch)
53+
}
54+
})
55+
56+
// Scenario: Keywords with varying lengths
57+
rulesConfigLongKeywords := []config.KeywordRule{
58+
{Category: "long-kw", Operator: "OR", Keywords: []string{"supercalifragilisticexpialidocious", "pneumonoultramicroscopicsilicovolcanoconiosis"}, CaseSensitive: false},
59+
}
60+
classifierLongKeywords, err := NewKeywordClassifier(rulesConfigLongKeywords)
61+
if err != nil {
62+
b.Fatalf("Failed to initialize classifierLongKeywords: %v", err)
63+
}
64+
b.Run("Regex_LongKeywords", func(b *testing.B) {
65+
b.ResetTimer()
66+
for i := 0; i < b.N; i++ {
67+
_, _, _ = classifierLongKeywords.Classify("This text contains supercalifragilisticexpialidocious and other long words.")
68+
}
69+
})
70+
71+
// Scenario: Texts with varying lengths
72+
rulesConfigShortText := []config.KeywordRule{
73+
{Category: "short-text", Operator: "OR", Keywords: []string{"short"}, CaseSensitive: false},
74+
}
75+
classifierShortText, err := NewKeywordClassifier(rulesConfigShortText)
76+
if err != nil {
77+
b.Fatalf("Failed to initialize classifierShortText: %v", err)
78+
}
79+
b.Run("Regex_ShortText", func(b *testing.B) {
80+
b.ResetTimer()
81+
for i := 0; i < b.N; i++ {
82+
_, _, _ = classifierShortText.Classify("short")
83+
}
84+
})
85+
86+
rulesConfigLongText := []config.KeywordRule{
87+
{Category: "long-text", Operator: "OR", Keywords: []string{"endword"}, CaseSensitive: false},
88+
}
89+
classifierLongText, err := NewKeywordClassifier(rulesConfigLongText)
90+
if err != nil {
91+
b.Fatalf("Failed to initialize classifierLongText: %v", err)
92+
}
93+
longText := strings.Repeat("word ", 1000) + "endword" // Text of ~5000 characters
94+
b.Run("Regex_LongText", func(b *testing.B) {
95+
b.ResetTimer()
96+
for i := 0; i < b.N; i++ {
97+
_, _, _ = classifierLongText.Classify(longText)
98+
}
99+
})
100+
101+
// Scenario: Rules with a larger number of keywords
102+
manyKeywords := make([]string, 100)
103+
for i := 0; i < 100; i++ {
104+
manyKeywords[i] = fmt.Sprintf("keyword%d", i)
105+
}
106+
rulesConfigManyKeywords := []config.KeywordRule{
107+
{Category: "many-kw", Operator: "OR", Keywords: manyKeywords, CaseSensitive: false},
108+
}
109+
classifierManyKeywords, err := NewKeywordClassifier(rulesConfigManyKeywords)
110+
if err != nil {
111+
b.Fatalf("Failed to initialize classifierManyKeywords: %v", err)
112+
}
113+
b.Run("Regex_ManyKeywords", func(b *testing.B) {
114+
b.ResetTimer()
115+
for i := 0; i < b.N; i++ {
116+
_, _, _ = classifierManyKeywords.Classify("This text contains keyword99")
117+
}
118+
})
119+
120+
// Scenario: Keywords with many escaped characters
121+
rulesConfigComplexKeywords := []config.KeywordRule{
122+
{Category: "complex-kw", Operator: "OR", Keywords: []string{"[email protected]", "C:\\Program Files\\"}, CaseSensitive: false},
123+
}
124+
classifierComplexKeywords, err := NewKeywordClassifier(rulesConfigComplexKeywords)
125+
if err != nil {
126+
b.Fatalf("Failed to initialize classifierComplexKeywords: %v", err)
127+
}
128+
b.Run("Regex_ComplexKeywords", func(b *testing.B) {
129+
b.ResetTimer()
130+
for i := 0; i < b.N; i++ {
131+
_, _, _ = classifierComplexKeywords.Classify("Please send to [email protected] or check C:\\Program Files\\")
132+
}
133+
})
134+
}

src/semantic-router/pkg/utils/classification/classifier.go

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,7 @@ type Classifier struct {
204204
jailbreakInference JailbreakInference
205205
piiInitializer PIIInitializer
206206
piiInference PIIInference
207+
keywordClassifier *KeywordClassifier
207208

208209
// Dependencies - MCP-based classifiers
209210
mcpCategoryInitializer MCPCategoryInitializer
@@ -247,6 +248,12 @@ func withPII(piiMapping *PIIMapping, piiInitializer PIIInitializer, piiInference
247248
}
248249
}
249250

251+
func withKeywordClassifier(keywordClassifier *KeywordClassifier) option {
252+
return func(c *Classifier) {
253+
c.keywordClassifier = keywordClassifier
254+
}
255+
}
256+
250257
// initModels initializes the models for the classifier
251258
func initModels(classifier *Classifier) (*Classifier, error) {
252259
// Initialize either in-tree OR MCP-based category classifier
@@ -303,6 +310,16 @@ func NewClassifier(cfg *config.RouterConfig, categoryMapping *CategoryMapping, p
303310
withPII(piiMapping, createPIIInitializer(), createPIIInference()),
304311
}
305312

313+
// Add keyword classifier if configured
314+
if len(cfg.KeywordRules) > 0 {
315+
keywordClassifier, err := NewKeywordClassifier(cfg.KeywordRules)
316+
if err != nil {
317+
observability.Errorf("Failed to create keyword classifier: %v", err)
318+
return nil, err
319+
}
320+
options = append(options, withKeywordClassifier(keywordClassifier))
321+
}
322+
306323
// Add in-tree classifier if configured
307324
if cfg.Classifier.CategoryModel.ModelID != "" {
308325
options = append(options, withCategory(categoryMapping, createCategoryInitializer(cfg.Classifier.CategoryModel.UseModernBERT), createCategoryInference(cfg.Classifier.CategoryModel.UseModernBERT)))
@@ -342,6 +359,17 @@ func (c *Classifier) initializeCategoryClassifier() error {
342359

343360
// ClassifyCategory performs category classification on the given text
344361
func (c *Classifier) ClassifyCategory(text string) (string, float64, error) {
362+
// Try keyword classifier first
363+
if c.keywordClassifier != nil {
364+
category, confidence, err := c.keywordClassifier.Classify(text)
365+
if err != nil {
366+
return "", 0.0, err
367+
}
368+
if category != "" {
369+
return category, confidence, nil
370+
}
371+
}
372+
345373
// Try in-tree first if properly configured
346374
if c.IsCategoryEnabled() && c.categoryInference != nil {
347375
return c.classifyCategoryInTree(text)
Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
package classification
2+
3+
import (
4+
"fmt"
5+
"regexp"
6+
"unicode"
7+
8+
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/config"
9+
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/observability"
10+
)
11+
12+
// preppedKeywordRule is the match-ready form of a configured keyword rule:
// the rule's settings plus one precompiled regular expression per keyword.
type preppedKeywordRule struct {
	Category      string
	Operator      string
	CaseSensitive bool
	// OriginalKeywords holds the keywords exactly as configured, for logging
	// and for reporting which keywords matched. It is index-aligned with both
	// compiled slices below.
	OriginalKeywords []string
	// CompiledRegexpsCS and CompiledRegexpsCI hold the case-sensitive and
	// case-insensitive pattern for each keyword respectively.
	CompiledRegexpsCS []*regexp.Regexp
	CompiledRegexpsCI []*regexp.Regexp
}
21+
22+
// KeywordClassifier implements keyword-based classification logic.
23+
type KeywordClassifier struct {
24+
rules []preppedKeywordRule // Store preprocessed rules
25+
}
26+
27+
// NewKeywordClassifier creates a new KeywordClassifier.
28+
func NewKeywordClassifier(cfgRules []config.KeywordRule) (*KeywordClassifier, error) {
29+
preppedRules := make([]preppedKeywordRule, len(cfgRules))
30+
for i, rule := range cfgRules {
31+
// Validate operator
32+
switch rule.Operator {
33+
case "AND", "OR", "NOR":
34+
// Valid operator
35+
default:
36+
return nil, fmt.Errorf("unsupported keyword rule operator: %q for category %q", rule.Operator, rule.Category)
37+
}
38+
39+
preppedRule := preppedKeywordRule{
40+
Category: rule.Category,
41+
Operator: rule.Operator,
42+
CaseSensitive: rule.CaseSensitive,
43+
OriginalKeywords: rule.Keywords,
44+
}
45+
46+
// Compile regexps for both case-sensitive and case-insensitive
47+
preppedRule.CompiledRegexpsCS = make([]*regexp.Regexp, len(rule.Keywords))
48+
preppedRule.CompiledRegexpsCI = make([]*regexp.Regexp, len(rule.Keywords))
49+
50+
for j, keyword := range rule.Keywords {
51+
quotedKeyword := regexp.QuoteMeta(keyword)
52+
// Conditionally add word boundaries. If the keyword contains at least one word character,
53+
// apply word boundaries. Otherwise, match literally without boundaries.
54+
hasWordChar := false
55+
for _, r := range keyword {
56+
if unicode.IsLetter(r) || unicode.IsDigit(r) || r == '_' {
57+
hasWordChar = true
58+
break
59+
}
60+
}
61+
62+
patternCS := quotedKeyword
63+
patternCI := "(?i)" + quotedKeyword
64+
65+
if hasWordChar {
66+
patternCS = "\\b" + patternCS + "\\b"
67+
patternCI = "(?i)\\b" + quotedKeyword + "\\b"
68+
}
69+
70+
var err error
71+
preppedRule.CompiledRegexpsCS[j], err = regexp.Compile(patternCS)
72+
if err != nil {
73+
observability.Errorf("Failed to compile case-sensitive regex for keyword %q: %v", keyword, err)
74+
return nil, err
75+
}
76+
77+
preppedRule.CompiledRegexpsCI[j], err = regexp.Compile(patternCI)
78+
if err != nil {
79+
observability.Errorf("Failed to compile case-insensitive regex for keyword %q: %v", keyword, err)
80+
return nil, err
81+
}
82+
}
83+
preppedRules[i] = preppedRule
84+
}
85+
return &KeywordClassifier{rules: preppedRules}, nil
86+
}
87+
88+
// Classify performs keyword-based classification on the given text.
89+
func (c *KeywordClassifier) Classify(text string) (string, float64, error) {
90+
for _, rule := range c.rules {
91+
matched, keywords, err := c.matches(text, rule) // Error handled
92+
if err != nil {
93+
return "", 0.0, err // Propagate error
94+
}
95+
if matched {
96+
if len(keywords) > 0 {
97+
observability.Infof("Keyword-based classification matched category %q with keywords: %v", rule.Category, keywords)
98+
} else {
99+
observability.Infof("Keyword-based classification matched category %q with a NOR rule.", rule.Category)
100+
}
101+
return rule.Category, 1.0, nil
102+
}
103+
}
104+
return "", 0.0, nil
105+
}
106+
107+
// matches checks if the text matches the given keyword rule.
108+
func (c *KeywordClassifier) matches(text string, rule preppedKeywordRule) (bool, []string, error) {
109+
var matchedKeywords []string
110+
var regexpsToUse []*regexp.Regexp
111+
112+
if rule.CaseSensitive {
113+
regexpsToUse = rule.CompiledRegexpsCS
114+
} else {
115+
regexpsToUse = rule.CompiledRegexpsCI
116+
}
117+
118+
// Check for matches based on the operator
119+
switch rule.Operator {
120+
case "AND":
121+
for i, re := range regexpsToUse {
122+
if re == nil {
123+
return false, nil, fmt.Errorf("nil regular expression found in rule for category %q at index %d. This indicates a failed compilation during initialization", rule.Category, i)
124+
}
125+
if !re.MatchString(text) {
126+
return false, nil, nil
127+
}
128+
matchedKeywords = append(matchedKeywords, rule.OriginalKeywords[i])
129+
}
130+
return true, matchedKeywords, nil
131+
case "OR":
132+
for i, re := range regexpsToUse {
133+
if re == nil {
134+
return false, nil, fmt.Errorf("nil regular expression found in rule for category %q at index %d. This indicates a failed compilation during initialization", rule.Category, i)
135+
}
136+
if re.MatchString(text) {
137+
return true, []string{rule.OriginalKeywords[i]}, nil
138+
}
139+
}
140+
return false, nil, nil
141+
case "NOR":
142+
for i, re := range regexpsToUse {
143+
if re == nil {
144+
return false, nil, fmt.Errorf("nil regular expression found in rule for category %q at index %d. This indicates a failed compilation during initialization", rule.Category, i)
145+
}
146+
if re.MatchString(text) {
147+
return false, nil, nil
148+
}
149+
}
150+
return true, matchedKeywords, nil
151+
default:
152+
return false, nil, fmt.Errorf("unsupported keyword rule operator: %q", rule.Operator)
153+
}
154+
}

0 commit comments

Comments
 (0)