Skip to content

Commit 9ee8ad4

Browse files
committed
feat: add llm_classifier_latency_seconds
Signed-off-by: bitliu <[email protected]>
1 parent 2a52d57 commit 9ee8ad4

File tree

2 files changed

+24
-0
lines changed

2 files changed

+24
-0
lines changed

src/semantic-router/pkg/metrics/metrics.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,16 @@ var (
7070
},
7171
)
7272

73+
// ClassifierLatency is a histogram of classifier invocation latency in seconds, labeled by classifier type
74+
ClassifierLatency = promauto.NewHistogramVec(
75+
prometheus.HistogramOpts{
76+
Name: "llm_classifier_latency_seconds",
77+
Help: "The latency of classifier invocations by type",
78+
Buckets: prometheus.DefBuckets,
79+
},
80+
[]string{"classifier"},
81+
)
82+
7383
// CacheHits tracks cache hits and misses
7484
CacheHits = promauto.NewCounter(
7585
prometheus.CounterOpts{
@@ -154,3 +164,8 @@ func RecordPIIViolations(model string, piiTypes []string) {
154164
PIIViolations.WithLabelValues(model, piiType).Inc()
155165
}
156166
}
167+
168+
// RecordClassifierLatency observes seconds on the ClassifierLatency histogram under the given classifier label value.
169+
func RecordClassifierLatency(classifier string, seconds float64) {
170+
ClassifierLatency.WithLabelValues(classifier).Observe(seconds)
171+
}

src/semantic-router/pkg/utils/classification/classifier.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"log"
66
"strings"
77
"sync"
8+
"time"
89

910
candle_binding "github.com/vllm-project/semantic-router/candle-binding"
1011
"github.com/vllm-project/semantic-router/semantic-router/pkg/config"
@@ -115,13 +116,15 @@ func (c *Classifier) CheckForJailbreak(text string) (bool, string, float32, erro
115116
var result candle_binding.ClassResult
116117
var err error
117118

119+
start := time.Now()
118120
if c.Config.PromptGuard.UseModernBERT {
119121
// Use ModernBERT jailbreak classifier
120122
result, err = candle_binding.ClassifyModernBertJailbreakText(text)
121123
} else {
122124
// Use linear jailbreak classifier
123125
result, err = candle_binding.ClassifyJailbreakText(text)
124126
}
127+
metrics.RecordClassifierLatency("jailbreak", time.Since(start).Seconds())
125128

126129
if err != nil {
127130
return false, "", 0.0, fmt.Errorf("jailbreak classification failed: %w", err)
@@ -196,13 +199,15 @@ func (c *Classifier) ClassifyCategory(text string) (string, float64, error) {
196199
var result candle_binding.ClassResult
197200
var err error
198201

202+
start := time.Now()
199203
if c.Config.Classifier.CategoryModel.UseModernBERT {
200204
// Use ModernBERT classifier
201205
result, err = candle_binding.ClassifyModernBertText(text)
202206
} else {
203207
// Use linear classifier
204208
result, err = candle_binding.ClassifyText(text)
205209
}
210+
metrics.RecordClassifierLatency("category", time.Since(start).Seconds())
206211

207212
if err != nil {
208213
return "", 0.0, fmt.Errorf("classification error: %w", err)
@@ -243,7 +248,9 @@ func (c *Classifier) ClassifyPII(text string) ([]string, error) {
243248

244249
// Use ModernBERT PII token classifier for entity detection
245250
configPath := fmt.Sprintf("%s/config.json", c.Config.Classifier.PIIModel.ModelID)
251+
start := time.Now()
246252
tokenResult, err := candle_binding.ClassifyModernBertPIITokens(text, configPath)
253+
metrics.RecordClassifierLatency("pii", time.Since(start).Seconds())
247254
if err != nil {
248255
return nil, fmt.Errorf("PII token classification error: %w", err)
249256
}
@@ -323,7 +330,9 @@ func (c *Classifier) AnalyzeContentForPII(contentList []string) (bool, []PIIAnal
323330

324331
// Use ModernBERT PII token classifier for detailed analysis
325332
configPath := fmt.Sprintf("%s/config.json", c.Config.Classifier.PIIModel.ModelID)
333+
start := time.Now()
326334
tokenResult, err := candle_binding.ClassifyModernBertPIITokens(content, configPath)
335+
metrics.RecordClassifierLatency("pii", time.Since(start).Seconds())
327336
if err != nil {
328337
log.Printf("Error analyzing content %d: %v", i, err)
329338
continue

0 commit comments

Comments
 (0)