Skip to content

Commit c805974

Browse files
committed
feat: Add comprehensive monitoring metrics for batch classification API
Signed-off-by: OneZero-Y <[email protected]>
1 parent 35c8ec8 commit c805974

File tree

8 files changed

+799
-36
lines changed

8 files changed

+799
-36
lines changed

config/config.yaml

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -244,4 +244,24 @@ api:
244244
batch_classification:
245245
max_batch_size: 100 # Maximum number of texts in a single batch
246246
concurrency_threshold: 5 # Switch to concurrent processing when batch size > this value
247-
max_concurrency: 8 # Maximum number of concurrent goroutines
247+
max_concurrency: 8 # Maximum number of concurrent goroutines
248+
249+
# Metrics configuration for monitoring batch classification performance
250+
metrics:
251+
enabled: true # Enable comprehensive metrics collection
252+
detailed_goroutine_tracking: true # Track individual goroutine lifecycle
253+
high_resolution_timing: false # Use nanosecond precision timing
254+
sample_rate: 1.0 # Collect metrics for all requests (1.0 = 100%, 0.5 = 50%)
255+
256+
# Batch size range labels for metrics (used in duration metrics labels)
257+
batch_size_ranges:
258+
- {min: 1, max: 1, label: "1"}
259+
- {min: 2, max: 5, label: "2-5"}
260+
- {min: 6, max: 10, label: "6-10"}
261+
- {min: 11, max: 20, label: "11-20"}
262+
- {min: 21, max: 50, label: "21-50"}
263+
- {min: 51, max: -1, label: "51+"} # -1 means no upper limit; label matches min (previous bucket already covers 50)
264+
265+
# Histogram buckets for metrics (directly configure what you need)
266+
duration_buckets: [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30]
267+
size_buckets: [1, 2, 5, 10, 20, 50, 100, 200]

src/semantic-router/pkg/api/server.go

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111
"time"
1212

1313
"github.com/vllm-project/semantic-router/semantic-router/pkg/config"
14+
"github.com/vllm-project/semantic-router/semantic-router/pkg/metrics"
1415
"github.com/vllm-project/semantic-router/semantic-router/pkg/services"
1516
)
1617

@@ -91,6 +92,20 @@ func StartClassificationAPI(configPath string, port int) error {
9192
classificationSvc = services.NewPlaceholderClassificationService()
9293
}
9394

95+
// Initialize batch metrics configuration
96+
if cfg != nil && cfg.API.BatchClassification.Metrics.Enabled {
97+
metricsConfig := metrics.BatchMetricsConfig{
98+
Enabled: cfg.API.BatchClassification.Metrics.Enabled,
99+
DetailedGoroutineTracking: cfg.API.BatchClassification.Metrics.DetailedGoroutineTracking,
100+
DurationBuckets: cfg.API.BatchClassification.Metrics.DurationBuckets,
101+
SizeBuckets: cfg.API.BatchClassification.Metrics.SizeBuckets,
102+
BatchSizeRanges: cfg.API.BatchClassification.Metrics.BatchSizeRanges,
103+
HighResolutionTiming: cfg.API.BatchClassification.Metrics.HighResolutionTiming,
104+
SampleRate: cfg.API.BatchClassification.Metrics.SampleRate,
105+
}
106+
metrics.SetBatchMetricsConfig(metricsConfig)
107+
}
108+
94109
// Create server instance
95110
apiServer := &ClassificationAPIServer{
96111
classificationSvc: classificationSvc,
@@ -231,6 +246,8 @@ func (s *ClassificationAPIServer) handleBatchClassification(w http.ResponseWrite
231246

232247
// Input validation
233248
if len(req.Texts) == 0 {
249+
// Record validation error in metrics
250+
metrics.RecordBatchClassificationError("validation", "empty_texts")
234251
s.writeErrorResponse(w, http.StatusBadRequest, "INVALID_INPUT", "texts array cannot be empty")
235252
return
236253
}
@@ -242,6 +259,8 @@ func (s *ClassificationAPIServer) handleBatchClassification(w http.ResponseWrite
242259
}
243260

244261
if len(req.Texts) > maxBatchSize {
262+
// Record validation error in metrics
263+
metrics.RecordBatchClassificationError("validation", "batch_too_large")
245264
s.writeErrorResponse(w, http.StatusBadRequest, "BATCH_TOO_LARGE",
246265
fmt.Sprintf("batch size cannot exceed %d texts", maxBatchSize))
247266
return
@@ -494,10 +513,26 @@ func (s *ClassificationAPIServer) getSystemInfo() SystemInfo {
494513

495514
// processSequentially handles small batches with sequential processing
496515
func (s *ClassificationAPIServer) processSequentially(texts []string, options *ClassificationOptions) ([]services.Classification, error) {
516+
start := time.Now()
517+
processingType := "sequential"
518+
batchSize := len(texts)
519+
520+
// Record request and batch size metrics
521+
metrics.RecordBatchClassificationRequest(processingType)
522+
metrics.RecordBatchSizeDistribution(processingType, batchSize)
523+
524+
// Defer recording processing time and text count
525+
defer func() {
526+
duration := time.Since(start).Seconds()
527+
metrics.RecordBatchClassificationDuration(processingType, batchSize, duration)
528+
metrics.RecordBatchClassificationTexts(processingType, batchSize)
529+
}()
530+
497531
results := make([]services.Classification, len(texts))
498532
for i, text := range texts {
499533
result, err := s.classifySingleText(text, options)
500534
if err != nil {
535+
metrics.RecordBatchClassificationError(processingType, "classification_failed")
501536
return nil, fmt.Errorf("failed to classify text at index %d: %w", i, err)
502537
}
503538
results[i] = result
@@ -507,6 +542,22 @@ func (s *ClassificationAPIServer) processSequentially(texts []string, options *C
507542

508543
// processConcurrently handles large batches with concurrent processing
509544
func (s *ClassificationAPIServer) processConcurrently(texts []string, options *ClassificationOptions) ([]services.Classification, error) {
545+
start := time.Now()
546+
processingType := "concurrent"
547+
batchSize := len(texts)
548+
batchID := fmt.Sprintf("batch_%d", time.Now().UnixNano())
549+
550+
// Record request and batch size metrics
551+
metrics.RecordBatchClassificationRequest(processingType)
552+
metrics.RecordBatchSizeDistribution(processingType, batchSize)
553+
554+
// Defer recording processing time and text count
555+
defer func() {
556+
duration := time.Since(start).Seconds()
557+
metrics.RecordBatchClassificationDuration(processingType, batchSize, duration)
558+
metrics.RecordBatchClassificationTexts(processingType, batchSize)
559+
}()
560+
510561
// Get max concurrency from config, default to 8
511562
maxConcurrency := 8
512563
if s.config != nil && s.config.API.BatchClassification.MaxConcurrency > 0 {
@@ -523,6 +574,18 @@ func (s *ClassificationAPIServer) processConcurrently(texts []string, options *C
523574
wg.Add(1)
524575
go func(index int, txt string) {
525576
defer wg.Done()
577+
578+
// Record goroutine start (if detailed tracking is enabled)
579+
metricsConfig := metrics.GetBatchMetricsConfig()
580+
if metricsConfig.DetailedGoroutineTracking {
581+
metrics.ConcurrentGoroutines.WithLabelValues(batchID).Inc()
582+
583+
defer func() {
584+
// Record goroutine end
585+
metrics.ConcurrentGoroutines.WithLabelValues(batchID).Dec()
586+
}()
587+
}
588+
526589
semaphore <- struct{}{}
527590
defer func() { <-semaphore }()
528591

@@ -532,6 +595,7 @@ func (s *ClassificationAPIServer) processConcurrently(texts []string, options *C
532595
result, err := s.classifySingleText(txt, options)
533596
if err != nil {
534597
errors[index] = err
598+
metrics.RecordBatchClassificationError(processingType, "classification_failed")
535599
return
536600
}
537601
results[index] = result

src/semantic-router/pkg/api/server_test.go

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -217,13 +217,17 @@ func TestBatchClassificationConfiguration(t *testing.T) {
217217
config: &config.RouterConfig{
218218
API: config.APIConfig{
219219
BatchClassification: struct {
220-
MaxBatchSize int `yaml:"max_batch_size,omitempty"`
221-
ConcurrencyThreshold int `yaml:"concurrency_threshold,omitempty"`
222-
MaxConcurrency int `yaml:"max_concurrency,omitempty"`
220+
MaxBatchSize int `yaml:"max_batch_size,omitempty"`
221+
ConcurrencyThreshold int `yaml:"concurrency_threshold,omitempty"`
222+
MaxConcurrency int `yaml:"max_concurrency,omitempty"`
223+
Metrics config.BatchClassificationMetricsConfig `yaml:"metrics,omitempty"`
223224
}{
224225
MaxBatchSize: 3, // Custom small limit
225226
ConcurrencyThreshold: 2,
226227
MaxConcurrency: 4,
228+
Metrics: config.BatchClassificationMetricsConfig{
229+
Enabled: true,
230+
},
227231
},
228232
},
229233
},
@@ -253,13 +257,17 @@ func TestBatchClassificationConfiguration(t *testing.T) {
253257
config: &config.RouterConfig{
254258
API: config.APIConfig{
255259
BatchClassification: struct {
256-
MaxBatchSize int `yaml:"max_batch_size,omitempty"`
257-
ConcurrencyThreshold int `yaml:"concurrency_threshold,omitempty"`
258-
MaxConcurrency int `yaml:"max_concurrency,omitempty"`
260+
MaxBatchSize int `yaml:"max_batch_size,omitempty"`
261+
ConcurrencyThreshold int `yaml:"concurrency_threshold,omitempty"`
262+
MaxConcurrency int `yaml:"max_concurrency,omitempty"`
263+
Metrics config.BatchClassificationMetricsConfig `yaml:"metrics,omitempty"`
259264
}{
260265
MaxBatchSize: 10,
261266
ConcurrencyThreshold: 3,
262267
MaxConcurrency: 2,
268+
Metrics: config.BatchClassificationMetricsConfig{
269+
Enabled: true,
270+
},
263271
},
264272
},
265273
},

src/semantic-router/pkg/config/config.go

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,9 +94,41 @@ type APIConfig struct {
9494

9595
// Maximum number of concurrent goroutines for batch processing
9696
MaxConcurrency int `yaml:"max_concurrency,omitempty"`
97+
98+
// Metrics configuration for batch classification monitoring
99+
Metrics BatchClassificationMetricsConfig `yaml:"metrics,omitempty"`
97100
} `yaml:"batch_classification"`
98101
}
99102

103+
// BatchClassificationMetricsConfig represents configuration for batch classification metrics
104+
type BatchClassificationMetricsConfig struct {
105+
// Sample rate for metrics collection (0.0-1.0, 1.0 means collect all metrics)
106+
SampleRate float64 `yaml:"sample_rate,omitempty"`
107+
108+
// Batch size range labels for metrics
109+
BatchSizeRanges []BatchSizeRangeConfig `yaml:"batch_size_ranges,omitempty"`
110+
111+
// Histogram buckets for metrics (directly configured)
112+
DurationBuckets []float64 `yaml:"duration_buckets,omitempty"`
113+
SizeBuckets []float64 `yaml:"size_buckets,omitempty"`
114+
115+
// Enable detailed metrics collection
116+
Enabled bool `yaml:"enabled,omitempty"`
117+
118+
// Enable detailed goroutine tracking (may impact performance)
119+
DetailedGoroutineTracking bool `yaml:"detailed_goroutine_tracking,omitempty"`
120+
121+
// Enable high-resolution timing (nanosecond precision)
122+
HighResolutionTiming bool `yaml:"high_resolution_timing,omitempty"`
123+
}
124+
125+
// BatchSizeRangeConfig defines a batch size range with its boundaries and label
126+
type BatchSizeRangeConfig struct {
127+
Min int `yaml:"min"`
128+
Max int `yaml:"max"` // -1 means no upper limit
129+
Label string `yaml:"label"`
130+
}
131+
100132
// PromptGuardConfig represents configuration for the prompt guard jailbreak detection
101133
type PromptGuardConfig struct {
102134
// Enable prompt guard jailbreak detection

0 commit comments

Comments
 (0)