Skip to content

Commit 2d4ecc5

Browse files
Copilotrootfs
andcommitted
Add comprehensive tracing instrumentation and tests
Co-authored-by: rootfs <[email protected]>
1 parent d86d7f4 commit 2d4ecc5

File tree

2 files changed

+297
-1
lines changed

2 files changed

+297
-1
lines changed

src/semantic-router/pkg/extproc/request_handler.go

Lines changed: 80 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -560,14 +560,50 @@ func (r *OpenAIRouter) handleModelRouting(openAIRequest *openai.ChatCompletionNe
560560
}
561561

562562
if classificationText != "" {
563+
// Start classification span
564+
classifyCtx, classifySpan := observability.StartSpan(ctx.TraceContext, observability.SpanClassification)
565+
classifyStart := time.Now()
566+
563567
// Find the most similar task description or classify, then select best model
564568
matchedModel := r.classifyAndSelectBestModel(classificationText)
569+
classifyTime := time.Since(classifyStart).Milliseconds()
570+
571+
// Get category information for the span
572+
categoryName := r.findCategoryForClassification(classificationText)
573+
574+
observability.SetSpanAttributes(classifySpan,
575+
attribute.String(observability.AttrCategoryName, categoryName),
576+
attribute.String(observability.AttrClassifierType, "bert"),
577+
attribute.Int64(observability.AttrClassificationTimeMs, classifyTime))
578+
classifySpan.End()
579+
ctx.TraceContext = classifyCtx
580+
565581
if matchedModel != originalModel && matchedModel != "" {
566-
// Get detected PII for policy checking
582+
// Start PII detection span if enabled
567583
allContent := pii.ExtractAllContent(userContent, nonUserMessages)
568584
if r.PIIChecker.IsPIIEnabled(matchedModel) {
585+
piiCtx, piiSpan := observability.StartSpan(ctx.TraceContext, observability.SpanPIIDetection)
586+
piiStart := time.Now()
587+
569588
observability.Infof("PII policy enabled for model %s", matchedModel)
570589
detectedPII := r.Classifier.DetectPIIInContent(allContent)
590+
591+
piiTime := time.Since(piiStart).Milliseconds()
592+
piiDetected := len(detectedPII) > 0
593+
594+
observability.SetSpanAttributes(piiSpan,
595+
attribute.Bool(observability.AttrPIIDetected, piiDetected),
596+
attribute.Int64(observability.AttrPIIDetectionTimeMs, piiTime))
597+
598+
if piiDetected {
599+
// Convert detected PII to comma-separated string
600+
piiTypesStr := strings.Join(detectedPII, ",")
601+
observability.SetSpanAttributes(piiSpan,
602+
attribute.String(observability.AttrPIITypes, piiTypesStr))
603+
}
604+
605+
piiSpan.End()
606+
ctx.TraceContext = piiCtx
571607

572608
// Check if the initially selected model passes PII policy
573609
allowed, deniedPII, err := r.PIIChecker.CheckPolicy(matchedModel, detectedPII)
@@ -622,6 +658,9 @@ func (r *OpenAIRouter) handleModelRouting(openAIRequest *openai.ChatCompletionNe
622658

623659
observability.Infof("Routing to model: %s", matchedModel)
624660

661+
// Start routing decision span
662+
routingCtx, routingSpan := observability.StartSpan(ctx.TraceContext, observability.SpanRoutingDecision)
663+
625664
// Check reasoning mode for this category using entropy-based approach
626665
useReasoning, categoryName, reasoningDecision := r.getEntropyBasedReasoningModeAndCategory(userContent)
627666
observability.Infof("Entropy-based reasoning decision for this query: %v on [%s] model (confidence: %.3f, reason: %s)",
@@ -630,6 +669,18 @@ func (r *OpenAIRouter) handleModelRouting(openAIRequest *openai.ChatCompletionNe
630669
effortForMetrics := r.getReasoningEffort(categoryName)
631670
metrics.RecordReasoningDecision(categoryName, matchedModel, useReasoning, effortForMetrics)
632671

672+
// Set routing attributes on span
673+
observability.SetSpanAttributes(routingSpan,
674+
attribute.String(observability.AttrRoutingStrategy, "auto"),
675+
attribute.String(observability.AttrRoutingReason, reasoningDecision.DecisionReason),
676+
attribute.String(observability.AttrOriginalModel, originalModel),
677+
attribute.String(observability.AttrSelectedModel, matchedModel),
678+
attribute.Bool(observability.AttrReasoningEnabled, useReasoning),
679+
attribute.String(observability.AttrReasoningEffort, effortForMetrics))
680+
681+
routingSpan.End()
682+
ctx.TraceContext = routingCtx
683+
633684
// Track VSR decision information
634685
ctx.VSRSelectedCategory = categoryName
635686
ctx.VSRSelectedModel = matchedModel
@@ -645,14 +696,28 @@ func (r *OpenAIRouter) handleModelRouting(openAIRequest *openai.ChatCompletionNe
645696
// Update the actual model that will be used
646697
actualModel = matchedModel
647698

699+
// Start backend selection span
700+
backendCtx, backendSpan := observability.StartSpan(ctx.TraceContext, observability.SpanBackendSelection)
701+
648702
// Select the best endpoint for this model
649703
endpointAddress, endpointFound := r.Config.SelectBestEndpointAddressForModel(matchedModel)
650704
if endpointFound {
651705
selectedEndpoint = endpointAddress
652706
observability.Infof("Selected endpoint address: %s for model: %s", selectedEndpoint, matchedModel)
707+
708+
// Extract endpoint name from config
709+
endpoints := r.Config.GetEndpointsForModel(matchedModel)
710+
if len(endpoints) > 0 {
711+
observability.SetSpanAttributes(backendSpan,
712+
attribute.String(observability.AttrEndpointName, endpoints[0].Name),
713+
attribute.String(observability.AttrEndpointAddress, selectedEndpoint))
714+
}
653715
} else {
654716
observability.Warnf("No endpoint found for model %s, using fallback", matchedModel)
655717
}
718+
719+
backendSpan.End()
720+
ctx.TraceContext = backendCtx
656721

657722
// Modify the model in the request
658723
openAIRequest.Model = openai.ChatModel(matchedModel)
@@ -688,21 +753,35 @@ func (r *OpenAIRouter) handleModelRouting(openAIRequest *openai.ChatCompletionNe
688753
}
689754

690755
if category != nil && category.SystemPrompt != "" && category.IsSystemPromptEnabled() {
756+
// Start system prompt injection span
757+
promptCtx, promptSpan := observability.StartSpan(ctx.TraceContext, observability.SpanSystemPromptInjection)
758+
691759
mode := category.GetSystemPromptMode()
692760
var injected bool
693761
modifiedBody, injected, err = addSystemPromptToRequestBody(modifiedBody, category.SystemPrompt, mode)
694762
if err != nil {
695763
observability.Errorf("Error adding system prompt to request: %v", err)
764+
observability.RecordError(promptSpan, err)
696765
metrics.RecordRequestError(actualModel, "serialization_error")
766+
promptSpan.End()
697767
return nil, status.Errorf(codes.Internal, "error adding system prompt: %v", err)
698768
}
769+
770+
observability.SetSpanAttributes(promptSpan,
771+
attribute.Bool("system_prompt.injected", injected),
772+
attribute.String("system_prompt.mode", mode),
773+
attribute.String(observability.AttrCategoryName, categoryName))
774+
699775
if injected {
700776
ctx.VSRInjectedSystemPrompt = true
701777
observability.Infof("Added category-specific system prompt for category: %s (mode: %s)", categoryName, mode)
702778
}
703779

704780
// Log metadata about system prompt injection (avoid logging sensitive user data)
705781
observability.Infof("System prompt injection completed for category: %s, body size: %d bytes", categoryName, len(modifiedBody))
782+
783+
promptSpan.End()
784+
ctx.TraceContext = promptCtx
706785
} else if category != nil && category.SystemPrompt != "" && !category.IsSystemPromptEnabled() {
707786
observability.Infof("System prompt disabled for category: %s", categoryName)
708787
}
Lines changed: 217 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,217 @@
1+
package observability
2+
3+
import (
4+
"context"
5+
"testing"
6+
7+
"go.opentelemetry.io/otel/attribute"
8+
"go.opentelemetry.io/otel/codes"
9+
)
10+
11+
func TestTracingConfiguration(t *testing.T) {
12+
tests := []struct {
13+
name string
14+
cfg TracingConfig
15+
wantErr bool
16+
}{
17+
{
18+
name: "disabled tracing",
19+
cfg: TracingConfig{
20+
Enabled: false,
21+
},
22+
wantErr: false,
23+
},
24+
{
25+
name: "stdout exporter",
26+
cfg: TracingConfig{
27+
Enabled: true,
28+
Provider: "opentelemetry",
29+
ExporterType: "stdout",
30+
SamplingType: "always_on",
31+
ServiceName: "test-service",
32+
ServiceVersion: "v1.0.0",
33+
DeploymentEnvironment: "test",
34+
},
35+
wantErr: false,
36+
},
37+
{
38+
name: "probabilistic sampling",
39+
cfg: TracingConfig{
40+
Enabled: true,
41+
Provider: "opentelemetry",
42+
ExporterType: "stdout",
43+
SamplingType: "probabilistic",
44+
SamplingRate: 0.5,
45+
ServiceName: "test-service",
46+
ServiceVersion: "v1.0.0",
47+
DeploymentEnvironment: "test",
48+
},
49+
wantErr: false,
50+
},
51+
}
52+
53+
for _, tt := range tests {
54+
t.Run(tt.name, func(t *testing.T) {
55+
ctx := context.Background()
56+
err := InitTracing(ctx, tt.cfg)
57+
if (err != nil) != tt.wantErr {
58+
t.Errorf("InitTracing() error = %v, wantErr %v", err, tt.wantErr)
59+
}
60+
61+
// Cleanup
62+
if err == nil {
63+
shutdownCtx := context.Background()
64+
_ = ShutdownTracing(shutdownCtx)
65+
}
66+
})
67+
}
68+
}
69+
70+
func TestSpanCreation(t *testing.T) {
71+
// Initialize tracing with stdout exporter
72+
ctx := context.Background()
73+
cfg := TracingConfig{
74+
Enabled: true,
75+
Provider: "opentelemetry",
76+
ExporterType: "stdout",
77+
SamplingType: "always_on",
78+
ServiceName: "test-service",
79+
ServiceVersion: "v1.0.0",
80+
DeploymentEnvironment: "test",
81+
}
82+
83+
err := InitTracing(ctx, cfg)
84+
if err != nil {
85+
t.Fatalf("Failed to initialize tracing: %v", err)
86+
}
87+
defer func() {
88+
shutdownCtx := context.Background()
89+
_ = ShutdownTracing(shutdownCtx)
90+
}()
91+
92+
// Test span creation
93+
spanCtx, span := StartSpan(ctx, SpanRequestReceived)
94+
if span == nil {
95+
t.Fatal("StartSpan returned nil span")
96+
}
97+
98+
// Test setting attributes
99+
SetSpanAttributes(span,
100+
attribute.String(AttrRequestID, "test-request-123"),
101+
attribute.String(AttrModelName, "gpt-4"),
102+
)
103+
104+
// Test recording error
105+
testErr := context.Canceled
106+
RecordError(span, testErr)
107+
span.SetStatus(codes.Error, "test error")
108+
109+
span.End()
110+
111+
// Verify context was updated
112+
if spanCtx == nil {
113+
t.Fatal("StartSpan returned nil context")
114+
}
115+
}
116+
117+
func TestTraceContextPropagation(t *testing.T) {
118+
// Initialize tracing
119+
ctx := context.Background()
120+
cfg := TracingConfig{
121+
Enabled: true,
122+
Provider: "opentelemetry",
123+
ExporterType: "stdout",
124+
SamplingType: "always_on",
125+
ServiceName: "test-service",
126+
ServiceVersion: "v1.0.0",
127+
DeploymentEnvironment: "test",
128+
}
129+
130+
err := InitTracing(ctx, cfg)
131+
if err != nil {
132+
t.Fatalf("Failed to initialize tracing: %v", err)
133+
}
134+
defer func() {
135+
shutdownCtx := context.Background()
136+
_ = ShutdownTracing(shutdownCtx)
137+
}()
138+
139+
// Create a span to establish trace context
140+
spanCtx, span := StartSpan(ctx, "test-span")
141+
defer span.End()
142+
143+
// Test injection
144+
headers := make(map[string]string)
145+
InjectTraceContext(spanCtx, headers)
146+
147+
// Verify trace context was injected
148+
if len(headers) == 0 {
149+
t.Error("InjectTraceContext did not inject any headers")
150+
}
151+
152+
// Test extraction
153+
extractedCtx := ExtractTraceContext(ctx, headers)
154+
if extractedCtx == nil {
155+
t.Error("ExtractTraceContext returned nil context")
156+
}
157+
}
158+
159+
func TestGetTracerWhenNotInitialized(t *testing.T) {
160+
// Don't initialize tracing
161+
tracer := GetTracer()
162+
if tracer == nil {
163+
t.Error("GetTracer returned nil when not initialized")
164+
}
165+
166+
// Should return a noop tracer that doesn't panic
167+
ctx := context.Background()
168+
_, span := tracer.Start(ctx, "test-span")
169+
if span == nil {
170+
t.Error("Noop tracer returned nil span")
171+
}
172+
span.End()
173+
}
174+
175+
func TestSpanAttributeConstants(t *testing.T) {
176+
// Verify span name constants are defined
177+
spanNames := []string{
178+
SpanRequestReceived,
179+
SpanClassification,
180+
SpanPIIDetection,
181+
SpanJailbreakDetection,
182+
SpanCacheLookup,
183+
SpanRoutingDecision,
184+
SpanBackendSelection,
185+
SpanUpstreamRequest,
186+
SpanResponseProcessing,
187+
SpanToolSelection,
188+
SpanSystemPromptInjection,
189+
}
190+
191+
for _, name := range spanNames {
192+
if name == "" {
193+
t.Errorf("Span name constant is empty")
194+
}
195+
if len(name) < 10 {
196+
t.Errorf("Span name %q is too short", name)
197+
}
198+
}
199+
200+
// Verify attribute key constants are defined
201+
attrKeys := []string{
202+
AttrRequestID,
203+
AttrModelName,
204+
AttrCategoryName,
205+
AttrRoutingStrategy,
206+
AttrPIIDetected,
207+
AttrJailbreakDetected,
208+
AttrCacheHit,
209+
AttrReasoningEnabled,
210+
}
211+
212+
for _, key := range attrKeys {
213+
if key == "" {
214+
t.Errorf("Attribute key constant is empty")
215+
}
216+
}
217+
}

0 commit comments

Comments
 (0)