package extproc

import (
	"encoding/json"
	"time"

	. "github.com/onsi/ginkgo/v2"
	. "github.com/onsi/gomega"

	core "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
	ext_proc "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
	"github.com/prometheus/client_golang/prometheus"
	dto "github.com/prometheus/client_model/go"
)

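// getHistogramSampleCount returns the cumulative sample count of the named
// Prometheus histogram for the series whose "model" label equals the given
// model, or 0 if no such series has been observed yet.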
func getHistogramSampleCount(metricName, model string) uint64 {
	// Gather errors are deliberately ignored: a failed gather simply yields a zero count.
	mf, _ := prometheus.DefaultGatherer.Gather()
	for _, fam := range mf {
		if fam.GetName() != metricName || fam.GetType() != dto.MetricType_HISTOGRAM {
			continue
		}
		for _, m := range fam.GetMetric() {
			// Select the series whose "model" label matches the requested model.
			match := false
			for _, l := range m.GetLabel() {
				if l.GetName() == "model" && l.GetValue() == model {
					match = true
					break
				}
			}
			if match {
				if h := m.GetHistogram(); h != nil && h.SampleCount != nil {
					return h.GetSampleCount()
				}
			}
		}
	}
	return 0
}

var _ = Describe("Metrics recording", func() {
	var router *OpenAIRouter

	BeforeEach(func() {
		// Use a minimal router that doesn't require external models
		router = &OpenAIRouter{}
		// Initialize internal maps used by handlers
		router.InitializeForTesting()
	})

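	// TTFT (time to first token): ProcessingStartTime is backdated by 75ms so the
	// handler observes a positive latency when the response headers arrive. The
	// test asserts the histogram gained a sample by comparing counts before and
	// after the call.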
| 52 | + It("records TTFT on response headers", func() { |
| 53 | + ctx := &RequestContext{ |
| 54 | + RequestModel: "model-a", |
| 55 | + ProcessingStartTime: time.Now().Add(-75 * time.Millisecond), |
| 56 | + } |
| 57 | + |
| 58 | + before := getHistogramSampleCount("llm_model_ttft_seconds", ctx.RequestModel) |
| 59 | + |
| 60 | + respHeaders := &ext_proc.ProcessingRequest_ResponseHeaders{ |
| 61 | + ResponseHeaders: &ext_proc.HttpHeaders{ |
| 62 | + Headers: &core.HeaderMap{Headers: []*core.HeaderValue{{Key: "content-type", Value: "application/json"}}}, |
| 63 | + }, |
| 64 | + } |
| 65 | + |
| 66 | + response, err := router.handleResponseHeaders(respHeaders, ctx) |
| 67 | + Expect(err).NotTo(HaveOccurred()) |
| 68 | + Expect(response.GetResponseHeaders()).NotTo(BeNil()) |
| 69 | + |
| 70 | + after := getHistogramSampleCount("llm_model_ttft_seconds", ctx.RequestModel) |
| 71 | + Expect(after).To(BeNumerically(">", before)) |
| 72 | + Expect(ctx.TTFTRecorded).To(BeTrue()) |
| 73 | + Expect(ctx.TTFTSeconds).To(BeNumerically(">", 0)) |
| 74 | + }) |
| 75 | + |
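	// TPOT (time per output token): StartTime is backdated by one second and the
	// mocked OpenAI response reports completion_tokens, giving the handler both
	// the elapsed time and the token count it uses to record a TPOT sample.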
| 76 | + It("records TPOT on response body", func() { |
| 77 | + ctx := &RequestContext{ |
| 78 | + RequestID: "tpot-test-1", |
| 79 | + RequestModel: "model-a", |
| 80 | + StartTime: time.Now().Add(-1 * time.Second), |
| 81 | + } |
| 82 | + |
| 83 | + before := getHistogramSampleCount("llm_model_tpot_seconds", ctx.RequestModel) |
| 84 | + |
| 85 | + openAIResponse := map[string]interface{}{ |
| 86 | + "id": "chatcmpl-xyz", |
| 87 | + "object": "chat.completion", |
| 88 | + "created": time.Now().Unix(), |
| 89 | + "model": ctx.RequestModel, |
| 90 | + "usage": map[string]interface{}{ |
| 91 | + "prompt_tokens": 10, |
| 92 | + "completion_tokens": 5, |
| 93 | + "total_tokens": 15, |
| 94 | + }, |
| 95 | + "choices": []map[string]interface{}{ |
| 96 | + { |
| 97 | + "message": map[string]interface{}{"role": "assistant", "content": "Hello"}, |
| 98 | + "finish_reason": "stop", |
| 99 | + }, |
| 100 | + }, |
| 101 | + } |
| 102 | + respBodyJSON, err := json.Marshal(openAIResponse) |
| 103 | + Expect(err).NotTo(HaveOccurred()) |
| 104 | + |
| 105 | + respBody := &ext_proc.ProcessingRequest_ResponseBody{ |
| 106 | + ResponseBody: &ext_proc.HttpBody{Body: respBodyJSON}, |
| 107 | + } |
| 108 | + |
| 109 | + response, err := router.handleResponseBody(respBody, ctx) |
| 110 | + Expect(err).NotTo(HaveOccurred()) |
| 111 | + Expect(response.GetResponseBody()).NotTo(BeNil()) |
| 112 | + |
| 113 | + after := getHistogramSampleCount("llm_model_tpot_seconds", ctx.RequestModel) |
| 114 | + Expect(after).To(BeNumerically(">", before)) |
| 115 | + }) |
| 116 | +}) |