Skip to content

Commit 02bc5c3

Browse files
committed
fix review comment
Signed-off-by: googs1025 <[email protected]>
1 parent 597ed25 commit 02bc5c3

File tree

3 files changed

+292
-11
lines changed

3 files changed

+292
-11
lines changed

pkg/common/config.go

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -471,6 +471,34 @@ func (c *Configuration) validate() error {
471471
if c.FakeMetrics.KVCacheUsagePercentage < 0 || c.FakeMetrics.KVCacheUsagePercentage > 1 {
472472
return errors.New("fake metrics KV cache usage must be between 0 ans 1")
473473
}
474+
if c.FakeMetrics.RequestSuccessTotal != nil {
475+
for reason, count := range c.FakeMetrics.RequestSuccessTotal {
476+
if count < 0 {
477+
return fmt.Errorf("fake metrics request-success-total.%s cannot be negative, got %d", reason, count)
478+
}
479+
}
480+
requiredReasons := []string{StopFinishReason, LengthFinishReason, ToolsFinishReason, RemoteDecodeFinishReason}
481+
for _, reason := range requiredReasons {
482+
if _, exists := c.FakeMetrics.RequestSuccessTotal[reason]; !exists {
483+
return fmt.Errorf("missing required finish reason in request-success-total: %s", reason)
484+
}
485+
}
486+
}
487+
for _, v := range c.FakeMetrics.RequestPromptTokens {
488+
if v < 0 {
489+
return errors.New("fake metrics request-prompt-tokens cannot contain negative values")
490+
}
491+
}
492+
for _, v := range c.FakeMetrics.RequestGenerationTokens {
493+
if v < 0 {
494+
return errors.New("fake metrics request-generation-tokens cannot contain negative values")
495+
}
496+
}
497+
for _, v := range c.FakeMetrics.RequestParamsMaxTokens {
498+
if v < 0 {
499+
return errors.New("fake metrics request-params-max-tokens cannot contain negative values")
500+
}
501+
}
474502
}
475503

476504
if c.DPSize < 1 || c.DPSize > 8 {

pkg/llm-d-inference-sim/metrics.go

Lines changed: 63 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -164,17 +164,20 @@ func (s *VllmSimulator) setInitialPrometheusMetrics() {
164164
nRunningReqs = float64(s.config.FakeMetrics.RunningRequests)
165165
nWaitingReqs = float64(s.config.FakeMetrics.WaitingRequests)
166166
kvCacheUsage = float64(s.config.FakeMetrics.KVCacheUsagePercentage)
167-
for _, requestPromptToken := range s.config.FakeMetrics.RequestPromptTokens {
168-
s.requestPromptTokens.WithLabelValues(modelName).Observe(requestPromptToken)
167+
for reason, requestSuccessTotal := range s.config.FakeMetrics.RequestSuccessTotal {
168+
s.requestSuccessTotal.WithLabelValues(modelName, reason).Add(float64(requestSuccessTotal))
169169
}
170-
for _, requestGenerationToken := range s.config.FakeMetrics.RequestGenerationTokens {
171-
s.requestGenerationTokens.WithLabelValues(modelName).Observe(requestGenerationToken)
170+
buckets := build125Buckets(s.config.MaxModelLen)
171+
for _, sample := range generateSamplesFromBuckets(buckets, s.config.FakeMetrics.RequestPromptTokens) {
172+
s.requestPromptTokens.WithLabelValues(modelName).Observe(sample)
172173
}
173-
for _, requestParamsMaxToken := range s.config.FakeMetrics.RequestParamsMaxTokens {
174-
s.requestParamsMaxTokens.WithLabelValues(modelName).Observe(requestParamsMaxToken)
174+
175+
for _, sample := range generateSamplesFromBuckets(buckets, s.config.FakeMetrics.RequestGenerationTokens) {
176+
s.requestGenerationTokens.WithLabelValues(modelName).Observe(sample)
175177
}
176-
for reason, requestSuccessTotal := range s.config.FakeMetrics.RequestSuccessTotal {
177-
s.requestSuccessTotal.WithLabelValues(modelName, reason).Add(float64(requestSuccessTotal))
178+
179+
for _, sample := range generateSamplesFromBuckets(buckets, s.config.FakeMetrics.RequestParamsMaxTokens) {
180+
s.requestParamsMaxTokens.WithLabelValues(modelName).Observe(sample)
178181
}
179182

180183
}
@@ -424,3 +427,55 @@ func build125Buckets(maxValue int) []float64 {
424427
}
425428
return buckets
426429
}
430+
431+
// padCountsToFull pads the counts slice to length len(boundaries)+1 by
// appending zeros, so that every histogram bucket — including the final
// +Inf bucket — has an explicit count. If counts is longer than needed,
// the excess entries are silently dropped.
func padCountsToFull(boundaries []float64, counts []float64) []float64 {
	fullLen := len(boundaries) + 1
	if len(counts) > fullLen {
		return counts[:fullLen] // ignore entries beyond the +Inf bucket
	}
	padded := make([]float64, fullLen)
	copy(padded, counts)
	// remaining entries are zero by default
	return padded
}

// generateSamplesFromBuckets expands per-bucket counts into concrete
// observation values that fall inside the corresponding histogram buckets.
// boundaries holds the ascending upper bucket bounds; counts[i] is how many
// samples to emit for bucket i, where bucket 0 is (-Inf, boundaries[0]],
// bucket i is (boundaries[i-1], boundaries[i]], and the last bucket is
// (boundaries[len-1], +Inf). A counts slice shorter than len(boundaries)+1
// is treated as zero-padded (see padCountsToFull).
func generateSamplesFromBuckets(boundaries []float64, counts []float64) []float64 {
	fullCounts := padCountsToFull(boundaries, counts)
	var samples []float64

	for i, count := range fullCounts {
		if count == 0 {
			continue
		}

		var val float64
		switch {
		case len(boundaries) == 0:
			// No boundaries → a single bucket (-Inf, +Inf); any value works.
			val = 1.0
		case i == 0:
			// First bucket: (-Inf, boundaries[0]].
			val = boundaries[0] - 1.0
			// Keep samples positive when the first boundary is a small
			// positive number (token counts are positive). The fallback is
			// applied only for positive boundaries: if boundaries[0] <= 0,
			// halving could yield a value OUTSIDE the bucket (e.g. -4*0.5 =
			// -2 > -4), whereas boundaries[0]-1 is always inside it.
			if boundaries[0] > 0 && val <= 0 {
				val = boundaries[0] * 0.5
			}
		case i < len(boundaries):
			// Interior bucket: (boundaries[i-1], boundaries[i]] — use the midpoint.
			lower, upper := boundaries[i-1], boundaries[i]
			val = (lower + upper) / 2.0
			// Ensure the sample is strictly > lower and <= upper even for
			// degenerate (very close) bounds.
			if val <= lower {
				val = upper - (upper-lower)*0.1
			}
		default:
			// Last bucket: (boundaries[len-1], +Inf).
			val = boundaries[len(boundaries)-1] + 1.0
		}

		for j := 0; j < int(count); j++ {
			samples = append(samples, val)
		}
	}
	return samples
}

pkg/llm-d-inference-sim/metrics_test.go

Lines changed: 201 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,9 @@ package llmdinferencesim
1919
import (
2020
"context"
2121
"errors"
22+
"fmt"
2223
"io"
24+
"math"
2325
"net/http"
2426
"os"
2527
"reflect"
@@ -118,7 +120,7 @@ var _ = Describe("Simulator metrics", Ordered, func() {
118120

119121
It("Should record correct prompt and generation token counts", func() {
120122
modelName := "testmodel"
121-
prompt := strings.Repeat("hello ", 10)
123+
prompt := strings.Repeat("hello ", 25)
122124
maxTokens := 25
123125

124126
ctx := context.TODO()
@@ -153,10 +155,38 @@ var _ = Describe("Simulator metrics", Ordered, func() {
153155
data, err := io.ReadAll(metricsResp.Body)
154156
Expect(err).NotTo(HaveOccurred())
155157
metrics := string(data)
158+
// request_prompt_tokens_bucket
159+
Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="testmodel",le="1"} 0`))
160+
Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="testmodel",le="2"} 0`))
161+
Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="testmodel",le="5"} 0`))
162+
Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="testmodel",le="10"} 0`))
163+
Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="testmodel",le="20"} 0`))
156164
Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="testmodel",le="50"} 1`))
165+
Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="testmodel",le="100"} 1`))
166+
Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="testmodel",le="200"} 1`))
167+
Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="testmodel",le="500"} 1`))
168+
Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="testmodel",le="100"} 1`))
169+
Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="testmodel",le="+Inf"} 1`))
170+
// request_params_max_tokens_bucket
171+
Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="testmodel",le="1"} 0`))
172+
Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="testmodel",le="2"} 0`))
173+
Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="testmodel",le="5"} 0`))
174+
Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="testmodel",le="10"} 0`))
175+
Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="testmodel",le="20"} 0`))
157176
Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="testmodel",le="50"} 1`))
177+
Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="testmodel",le="100"} 1`))
178+
Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="testmodel",le="200"} 1`))
179+
Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="testmodel",le="500"} 1`))
180+
Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="testmodel",le="1000"} 1`))
181+
Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="testmodel",le="+Inf"} 1`))
182+
// request_generation_tokens
183+
// We do not verify the distribution of the number of tokens generated per request,
184+
// as the number of generated tokens is unpredictable in this test.
185+
// Therefore, we only verify the number of requests and the total number of generated tokens,
186+
// and skip the bucket distribution.
158187
Expect(metrics).To(ContainSubstring(`vllm:request_generation_tokens_count{model_name="testmodel"} 1`))
159-
Expect(metrics).To(ContainSubstring(`vllm:request_success_total{finish_reason="stop",model_name="testmodel"} 1`))
188+
// request_success_total
189+
Expect(metrics).To(MatchRegexp(`vllm:request_success_total{finish_reason="(stop|length)",model_name="testmodel"} 1`))
160190
})
161191

162192
It("Should send correct lora metrics", func() {
@@ -518,7 +548,32 @@ var _ = Describe("Simulator metrics", Ordered, func() {
518548
ctx := context.TODO()
519549
args := []string{"cmd", "--model", model, "--mode", common.ModeRandom,
520550
"--fake-metrics",
521-
"{\"running-requests\":10,\"waiting-requests\":30,\"kv-cache-usage\":0.4,\"loras\":[{\"running\":\"lora4,lora2\",\"waiting\":\"lora3\",\"timestamp\":1257894567},{\"running\":\"lora4,lora3\",\"waiting\":\"\",\"timestamp\":1257894569}]}",
551+
`{` +
552+
`"running-requests":10,` +
553+
`"waiting-requests":30,` +
554+
`"kv-cache-usage":0.4,` +
555+
`"request-success-total":{` +
556+
`"stop":20,` +
557+
`"length":0,` +
558+
`"tool_calls":0,` +
559+
`"remote_decode":0` +
560+
`},` +
561+
`"request-prompt-tokens":[10,20,30],` +
562+
`"request-generation-tokens":[10,20,30],` +
563+
`"request-params-max-tokens":[10,20,30],` +
564+
`"loras":[` +
565+
`{` +
566+
`"running":"lora4,lora2",` +
567+
`"waiting":"lora3",` +
568+
`"timestamp":1257894567` +
569+
`},` +
570+
`{` +
571+
`"running":"lora4,lora3",` +
572+
`"waiting":"",` +
573+
`"timestamp":1257894569` +
574+
`}` +
575+
`]` +
576+
`}`,
522577
}
523578

524579
client, err := startServerWithArgs(ctx, common.ModeRandom, args, nil)
@@ -536,6 +591,48 @@ var _ = Describe("Simulator metrics", Ordered, func() {
536591
Expect(metrics).To(ContainSubstring("vllm:gpu_cache_usage_perc{model_name=\"my_model\"} 0.4"))
537592
Expect(metrics).To(ContainSubstring("vllm:lora_requests_info{max_lora=\"1\",running_lora_adapters=\"lora4,lora2\",waiting_lora_adapters=\"lora3\"} 1.257894567e+09"))
538593
Expect(metrics).To(ContainSubstring("vllm:lora_requests_info{max_lora=\"1\",running_lora_adapters=\"lora4,lora3\",waiting_lora_adapters=\"\"} 1.257894569e+09"))
594+
595+
Expect(metrics).To(ContainSubstring(`vllm:request_generation_tokens_bucket{model_name="my_model",le="1"} 10`))
596+
Expect(metrics).To(ContainSubstring(`vllm:request_generation_tokens_bucket{model_name="my_model",le="2"} 30`))
597+
Expect(metrics).To(ContainSubstring(`vllm:request_generation_tokens_bucket{model_name="my_model",le="5"} 60`))
598+
Expect(metrics).To(ContainSubstring(`vllm:request_generation_tokens_bucket{model_name="my_model",le="10"} 60`))
599+
Expect(metrics).To(ContainSubstring(`vllm:request_generation_tokens_bucket{model_name="my_model",le="20"} 60`))
600+
Expect(metrics).To(ContainSubstring(`vllm:request_generation_tokens_bucket{model_name="my_model",le="50"} 60`))
601+
Expect(metrics).To(ContainSubstring(`vllm:request_generation_tokens_bucket{model_name="my_model",le="100"} 60`))
602+
Expect(metrics).To(ContainSubstring(`vllm:request_generation_tokens_bucket{model_name="my_model",le="200"} 60`))
603+
Expect(metrics).To(ContainSubstring(`vllm:request_generation_tokens_bucket{model_name="my_model",le="500"} 60`))
604+
Expect(metrics).To(ContainSubstring(`vllm:request_generation_tokens_bucket{model_name="my_model",le="1000"} 60`))
605+
Expect(metrics).To(ContainSubstring(`vllm:request_generation_tokens_bucket{model_name="my_model",le="+Inf"} 60`))
606+
607+
Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="my_model",le="1"} 10`))
608+
Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="my_model",le="2"} 30`))
609+
Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="my_model",le="5"} 60`))
610+
Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="my_model",le="10"} 60`))
611+
Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="my_model",le="20"} 60`))
612+
Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="my_model",le="50"} 60`))
613+
Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="my_model",le="100"} 60`))
614+
Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="my_model",le="200"} 60`))
615+
Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="my_model",le="500"} 60`))
616+
Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="my_model",le="1000"} 60`))
617+
Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="my_model",le="+Inf"} 60`))
618+
619+
Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="my_model",le="1"} 10`))
620+
Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="my_model",le="2"} 30`))
621+
Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="my_model",le="5"} 60`))
622+
Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="my_model",le="10"} 60`))
623+
Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="my_model",le="20"} 60`))
624+
Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="my_model",le="50"} 60`))
625+
Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="my_model",le="100"} 60`))
626+
Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="my_model",le="200"} 60`))
627+
Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="my_model",le="500"} 60`))
628+
Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="my_model",le="1000"} 60`))
629+
Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="my_model",le="+Inf"} 60`))
630+
631+
Expect(metrics).To(ContainSubstring(`vllm:request_success_total{finish_reason="length",model_name="my_model"} 0`))
632+
Expect(metrics).To(ContainSubstring(`vllm:request_success_total{finish_reason="remote_decode",model_name="my_model"} 0`))
633+
Expect(metrics).To(ContainSubstring(`vllm:request_success_total{finish_reason="stop",model_name="my_model"} 20`))
634+
Expect(metrics).To(ContainSubstring(`vllm:request_success_total{finish_reason="tool_calls",model_name="my_model"} 0`))
635+
539636
})
540637
})
541638
})
@@ -691,3 +788,104 @@ func TestBuild125Buckets(t *testing.T) {
691788
})
692789
}
693790
}
791+
792+
func validateSamplesInBuckets(t *testing.T, samples []float64, boundaries []float64, counts []float64) {
793+
fullCounts := padCountsToFull(boundaries, counts)
794+
// Now validate using fullCounts
795+
sortedSamples := make([]float64, len(samples))
796+
copy(sortedSamples, samples)
797+
sort.Float64s(sortedSamples)
798+
799+
actualCounts := make([]int, len(fullCounts))
800+
sampleIndex := 0
801+
802+
for bucketIndex := range fullCounts {
803+
var upperBound float64
804+
if bucketIndex == len(boundaries) {
805+
upperBound = math.Inf(+1)
806+
} else {
807+
upperBound = boundaries[bucketIndex]
808+
}
809+
810+
for sampleIndex < len(sortedSamples) && sortedSamples[sampleIndex] <= upperBound {
811+
actualCounts[bucketIndex]++
812+
sampleIndex++
813+
}
814+
}
815+
816+
// Verify each bucket
817+
for i, want := range fullCounts {
818+
if actualCounts[i] != int(want) {
819+
var lowerStr, upperStr string
820+
if i == 0 {
821+
lowerStr = "-Inf"
822+
} else {
823+
lowerStr = fmt.Sprintf("%.3f", boundaries[i-1])
824+
}
825+
if i == len(boundaries) {
826+
upperStr = "+Inf"
827+
} else {
828+
upperStr = fmt.Sprintf("%.3f", boundaries[i])
829+
}
830+
t.Errorf("bucket[%d] (%s, %s]: want %d, got %d",
831+
i, lowerStr, upperStr, int(want), actualCounts[i])
832+
}
833+
}
834+
835+
totalExpected := 0
836+
for _, c := range fullCounts {
837+
totalExpected += int(c)
838+
}
839+
if len(samples) != totalExpected {
840+
t.Errorf("total samples: want %d, got %d", totalExpected, len(samples))
841+
}
842+
}
843+
844+
func TestGenerateSamplesFromBuckets(t *testing.T) {
845+
tests := []struct {
846+
name string
847+
boundaries []float64
848+
counts []float64
849+
expectedSamples int
850+
}{
851+
{
852+
name: "short counts with non-zero in middle",
853+
boundaries: []float64{1, 2, 5, 10, 20, 50, 100, 200, 500, 1000},
854+
counts: []float64{0, 0, 0, 5, 0, 5},
855+
expectedSamples: 10,
856+
},
857+
{
858+
name: "empty boundaries → 1 bucket",
859+
boundaries: []float64{},
860+
counts: []float64{7},
861+
expectedSamples: 7,
862+
},
863+
{
864+
name: "single boundary → 2 buckets, short counts",
865+
boundaries: []float64{10.0},
866+
counts: []float64{3},
867+
expectedSamples: 3,
868+
},
869+
{
870+
name: "full counts provided",
871+
boundaries: []float64{1, 2, 5},
872+
counts: []float64{1, 0, 2, 1},
873+
expectedSamples: 4,
874+
},
875+
{
876+
name: "all zeros (short)",
877+
boundaries: []float64{1, 2, 5},
878+
counts: []float64{},
879+
expectedSamples: 0,
880+
},
881+
}
882+
for _, tt := range tests {
883+
t.Run(tt.name, func(t *testing.T) {
884+
samples := generateSamplesFromBuckets(tt.boundaries, tt.counts)
885+
if len(samples) != tt.expectedSamples {
886+
t.Fatalf("sample count mismatch: want %d, got %d", tt.expectedSamples, len(samples))
887+
}
888+
validateSamplesInBuckets(t, samples, tt.boundaries, tt.counts)
889+
})
890+
}
891+
}

0 commit comments

Comments
 (0)