Skip to content

Commit 8c622c6

Browse files
committed
fix review comment
Signed-off-by: googs1025 <[email protected]>
1 parent 4122fb7 commit 8c622c6

File tree

3 files changed

+301
-2
lines changed

3 files changed

+301
-2
lines changed

pkg/common/config.go

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -508,6 +508,34 @@ func (c *Configuration) validate() error {
508508
if c.FakeMetrics.KVCacheUsagePercentage < 0 || c.FakeMetrics.KVCacheUsagePercentage > 1 {
509509
return errors.New("fake metrics KV cache usage must be between 0 and 1")
510510
}
511+
if c.FakeMetrics.RequestSuccessTotal != nil {
512+
for reason, count := range c.FakeMetrics.RequestSuccessTotal {
513+
if count < 0 {
514+
return fmt.Errorf("fake metrics request-success-total.%s cannot be negative, got %d", reason, count)
515+
}
516+
}
517+
requiredReasons := []string{StopFinishReason, LengthFinishReason, ToolsFinishReason, RemoteDecodeFinishReason}
518+
for _, reason := range requiredReasons {
519+
if _, exists := c.FakeMetrics.RequestSuccessTotal[reason]; !exists {
520+
return fmt.Errorf("missing required finish reason in request-success-total: %s", reason)
521+
}
522+
}
523+
}
524+
for _, v := range c.FakeMetrics.RequestPromptTokens {
525+
if v < 0 {
526+
return errors.New("fake metrics request-prompt-tokens cannot contain negative values")
527+
}
528+
}
529+
for _, v := range c.FakeMetrics.RequestGenerationTokens {
530+
if v < 0 {
531+
return errors.New("fake metrics request-generation-tokens cannot contain negative values")
532+
}
533+
}
534+
for _, v := range c.FakeMetrics.RequestParamsMaxTokens {
535+
if v < 0 {
536+
return errors.New("fake metrics request-params-max-tokens cannot contain negative values")
537+
}
538+
}
511539
}
512540

513541
if c.DPSize < 1 || c.DPSize > 8 {

pkg/llm-d-inference-sim/metrics.go

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,7 @@ func (s *VllmSimulator) setInitialPrometheusMetrics() {
194194
nRunningReqs = float64(s.config.FakeMetrics.RunningRequests)
195195
nWaitingReqs = float64(s.config.FakeMetrics.WaitingRequests)
196196
kvCacheUsage = float64(s.config.FakeMetrics.KVCacheUsagePercentage)
197198
 
198199
if s.config.FakeMetrics.TTFTBucketValues != nil {
199200
s.initFakeHistogram(s.ttft, common.TTFTBucketsBoundaries, s.config.FakeMetrics.TTFTBucketValues)
@@ -211,9 +212,23 @@ func (s *VllmSimulator) setInitialPrometheusMetrics() {
211212
for _, requestParamsMaxToken := range s.config.FakeMetrics.RequestParamsMaxTokens {
212213
s.requestParamsMaxTokens.WithLabelValues(modelName).Observe(requestParamsMaxToken)
213214
}
214217
for reason, requestSuccessTotal := range s.config.FakeMetrics.RequestSuccessTotal {
215218
s.requestSuccessTotal.WithLabelValues(modelName, reason).Add(float64(requestSuccessTotal))
216219
}
220+
buckets := build125Buckets(s.config.MaxModelLen)
221+
for _, sample := range generateSamplesFromBuckets(buckets, s.config.FakeMetrics.RequestPromptTokens) {
222+
s.requestPromptTokens.WithLabelValues(modelName).Observe(sample)
223+
}
224+
225+
for _, sample := range generateSamplesFromBuckets(buckets, s.config.FakeMetrics.RequestGenerationTokens) {
226+
s.requestGenerationTokens.WithLabelValues(modelName).Observe(sample)
227+
}
228+
229+
for _, sample := range generateSamplesFromBuckets(buckets, s.config.FakeMetrics.RequestParamsMaxTokens) {
230+
s.requestParamsMaxTokens.WithLabelValues(modelName).Observe(sample)
231+
}
217232

218233
}
219234

@@ -539,3 +554,55 @@ func build125Buckets(maxValue int) []float64 {
539554
}
540555
return buckets
541556
}
557+
558+
// padCountsToFull pads the counts slice to length len(boundaries)+1 by appending zeros.
559+
func padCountsToFull(boundaries []float64, counts []float64) []float64 {
560+
fullLen := len(boundaries) + 1
561+
if len(counts) > fullLen {
562+
return counts[:fullLen] // truncate extra entries to the expected length
563+
}
564+
padded := make([]float64, fullLen)
565+
copy(padded, counts)
566+
// rest are zero by default
567+
return padded
568+
}
569+
570+
func generateSamplesFromBuckets(boundaries []float64, counts []float64) []float64 {
571+
fullCounts := padCountsToFull(boundaries, counts)
572+
var samples []float64
573+
574+
for i, count := range fullCounts {
575+
if count == 0 {
576+
continue
577+
}
578+
579+
var val float64
580+
if len(boundaries) == 0 {
581+
// No boundaries → one bucket (-Inf, +Inf)
582+
val = 1.0
583+
} else if i == 0 {
584+
// Bucket: (-Inf, boundaries[0]]
585+
val = boundaries[0] - 1.0
586+
if val <= 0 { // avoid non-positive if boundary is small
587+
val = boundaries[0] * 0.5
588+
}
589+
} else if i < len(boundaries) {
590+
// Bucket: (boundaries[i-1], boundaries[i]]
591+
lower := boundaries[i-1]
592+
upper := boundaries[i]
593+
val = (lower + upper) / 2.0
594+
// Ensure it's strictly > lower and <= upper
595+
if val <= lower {
596+
val = upper - (upper-lower)*0.1
597+
}
598+
} else {
599+
// Last bucket: (boundaries[len-1], +Inf)
600+
val = boundaries[len(boundaries)-1] + 1.0
601+
}
602+
603+
for j := 0; j < int(count); j++ {
604+
samples = append(samples, val)
605+
}
606+
}
607+
return samples
608+
}

pkg/llm-d-inference-sim/metrics_test.go

Lines changed: 206 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,9 @@ package llmdinferencesim
1919
import (
2020
"context"
2121
"errors"
22+
"fmt"
2223
"io"
24+
"math"
2325
"net/http"
2426
"os"
2527
"reflect"
@@ -109,7 +111,7 @@ var _ = Describe("Simulator metrics", Ordered, func() {
109111

110112
It("Should record correct prompt and generation token counts", func() {
111113
modelName := "testmodel"
112-
prompt := strings.Repeat("hello ", 10)
114+
prompt := strings.Repeat("hello ", 25)
113115
maxTokens := 25
114116

115117
ctx := context.TODO()
@@ -144,10 +146,38 @@ var _ = Describe("Simulator metrics", Ordered, func() {
144146
data, err := io.ReadAll(metricsResp.Body)
145147
Expect(err).NotTo(HaveOccurred())
146148
metrics := string(data)
149+
// request_prompt_tokens_bucket
150+
Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="testmodel",le="1"} 0`))
151+
Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="testmodel",le="2"} 0`))
152+
Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="testmodel",le="5"} 0`))
153+
Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="testmodel",le="10"} 0`))
154+
Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="testmodel",le="20"} 0`))
147155
Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="testmodel",le="50"} 1`))
156+
Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="testmodel",le="100"} 1`))
157+
Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="testmodel",le="200"} 1`))
158+
Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="testmodel",le="500"} 1`))
159+
Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="testmodel",le="1000"} 1`))
160+
Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="testmodel",le="+Inf"} 1`))
161+
// request_params_max_tokens_bucket
162+
Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="testmodel",le="1"} 0`))
163+
Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="testmodel",le="2"} 0`))
164+
Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="testmodel",le="5"} 0`))
165+
Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="testmodel",le="10"} 0`))
166+
Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="testmodel",le="20"} 0`))
148167
Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="testmodel",le="50"} 1`))
168+
Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="testmodel",le="100"} 1`))
169+
Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="testmodel",le="200"} 1`))
170+
Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="testmodel",le="500"} 1`))
171+
Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="testmodel",le="1000"} 1`))
172+
Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="testmodel",le="+Inf"} 1`))
173+
// request_generation_tokens
174+
// We do not verify the distribution of the number of tokens generated per request,
175+
// as the number of generated tokens is unpredictable in this test.
176+
// Therefore, we only verify the number of requests and the total number of generated tokens,
177+
// and skip the bucket distribution.
149178
Expect(metrics).To(ContainSubstring(`vllm:request_generation_tokens_count{model_name="testmodel"} 1`))
150-
Expect(metrics).To(ContainSubstring(`vllm:request_success_total{finish_reason="stop",model_name="testmodel"} 1`))
179+
// request_success_total
180+
Expect(metrics).To(MatchRegexp(`vllm:request_success_total{finish_reason="(stop|length)",model_name="testmodel"} 1`))
151181
})
152182

153183
It("Should send correct lora metrics", func() {
@@ -632,7 +662,36 @@ var _ = Describe("Simulator metrics", Ordered, func() {
632662
ctx := context.TODO()
633663
args := []string{"cmd", "--model", model, "--mode", common.ModeRandom,
634664
"--fake-metrics",
668+
`{` +
669+
`"running-requests":10,` +
670+
`"waiting-requests":30,` +
671+
`"kv-cache-usage":0.4,` +
672+
`"request-success-total":{` +
673+
`"stop":20,` +
674+
`"length":0,` +
675+
`"tool_calls":0,` +
676+
`"remote_decode":0` +
677+
`},` +
678+
`"request-prompt-tokens":[10,20,30],` +
679+
`"request-generation-tokens":[10,20,30],` +
680+
`"request-params-max-tokens":[10,20,30],` +
681+
`"loras":[` +
682+
`{` +
683+
`"running":"lora4,lora2",` +
684+
`"waiting":"lora3",` +
685+
`"timestamp":1257894567` +
686+
`},` +
687+
`{` +
688+
`"running":"lora4,lora3",` +
689+
`"waiting":"",` +
690+
`"timestamp":1257894569` +
691+
`}` +
692+
`]` +
693+
`}`,
636695
}
637696

638697
client, err := startServerWithArgs(ctx, common.ModeRandom, args, nil)
@@ -651,6 +710,7 @@ var _ = Describe("Simulator metrics", Ordered, func() {
651710
Expect(metrics).To(ContainSubstring("vllm:lora_requests_info{max_lora=\"1\",running_lora_adapters=\"lora4,lora2\",waiting_lora_adapters=\"lora3\"} 1.257894567e+09"))
652711
Expect(metrics).To(ContainSubstring("vllm:lora_requests_info{max_lora=\"1\",running_lora_adapters=\"lora4,lora3\",waiting_lora_adapters=\"\"} 1.257894569e+09"))
653712

771+
Expect(metrics).To(ContainSubstring(`vllm:request_generation_tokens_bucket{model_name="my_model",le="1"} 10`))
772+
Expect(metrics).To(ContainSubstring(`vllm:request_generation_tokens_bucket{model_name="my_model",le="2"} 30`))
773+
Expect(metrics).To(ContainSubstring(`vllm:request_generation_tokens_bucket{model_name="my_model",le="5"} 60`))
774+
Expect(metrics).To(ContainSubstring(`vllm:request_generation_tokens_bucket{model_name="my_model",le="10"} 60`))
775+
Expect(metrics).To(ContainSubstring(`vllm:request_generation_tokens_bucket{model_name="my_model",le="20"} 60`))
776+
Expect(metrics).To(ContainSubstring(`vllm:request_generation_tokens_bucket{model_name="my_model",le="50"} 60`))
777+
Expect(metrics).To(ContainSubstring(`vllm:request_generation_tokens_bucket{model_name="my_model",le="100"} 60`))
778+
Expect(metrics).To(ContainSubstring(`vllm:request_generation_tokens_bucket{model_name="my_model",le="200"} 60`))
779+
Expect(metrics).To(ContainSubstring(`vllm:request_generation_tokens_bucket{model_name="my_model",le="500"} 60`))
780+
Expect(metrics).To(ContainSubstring(`vllm:request_generation_tokens_bucket{model_name="my_model",le="1000"} 60`))
781+
Expect(metrics).To(ContainSubstring(`vllm:request_generation_tokens_bucket{model_name="my_model",le="+Inf"} 60`))
782+
783+
Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="my_model",le="1"} 10`))
784+
Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="my_model",le="2"} 30`))
785+
Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="my_model",le="5"} 60`))
786+
Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="my_model",le="10"} 60`))
787+
Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="my_model",le="20"} 60`))
788+
Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="my_model",le="50"} 60`))
789+
Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="my_model",le="100"} 60`))
790+
Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="my_model",le="200"} 60`))
791+
Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="my_model",le="500"} 60`))
792+
Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="my_model",le="1000"} 60`))
793+
Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="my_model",le="+Inf"} 60`))
794+
795+
Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="my_model",le="1"} 10`))
796+
Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="my_model",le="2"} 30`))
797+
Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="my_model",le="5"} 60`))
798+
Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="my_model",le="10"} 60`))
799+
Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="my_model",le="20"} 60`))
800+
Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="my_model",le="50"} 60`))
801+
Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="my_model",le="100"} 60`))
802+
Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="my_model",le="200"} 60`))
803+
Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="my_model",le="500"} 60`))
804+
Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="my_model",le="1000"} 60`))
805+
Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="my_model",le="+Inf"} 60`))
806+
807+
Expect(metrics).To(ContainSubstring(`vllm:request_success_total{finish_reason="length",model_name="my_model"} 0`))
808+
Expect(metrics).To(ContainSubstring(`vllm:request_success_total{finish_reason="remote_decode",model_name="my_model"} 0`))
809+
Expect(metrics).To(ContainSubstring(`vllm:request_success_total{finish_reason="stop",model_name="my_model"} 20`))
810+
Expect(metrics).To(ContainSubstring(`vllm:request_success_total{finish_reason="tool_calls",model_name="my_model"} 0`))
811+
812+
710813
})
711814
})
712815
})
@@ -890,3 +993,104 @@ func TestBuild125Buckets(t *testing.T) {
890993
})
891994
}
892995
}
996+
997+
func validateSamplesInBuckets(t *testing.T, samples []float64, boundaries []float64, counts []float64) {
998+
fullCounts := padCountsToFull(boundaries, counts)
999+
// Now validate using fullCounts
1000+
sortedSamples := make([]float64, len(samples))
1001+
copy(sortedSamples, samples)
1002+
sort.Float64s(sortedSamples)
1003+
1004+
actualCounts := make([]int, len(fullCounts))
1005+
sampleIndex := 0
1006+
1007+
for bucketIndex := range fullCounts {
1008+
var upperBound float64
1009+
if bucketIndex == len(boundaries) {
1010+
upperBound = math.Inf(+1)
1011+
} else {
1012+
upperBound = boundaries[bucketIndex]
1013+
}
1014+
1015+
for sampleIndex < len(sortedSamples) && sortedSamples[sampleIndex] <= upperBound {
1016+
actualCounts[bucketIndex]++
1017+
sampleIndex++
1018+
}
1019+
}
1020+
1021+
// Verify each bucket
1022+
for i, want := range fullCounts {
1023+
if actualCounts[i] != int(want) {
1024+
var lowerStr, upperStr string
1025+
if i == 0 {
1026+
lowerStr = "-Inf"
1027+
} else {
1028+
lowerStr = fmt.Sprintf("%.3f", boundaries[i-1])
1029+
}
1030+
if i == len(boundaries) {
1031+
upperStr = "+Inf"
1032+
} else {
1033+
upperStr = fmt.Sprintf("%.3f", boundaries[i])
1034+
}
1035+
t.Errorf("bucket[%d] (%s, %s]: want %d, got %d",
1036+
i, lowerStr, upperStr, int(want), actualCounts[i])
1037+
}
1038+
}
1039+
1040+
totalExpected := 0
1041+
for _, c := range fullCounts {
1042+
totalExpected += int(c)
1043+
}
1044+
if len(samples) != totalExpected {
1045+
t.Errorf("total samples: want %d, got %d", totalExpected, len(samples))
1046+
}
1047+
}
1048+
1049+
func TestGenerateSamplesFromBuckets(t *testing.T) {
1050+
tests := []struct {
1051+
name string
1052+
boundaries []float64
1053+
counts []float64
1054+
expectedSamples int
1055+
}{
1056+
{
1057+
name: "short counts with non-zero in middle",
1058+
boundaries: []float64{1, 2, 5, 10, 20, 50, 100, 200, 500, 1000},
1059+
counts: []float64{0, 0, 0, 5, 0, 5},
1060+
expectedSamples: 10,
1061+
},
1062+
{
1063+
name: "empty boundaries → 1 bucket",
1064+
boundaries: []float64{},
1065+
counts: []float64{7},
1066+
expectedSamples: 7,
1067+
},
1068+
{
1069+
name: "single boundary → 2 buckets, short counts",
1070+
boundaries: []float64{10.0},
1071+
counts: []float64{3},
1072+
expectedSamples: 3,
1073+
},
1074+
{
1075+
name: "full counts provided",
1076+
boundaries: []float64{1, 2, 5},
1077+
counts: []float64{1, 0, 2, 1},
1078+
expectedSamples: 4,
1079+
},
1080+
{
1081+
name: "all zeros (short)",
1082+
boundaries: []float64{1, 2, 5},
1083+
counts: []float64{},
1084+
expectedSamples: 0,
1085+
},
1086+
}
1087+
for _, tt := range tests {
1088+
t.Run(tt.name, func(t *testing.T) {
1089+
samples := generateSamplesFromBuckets(tt.boundaries, tt.counts)
1090+
if len(samples) != tt.expectedSamples {
1091+
t.Fatalf("sample count mismatch: want %d, got %d", tt.expectedSamples, len(samples))
1092+
}
1093+
validateSamplesInBuckets(t, samples, tt.boundaries, tt.counts)
1094+
})
1095+
}
1096+
}

0 commit comments

Comments
 (0)