
Commit 3541a37

fix review comment
Signed-off-by: googs1025 <[email protected]>
1 parent 597ed25

File tree

3 files changed: +247 −11 lines changed

pkg/common/config.go
pkg/llm-d-inference-sim/metrics.go
pkg/llm-d-inference-sim/metrics_test.go

pkg/common/config.go

Lines changed: 28 additions & 0 deletions

@@ -471,6 +471,34 @@ func (c *Configuration) validate() error {
 		if c.FakeMetrics.KVCacheUsagePercentage < 0 || c.FakeMetrics.KVCacheUsagePercentage > 1 {
 			return errors.New("fake metrics KV cache usage must be between 0 and 1")
 		}
+		if c.FakeMetrics.RequestSuccessTotal != nil {
+			for reason, count := range c.FakeMetrics.RequestSuccessTotal {
+				if count < 0 {
+					return fmt.Errorf("fake metrics request-success-total.%s cannot be negative, got %d", reason, count)
+				}
+			}
+			requiredReasons := []string{StopFinishReason, LengthFinishReason, ToolsFinishReason, RemoteDecodeFinishReason}
+			for _, reason := range requiredReasons {
+				if _, exists := c.FakeMetrics.RequestSuccessTotal[reason]; !exists {
+					return fmt.Errorf("missing required finish reason in request-success-total: %s", reason)
+				}
+			}
+		}
+		for _, v := range c.FakeMetrics.RequestPromptTokens {
+			if v < 0 {
+				return errors.New("fake metrics request-prompt-tokens cannot contain negative values")
+			}
+		}
+		for _, v := range c.FakeMetrics.RequestGenerationTokens {
+			if v < 0 {
+				return errors.New("fake metrics request-generation-tokens cannot contain negative values")
+			}
+		}
+		for _, v := range c.FakeMetrics.RequestParamsMaxTokens {
+			if v < 0 {
+				return errors.New("fake metrics request-params-max-tokens cannot contain negative values")
+			}
+		}
 	}

 	if c.DPSize < 1 || c.DPSize > 8 {
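
Taken together, the new checks pin down the fake-metrics contract: every count must be non-negative, and request-success-total must cover all four finish reasons. The following is a minimal, self-contained sketch of that contract; the constant values and the map's element type are assumptions inferred from this diff and the test below, not the package's actual declarations.

package main

import "fmt"

// Assumed values of the finish-reason constants referenced in the diff.
const (
	StopFinishReason         = "stop"
	LengthFinishReason       = "length"
	ToolsFinishReason        = "tool_calls"
	RemoteDecodeFinishReason = "remote_decode"
)

// validateSuccessTotal mirrors the request-success-total checks added to validate().
func validateSuccessTotal(totals map[string]int64) error {
	for reason, count := range totals {
		if count < 0 {
			return fmt.Errorf("request-success-total.%s cannot be negative, got %d", reason, count)
		}
	}
	for _, reason := range []string{StopFinishReason, LengthFinishReason, ToolsFinishReason, RemoteDecodeFinishReason} {
		if _, exists := totals[reason]; !exists {
			return fmt.Errorf("missing required finish reason in request-success-total: %s", reason)
		}
	}
	return nil
}

func main() {
	// "remote_decode" is missing, so the check fails.
	err := validateSuccessTotal(map[string]int64{"stop": 20, "length": 0, "tool_calls": 0})
	fmt.Println(err) // missing required finish reason in request-success-total: remote_decode
}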

pkg/llm-d-inference-sim/metrics.go

Lines changed: 29 additions & 8 deletions

@@ -164,17 +164,20 @@ func (s *VllmSimulator) setInitialPrometheusMetrics() {
 		nRunningReqs = float64(s.config.FakeMetrics.RunningRequests)
 		nWaitingReqs = float64(s.config.FakeMetrics.WaitingRequests)
 		kvCacheUsage = float64(s.config.FakeMetrics.KVCacheUsagePercentage)
-		for _, requestPromptToken := range s.config.FakeMetrics.RequestPromptTokens {
-			s.requestPromptTokens.WithLabelValues(modelName).Observe(requestPromptToken)
+		for reason, requestSuccessTotal := range s.config.FakeMetrics.RequestSuccessTotal {
+			s.requestSuccessTotal.WithLabelValues(modelName, reason).Add(float64(requestSuccessTotal))
 		}
-		for _, requestGenerationToken := range s.config.FakeMetrics.RequestGenerationTokens {
-			s.requestGenerationTokens.WithLabelValues(modelName).Observe(requestGenerationToken)
+		buckets := build125Buckets(s.config.MaxModelLen)
+		for _, sample := range generateSamplesFromBuckets(buckets, s.config.FakeMetrics.RequestPromptTokens) {
+			s.requestPromptTokens.WithLabelValues(modelName).Observe(sample)
 		}
-		for _, requestParamsMaxToken := range s.config.FakeMetrics.RequestParamsMaxTokens {
-			s.requestParamsMaxTokens.WithLabelValues(modelName).Observe(requestParamsMaxToken)
+
+		for _, sample := range generateSamplesFromBuckets(buckets, s.config.FakeMetrics.RequestGenerationTokens) {
+			s.requestGenerationTokens.WithLabelValues(modelName).Observe(sample)
 		}
-		for reason, requestSuccessTotal := range s.config.FakeMetrics.RequestSuccessTotal {
-			s.requestSuccessTotal.WithLabelValues(modelName, reason).Add(float64(requestSuccessTotal))
+
+		for _, sample := range generateSamplesFromBuckets(buckets, s.config.FakeMetrics.RequestParamsMaxTokens) {
+			s.requestParamsMaxTokens.WithLabelValues(modelName).Observe(sample)
 		}

 	}

@@ -424,3 +427,21 @@ func build125Buckets(maxValue int) []float64 {
 	}
 	return buckets
 }
+
+func generateSamplesFromBuckets(boundaries []float64, counts []float64) []float64 {
+	var samples []float64
+	prev := 0.0
+	for i, count := range counts {
+		boundary := boundaries[i]
+		// Take the midpoint of (prev, boundary] as the representative sample value.
+		val := (prev + boundary) / 2
+		if val >= boundary {
+			val = boundary * 0.9 // keep the sample inside the bucket
+		}
+		for j := int64(0); j < int64(count); j++ {
+			samples = append(samples, val)
+		}
+		prev = boundary
+	}
+	return samples
+}
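
To make the sampling behaviour concrete, here is a self-contained sketch: the function body is copied from the hunk above, while the boundaries and counts are made-up inputs (the simulator itself feeds it build125Buckets output and the configured per-bucket counts).

package main

import "fmt"

// Copied from the diff above: emit one representative value per bucket,
// repeated count times, so Observe()-ing the samples reproduces the
// configured histogram.
func generateSamplesFromBuckets(boundaries []float64, counts []float64) []float64 {
	var samples []float64
	prev := 0.0
	for i, count := range counts {
		boundary := boundaries[i]
		// Midpoint of (prev, boundary] as the representative sample value.
		val := (prev + boundary) / 2
		if val >= boundary {
			val = boundary * 0.9 // keep the sample inside the bucket
		}
		for j := int64(0); j < int64(count); j++ {
			samples = append(samples, val)
		}
		prev = boundary
	}
	return samples
}

func main() {
	// Buckets (0,1], (1,2], (2,5] holding 2, 1, and 3 requests respectively.
	fmt.Println(generateSamplesFromBuckets([]float64{1, 2, 5}, []float64{2, 1, 3}))
	// Output: [0.5 0.5 1.5 3.5 3.5 3.5]
}

Because a cumulative histogram only records which bucket each observation lands in, any in-bucket value would do; the midpoint is simply a stable choice.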

pkg/llm-d-inference-sim/metrics_test.go

Lines changed: 190 additions & 3 deletions

@@ -118,7 +118,7 @@ var _ = Describe("Simulator metrics", Ordered, func() {

 	It("Should record correct prompt and generation token counts", func() {
 		modelName := "testmodel"
-		prompt := strings.Repeat("hello ", 10)
+		prompt := strings.Repeat("hello ", 25)
 		maxTokens := 25

 		ctx := context.TODO()
@@ -153,10 +153,38 @@ var _ = Describe("Simulator metrics", Ordered, func() {
 		data, err := io.ReadAll(metricsResp.Body)
 		Expect(err).NotTo(HaveOccurred())
 		metrics := string(data)
+		// request_prompt_tokens_bucket
+		Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="testmodel",le="1"} 0`))
+		Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="testmodel",le="2"} 0`))
+		Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="testmodel",le="5"} 0`))
+		Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="testmodel",le="10"} 0`))
+		Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="testmodel",le="20"} 0`))
 		Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="testmodel",le="50"} 1`))
+		Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="testmodel",le="100"} 1`))
+		Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="testmodel",le="200"} 1`))
+		Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="testmodel",le="500"} 1`))
+		Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="testmodel",le="1000"} 1`))
+		Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="testmodel",le="+Inf"} 1`))
+		// request_params_max_tokens_bucket
+		Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="testmodel",le="1"} 0`))
+		Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="testmodel",le="2"} 0`))
+		Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="testmodel",le="5"} 0`))
+		Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="testmodel",le="10"} 0`))
+		Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="testmodel",le="20"} 0`))
 		Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="testmodel",le="50"} 1`))
+		Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="testmodel",le="100"} 1`))
+		Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="testmodel",le="200"} 1`))
+		Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="testmodel",le="500"} 1`))
+		Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="testmodel",le="1000"} 1`))
+		Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="testmodel",le="+Inf"} 1`))
+		// request_generation_tokens
+		// We do not verify the distribution of the number of tokens generated per request,
+		// as the number of generated tokens is unpredictable in this test.
+		// Therefore, we only verify the number of requests and the total number of generated tokens,
+		// and skip the bucket distribution.
 		Expect(metrics).To(ContainSubstring(`vllm:request_generation_tokens_count{model_name="testmodel"} 1`))
-		Expect(metrics).To(ContainSubstring(`vllm:request_success_total{finish_reason="stop",model_name="testmodel"} 1`))
+		// request_success_total
+		Expect(metrics).To(MatchRegexp(`vllm:request_success_total{finish_reason="(stop|length)",model_name="testmodel"} 1`))
 	})

 	It("Should send correct lora metrics", func() {
@@ -518,7 +546,32 @@ var _ = Describe("Simulator metrics", Ordered, func() {
 		ctx := context.TODO()
 		args := []string{"cmd", "--model", model, "--mode", common.ModeRandom,
 			"--fake-metrics",
-			"{\"running-requests\":10,\"waiting-requests\":30,\"kv-cache-usage\":0.4,\"loras\":[{\"running\":\"lora4,lora2\",\"waiting\":\"lora3\",\"timestamp\":1257894567},{\"running\":\"lora4,lora3\",\"waiting\":\"\",\"timestamp\":1257894569}]}",
+			`{` +
+				`"running-requests":10,` +
+				`"waiting-requests":30,` +
+				`"kv-cache-usage":0.4,` +
+				`"request-success-total":{` +
+				`"stop":20,` +
+				`"length":0,` +
+				`"tool_calls":0,` +
+				`"remote_decode":0` +
+				`},` +
+				`"request-prompt-tokens":[10,20,30],` +
+				`"request-generation-tokens":[10,20,30],` +
+				`"request-params-max-tokens":[10,20,30],` +
+				`"loras":[` +
+				`{` +
+				`"running":"lora4,lora2",` +
+				`"waiting":"lora3",` +
+				`"timestamp":1257894567` +
+				`},` +
+				`{` +
+				`"running":"lora4,lora3",` +
+				`"waiting":"",` +
+				`"timestamp":1257894569` +
+				`}` +
+				`]` +
+				`}`,
 		}

 		client, err := startServerWithArgs(ctx, common.ModeRandom, args, nil)
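
For readability, the concatenated pieces above form this single JSON document (pretty-printed here; the simulator receives it as one compact line):

{
  "running-requests": 10,
  "waiting-requests": 30,
  "kv-cache-usage": 0.4,
  "request-success-total": {
    "stop": 20,
    "length": 0,
    "tool_calls": 0,
    "remote_decode": 0
  },
  "request-prompt-tokens": [10, 20, 30],
  "request-generation-tokens": [10, 20, 30],
  "request-params-max-tokens": [10, 20, 30],
  "loras": [
    { "running": "lora4,lora2", "waiting": "lora3", "timestamp": 1257894567 },
    { "running": "lora4,lora3", "waiting": "", "timestamp": 1257894569 }
  ]
}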
@@ -536,6 +589,48 @@ var _ = Describe("Simulator metrics", Ordered, func() {
 			Expect(metrics).To(ContainSubstring("vllm:gpu_cache_usage_perc{model_name=\"my_model\"} 0.4"))
 			Expect(metrics).To(ContainSubstring("vllm:lora_requests_info{max_lora=\"1\",running_lora_adapters=\"lora4,lora2\",waiting_lora_adapters=\"lora3\"} 1.257894567e+09"))
 			Expect(metrics).To(ContainSubstring("vllm:lora_requests_info{max_lora=\"1\",running_lora_adapters=\"lora4,lora3\",waiting_lora_adapters=\"\"} 1.257894569e+09"))
+
+			Expect(metrics).To(ContainSubstring(`vllm:request_generation_tokens_bucket{model_name="my_model",le="1"} 10`))
+			Expect(metrics).To(ContainSubstring(`vllm:request_generation_tokens_bucket{model_name="my_model",le="2"} 30`))
+			Expect(metrics).To(ContainSubstring(`vllm:request_generation_tokens_bucket{model_name="my_model",le="5"} 60`))
+			Expect(metrics).To(ContainSubstring(`vllm:request_generation_tokens_bucket{model_name="my_model",le="10"} 60`))
+			Expect(metrics).To(ContainSubstring(`vllm:request_generation_tokens_bucket{model_name="my_model",le="20"} 60`))
+			Expect(metrics).To(ContainSubstring(`vllm:request_generation_tokens_bucket{model_name="my_model",le="50"} 60`))
+			Expect(metrics).To(ContainSubstring(`vllm:request_generation_tokens_bucket{model_name="my_model",le="100"} 60`))
+			Expect(metrics).To(ContainSubstring(`vllm:request_generation_tokens_bucket{model_name="my_model",le="200"} 60`))
+			Expect(metrics).To(ContainSubstring(`vllm:request_generation_tokens_bucket{model_name="my_model",le="500"} 60`))
+			Expect(metrics).To(ContainSubstring(`vllm:request_generation_tokens_bucket{model_name="my_model",le="1000"} 60`))
+			Expect(metrics).To(ContainSubstring(`vllm:request_generation_tokens_bucket{model_name="my_model",le="+Inf"} 60`))
+
+			Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="my_model",le="1"} 10`))
+			Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="my_model",le="2"} 30`))
+			Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="my_model",le="5"} 60`))
+			Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="my_model",le="10"} 60`))
+			Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="my_model",le="20"} 60`))
+			Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="my_model",le="50"} 60`))
+			Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="my_model",le="100"} 60`))
+			Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="my_model",le="200"} 60`))
+			Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="my_model",le="500"} 60`))
+			Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="my_model",le="1000"} 60`))
+			Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="my_model",le="+Inf"} 60`))
+
+			Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="my_model",le="1"} 10`))
+			Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="my_model",le="2"} 30`))
+			Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="my_model",le="5"} 60`))
+			Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="my_model",le="10"} 60`))
+			Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="my_model",le="20"} 60`))
+			Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="my_model",le="50"} 60`))
+			Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="my_model",le="100"} 60`))
+			Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="my_model",le="200"} 60`))
+			Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="my_model",le="500"} 60`))
+			Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="my_model",le="1000"} 60`))
+			Expect(metrics).To(ContainSubstring(`vllm:request_params_max_tokens_bucket{model_name="my_model",le="+Inf"} 60`))
+
+			Expect(metrics).To(ContainSubstring(`vllm:request_success_total{finish_reason="length",model_name="my_model"} 0`))
+			Expect(metrics).To(ContainSubstring(`vllm:request_success_total{finish_reason="remote_decode",model_name="my_model"} 0`))
+			Expect(metrics).To(ContainSubstring(`vllm:request_success_total{finish_reason="stop",model_name="my_model"} 20`))
+			Expect(metrics).To(ContainSubstring(`vllm:request_success_total{finish_reason="tool_calls",model_name="my_model"} 0`))
+
 		})
 	})
 })
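
The expected values follow directly from the fake-metrics config: with 1-2-5 boundaries (1, 2, 5, 10, 20, ...), generateSamplesFromBuckets turns the counts [10, 20, 30] into 10 samples at 0.5, 20 at 1.5, and 30 at 3.5. Prometheus histogram buckets are cumulative, so le="1" reports 10, le="2" reports 10+20=30, and le="5" and every larger bucket report 60; that is exactly what the assertions above check for all three histograms.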
@@ -691,3 +786,95 @@ func TestBuild125Buckets(t *testing.T) {
 		})
 	}
 }
+
+func validateSamplesInBuckets(t *testing.T, samples []float64, boundaries []float64, counts []float64) {
+	if len(boundaries) != len(counts) {
+		t.Fatalf("boundaries and counts length mismatch: %d vs %d", len(boundaries), len(counts))
+	}
+
+	prev := 0.0
+	for i, count := range counts {
+		if count == 0 {
+			prev = boundaries[i]
+			continue
+		}
+
+		lower, upper := prev, boundaries[i]
+		valueInBucket := 0
+
+		for _, v := range samples {
+			if v > lower && v <= upper {
+				valueInBucket++
+			}
+		}
+		if valueInBucket != int(count) {
+			t.Errorf("bucket[%d] (%.3f, %.3f]: want %d samples, got %d",
+				i, lower, upper, int(count), valueInBucket)
+		}
+		prev = upper
+	}
+
+	totalExpected := 0
+	for _, c := range counts {
+		totalExpected += int(c)
+	}
+	if len(samples) != totalExpected {
+		t.Errorf("total samples: want %d, got %d", totalExpected, len(samples))
+	}
+}
+
+func TestGenerateSamplesFromBuckets(t *testing.T) {
+	tests := []struct {
+		name            string
+		boundaries      []float64
+		counts          []float64
+		expectedSamples int
+	}{
+		{
+			name:            "normal 4-bucket case",
+			boundaries:      []float64{1.0, 2.0, 5.0, 10.0},
+			counts:          []float64{10, 20, 30, 15},
+			expectedSamples: 75,
+		},
+		{
+			name:            "zero count in middle",
+			boundaries:      []float64{1.0, 2.0, 5.0},
+			counts:          []float64{5, 0, 10},
+			expectedSamples: 15,
+		},
+		{
+			name:            "single bucket",
+			boundaries:      []float64{10.0},
+			counts:          []float64{5},
+			expectedSamples: 5,
+		},
+		{
+			name:            "all zeros",
+			boundaries:      []float64{1, 2, 5},
+			counts:          []float64{0, 0, 0},
+			expectedSamples: 0,
+		},
+		{
+			name:            "large numbers",
+			boundaries:      []float64{100, 1000, 10000},
+			counts:          []float64{1000, 2000, 3000},
+			expectedSamples: 6000,
+		},
+		{
+			name:            "empty inputs",
+			boundaries:      []float64{},
+			counts:          []float64{},
+			expectedSamples: 0,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			samples := generateSamplesFromBuckets(tt.boundaries, tt.counts)
+			if len(samples) != tt.expectedSamples {
+				t.Fatalf("sample count mismatch: want %d, got %d", tt.expectedSamples, len(samples))
+			}
+			validateSamplesInBuckets(t, samples, tt.boundaries, tt.counts)
+		})
+	}
+}
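
To run just the new table-driven test (assuming a standard Go toolchain and the repository root as working directory):

go test ./pkg/llm-d-inference-sim -run TestGenerateSamplesFromBuckets -v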
