
Commit ff4c9ea

Added cache-config-info metric (#256)
Signed-off-by: irar2 <[email protected]>
1 parent b3f93d6 commit ff4c9ea

File tree: 3 files changed, +38 −3 lines


pkg/llm-d-inference-sim/metrics.go

Lines changed: 18 additions & 3 deletions
```diff
@@ -49,6 +49,7 @@ const (
 	reqRunningMetricName    = "vllm:num_requests_running"
 	reqWaitingMetricName    = "vllm:num_requests_waiting"
 	gpuCacheUsageMetricName = "vllm:gpu_cache_usage_perc"
+	cacheConfigName         = "vllm:cache_config_info"
 )
 
 // createAndRegisterPrometheus creates and registers prometheus metrics used by vLLM simulator
@@ -85,7 +86,6 @@ func (s *VllmSimulator) createAndRegisterPrometheus() error {
 		return err
 	}
 
-	// not supported for now, reports constant value
 	s.metrics.waitingRequests = prometheus.NewGaugeVec(
 		prometheus.GaugeOpts{
 			Subsystem: "",
@@ -288,14 +288,27 @@ func (s *VllmSimulator) createAndRegisterPrometheus() error {
 		return err
 	}
 
-	s.setInitialPrometheusMetrics()
+	cacheConfig := prometheus.NewGaugeVec(
+		prometheus.GaugeOpts{
+			Subsystem: "",
+			Name:      cacheConfigName,
+			Help:      "Information of the LLMEngine CacheConfig.",
+		},
+		[]string{vllmapi.PromLabelCacheBlockSize, vllmapi.PromLabelCacheNumGPUBlocks},
+	)
+	if err := s.metrics.registry.Register(cacheConfig); err != nil {
+		s.logger.Error(err, "prometheus cache config register failed")
+		return err
+	}
+
+	s.setInitialPrometheusMetrics(cacheConfig)
 
 	return nil
 }
 
 // setInitialPrometheusMetrics sends the default values to prometheus or
 // the fake metrics if set
-func (s *VllmSimulator) setInitialPrometheusMetrics() {
+func (s *VllmSimulator) setInitialPrometheusMetrics(cacheConfig *prometheus.GaugeVec) {
 	var nRunningReqs, nWaitingReqs, kvCacheUsage float64
 	modelName := s.getDisplayedModelName(s.config.Model)
 	if s.config.FakeMetrics != nil {
@@ -352,6 +365,8 @@ func (s *VllmSimulator) setInitialPrometheusMetrics() {
 	s.metrics.waitingRequests.WithLabelValues(modelName).Set(nWaitingReqs)
 	s.metrics.kvCacheUsagePercentage.WithLabelValues(modelName).Set(kvCacheUsage)
 
+	cacheConfig.WithLabelValues(strconv.Itoa(s.config.TokenBlockSize), strconv.Itoa(s.config.KVCacheSize)).Set(1)
+
 	if s.config.FakeMetrics != nil && len(s.config.FakeMetrics.LoraMetrics) != 0 {
 		for _, metrics := range s.config.FakeMetrics.LoraMetrics {
 			s.metrics.loraInfo.WithLabelValues(
```
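The new gauge follows the Prometheus "info metric" convention: the sample value is pinned to 1 and the payload lives entirely in the labels, so the cache configuration can be joined against other series in queries. Below is a minimal, self-contained sketch of that pattern; the metric name, help text, and label names mirror the diff, while the standalone registry, the hard-coded config values, and the `testutil` printout are purely illustrative.

```go
package main

import (
	"fmt"
	"strconv"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/testutil"
)

func main() {
	// Info-style gauge: the labels, not the value, carry the information.
	cacheConfig := prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "vllm:cache_config_info",
			Help: "Information of the LLMEngine CacheConfig.",
		},
		[]string{"block_size", "num_gpu_blocks"},
	)

	reg := prometheus.NewRegistry()
	reg.MustRegister(cacheConfig)

	// Example values standing in for s.config.TokenBlockSize and
	// s.config.KVCacheSize in the commit.
	blockSize, numGPUBlocks := 8, 16
	cacheConfig.WithLabelValues(strconv.Itoa(blockSize), strconv.Itoa(numGPUBlocks)).Set(1)

	// The sample value is always 1; only the label set varies.
	fmt.Println(testutil.ToFloat64(cacheConfig)) // prints: 1
}
```

Because the value is constant, writing a different configuration later would add a second label set alongside the first rather than replace it, which is why the simulator emits this metric once at startup from `setInitialPrometheusMetrics`.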

pkg/llm-d-inference-sim/metrics_test.go

Lines changed: 18 additions & 0 deletions
```diff
@@ -598,6 +598,24 @@ var _ = Describe("Simulator metrics", Ordered, func() {
 		}()
 		wg.Wait()
 	})
+
+	It("Should send correct kv cache config metrics", func() {
+		ctx := context.TODO()
+		args := []string{"cmd", "--model", qwenModelName, "--mode", common.ModeRandom,
+			"--kv-cache-size", "16", "--block-size", "8"}
+
+		client, err := startServerWithArgs(ctx, args)
+		Expect(err).NotTo(HaveOccurred())
+
+		metricsResp, err := client.Get(metricsUrl)
+		Expect(err).NotTo(HaveOccurred())
+		Expect(metricsResp.StatusCode).To(Equal(http.StatusOK))
+
+		data, err := io.ReadAll(metricsResp.Body)
+		Expect(err).NotTo(HaveOccurred())
+		metrics := string(data)
+		Expect(metrics).To(ContainSubstring("vllm:cache_config_info{block_size=\"8\",num_gpu_blocks=\"16\"} 1"))
+	})
 })
 
 Context("fake metrics", func() {
```
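The test asserts on the raw exposition text, which works here because client_golang renders labels in a stable, sorted order. A consumer that needs the configuration values themselves, rather than a pass/fail substring check, can parse the scrape properly. Below is a sketch using the Prometheus text parser from `github.com/prometheus/common/expfmt`; the hard-coded input is an assumption standing in for the body of a real `GET /metrics` response.

```go
package main

import (
	"fmt"
	"strings"

	"github.com/prometheus/common/expfmt"
)

func main() {
	// Stand-in for the body of a real GET /metrics response.
	scraped := `# HELP vllm:cache_config_info Information of the LLMEngine CacheConfig.
# TYPE vllm:cache_config_info gauge
vllm:cache_config_info{block_size="8",num_gpu_blocks="16"} 1
`

	var parser expfmt.TextParser
	families, err := parser.TextToMetricFamilies(strings.NewReader(scraped))
	if err != nil {
		panic(err)
	}

	fam, ok := families["vllm:cache_config_info"]
	if !ok {
		panic("cache config metric not found")
	}

	// An info metric carries its payload in the labels, not the value.
	for _, m := range fam.GetMetric() {
		for _, lp := range m.GetLabel() {
			fmt.Printf("%s = %s\n", lp.GetName(), lp.GetValue())
		}
	}
}
```

Run against the sample above, this prints `block_size = 8` and `num_gpu_blocks = 16`.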

pkg/vllm-api/vllm-models.go

Lines changed: 2 additions & 0 deletions
```diff
@@ -26,6 +26,8 @@ const (
 	PromLabelMaxLora           = "max_lora"
 	PromLabelModelName         = "model_name"
 	PromLabelFinishReason      = "finish_reason"
+	PromLabelCacheBlockSize    = "block_size"
+	PromLabelCacheNumGPUBlocks = "num_gpu_blocks"
 )
 
 // modelInfo defines data about model returned by /models API
```
