@@ -49,6 +49,7 @@ const (
4949 reqRunningMetricName = "vllm:num_requests_running"
5050 reqWaitingMetricName = "vllm:num_requests_waiting"
5151 gpuCacheUsageMetricName = "vllm:gpu_cache_usage_perc"
52+ cacheConfigName = "vllm:cache_config_info"
5253)
5354
5455// createAndRegisterPrometheus creates and registers prometheus metrics used by vLLM simulator
@@ -85,7 +86,6 @@ func (s *VllmSimulator) createAndRegisterPrometheus() error {
8586 return err
8687 }
8788
88- // not supported for now, reports constant value
8989 s .metrics .waitingRequests = prometheus .NewGaugeVec (
9090 prometheus.GaugeOpts {
9191 Subsystem : "" ,
@@ -288,14 +288,27 @@ func (s *VllmSimulator) createAndRegisterPrometheus() error {
288288 return err
289289 }
290290
291- s .setInitialPrometheusMetrics ()
291+ cacheConfig := prometheus .NewGaugeVec (
292+ prometheus.GaugeOpts {
293+ Subsystem : "" ,
294+ Name : cacheConfigName ,
295+ Help : "Information of the LLMEngine CacheConfig." ,
296+ },
297+ []string {vllmapi .PromLabelCacheBlockSize , vllmapi .PromLabelCacheNumGPUBlocks },
298+ )
299+ if err := s .metrics .registry .Register (cacheConfig ); err != nil {
300+ s .logger .Error (err , "prometheus cache config register failed" )
301+ return err
302+ }
303+
304+ s .setInitialPrometheusMetrics (cacheConfig )
292305
293306 return nil
294307}
295308
296309// setInitialPrometheusMetrics sends the default values to prometheus or
297310// the fake metrics if set
298- func (s * VllmSimulator ) setInitialPrometheusMetrics () {
311+ func (s * VllmSimulator ) setInitialPrometheusMetrics (cacheConfig * prometheus. GaugeVec ) {
299312 var nRunningReqs , nWaitingReqs , kvCacheUsage float64
300313 modelName := s .getDisplayedModelName (s .config .Model )
301314 if s .config .FakeMetrics != nil {
@@ -352,6 +365,8 @@ func (s *VllmSimulator) setInitialPrometheusMetrics() {
352365 s .metrics .waitingRequests .WithLabelValues (modelName ).Set (nWaitingReqs )
353366 s .metrics .kvCacheUsagePercentage .WithLabelValues (modelName ).Set (kvCacheUsage )
354367
368+ cacheConfig .WithLabelValues (strconv .Itoa (s .config .TokenBlockSize ), strconv .Itoa (s .config .KVCacheSize )).Set (1 )
369+
355370 if s .config .FakeMetrics != nil && len (s .config .FakeMetrics .LoraMetrics ) != 0 {
356371 for _ , metrics := range s .config .FakeMetrics .LoraMetrics {
357372 s .metrics .loraInfo .WithLabelValues (
0 commit comments