Skip to content

Commit 9b40212

Browse files
committed
Show config in yaml
Signed-off-by: Qifan Deng <[email protected]>
1 parent 9c541b9 commit 9b40212

File tree

1 file changed

+87
-0
lines changed

1 file changed

+87
-0
lines changed

pkg/llm-d-inference-sim/simulator.go

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ import (
2929
"github.com/prometheus/client_golang/prometheus"
3030
"github.com/valyala/fasthttp"
3131
"golang.org/x/sync/errgroup"
32+
"gopkg.in/yaml.v3"
3233
"k8s.io/klog/v2"
3334

3435
"github.com/llm-d/llm-d-inference-sim/pkg/common"
@@ -497,3 +498,89 @@ func (s *VllmSimulator) createModelsResponse() *vllmapi.ModelsResponse {
497498

498499
return &modelsResp
499500
}
501+
<<<<<<< HEAD
502+
=======
503+
504+
// HandleHealth http handler for /health
505+
func (s *VllmSimulator) HandleHealth(ctx *fasthttp.RequestCtx) {
506+
s.logger.V(4).Info("health request received")
507+
ctx.Response.Header.SetContentType("application/json")
508+
ctx.Response.Header.SetStatusCode(fasthttp.StatusOK)
509+
ctx.Response.SetBody([]byte("{}"))
510+
}
511+
512+
// HandleReady http handler for /ready
513+
func (s *VllmSimulator) HandleReady(ctx *fasthttp.RequestCtx) {
514+
s.logger.V(4).Info("readiness request received")
515+
ctx.Response.Header.SetContentType("application/json")
516+
ctx.Response.Header.SetStatusCode(fasthttp.StatusOK)
517+
ctx.Response.SetBody([]byte("{}"))
518+
}
519+
520+
// getDisplayedModelName returns the model name that must appear in API
521+
// responses. LoRA adapters keep their explicit name, while all base-model
522+
// requests are surfaced as the first alias from --served-model-name.
523+
func (s *VllmSimulator) getDisplayedModelName(reqModel string) string {
524+
if s.isLora(reqModel) {
525+
return reqModel
526+
}
527+
return s.config.ServedModelNames[0]
528+
}
529+
530+
func (s *VllmSimulator) showConfig(dp bool) error {
531+
cfgYAML, err := yaml.Marshal(s.config)
532+
if err != nil {
533+
return fmt.Errorf("failed to marshal configuration to YAML: %w", err)
534+
}
535+
536+
var m map[string]interface{}
537+
err = yaml.Unmarshal(cfgYAML, &m)
538+
if err != nil {
539+
return fmt.Errorf("failed to unmarshal YAML to map: %w", err)
540+
}
541+
if dp {
542+
// remove the port
543+
delete(m, "port")
544+
}
545+
// clean LoraModulesString field
546+
m["lora-modules"] = m["LoraModules"]
547+
delete(m, "LoraModules")
548+
delete(m, "LoraModulesString")
549+
550+
// clean fake-metrics field
551+
if field, ok := m["fake-metrics"].(map[string]interface{}); ok {
552+
delete(field, "LorasString")
553+
}
554+
555+
// show in YAML
556+
cfgYAML, err = yaml.Marshal(m)
557+
if err != nil {
558+
return fmt.Errorf("failed to marshal configuration to YAML: %w", err)
559+
}
560+
s.logger.Info("Configuration:", "", string(cfgYAML))
561+
return nil
562+
}
563+
564+
func (s *VllmSimulator) getCurrFactor() float64 {
565+
if s.config.MaxNumSeqs <= 1 {
566+
return 1.0
567+
}
568+
return 1 + (s.config.TimeFactorUnderLoad-1)*float64(s.nRunningReqs-1)/float64(s.config.MaxNumSeqs-1)
569+
}
570+
571+
func (s *VllmSimulator) GetTimeToFirstToken() int {
572+
return int(float64(s.config.TimeToFirstToken) * s.getCurrFactor())
573+
}
574+
575+
func (s *VllmSimulator) GetPrefillOverhead() int {
576+
return int(float64(s.config.PrefillOverhead) * s.getCurrFactor())
577+
}
578+
579+
func (s *VllmSimulator) GetPrefillTimePerToken() int {
580+
return int(float64(s.config.PrefillTimePerToken) * s.getCurrFactor())
581+
}
582+
583+
func (s *VllmSimulator) GetInterTokenLatency() int {
584+
return int(float64(s.config.InterTokenLatency) * s.getCurrFactor())
585+
}
586+
>>>>>>> 482434e (Show config in yaml)

0 commit comments

Comments
 (0)