@@ -29,6 +29,7 @@ import (
2929 "github.com/prometheus/client_golang/prometheus"
3030 "github.com/valyala/fasthttp"
3131 "golang.org/x/sync/errgroup"
32+ "gopkg.in/yaml.v3"
3233 "k8s.io/klog/v2"
3334
3435 "github.com/llm-d/llm-d-inference-sim/pkg/common"
@@ -497,3 +498,89 @@ func (s *VllmSimulator) createModelsResponse() *vllmapi.ModelsResponse {
497498
498499 return & modelsResp
499500}
501+ << << << < HEAD
502+ == == == =
503+
504+ // HandleHealth http handler for /health
505+ func (s * VllmSimulator ) HandleHealth (ctx * fasthttp.RequestCtx ) {
506+ s .logger .V (4 ).Info ("health request received" )
507+ ctx .Response .Header .SetContentType ("application/json" )
508+ ctx .Response .Header .SetStatusCode (fasthttp .StatusOK )
509+ ctx .Response .SetBody ([]byte ("{}" ))
510+ }
511+
512+ // HandleReady http handler for /ready
513+ func (s * VllmSimulator ) HandleReady (ctx * fasthttp.RequestCtx ) {
514+ s .logger .V (4 ).Info ("readiness request received" )
515+ ctx .Response .Header .SetContentType ("application/json" )
516+ ctx .Response .Header .SetStatusCode (fasthttp .StatusOK )
517+ ctx .Response .SetBody ([]byte ("{}" ))
518+ }
519+
520+ // getDisplayedModelName returns the model name that must appear in API
521+ // responses. LoRA adapters keep their explicit name, while all base-model
522+ // requests are surfaced as the first alias from --served-model-name.
523+ func (s * VllmSimulator ) getDisplayedModelName (reqModel string ) string {
524+ if s .isLora (reqModel ) {
525+ return reqModel
526+ }
527+ return s .config .ServedModelNames [0 ]
528+ }
529+
530+ func (s * VllmSimulator ) showConfig (dp bool ) error {
531+ cfgYAML , err := yaml .Marshal (s .config )
532+ if err != nil {
533+ return fmt .Errorf ("failed to marshal configuration to YAML: %w" , err )
534+ }
535+
536+ var m map [string ]interface {}
537+ err = yaml .Unmarshal (cfgYAML , & m )
538+ if err != nil {
539+ return fmt .Errorf ("failed to unmarshal YAML to map: %w" , err )
540+ }
541+ if dp {
542+ // remove the port
543+ delete (m , "port" )
544+ }
545+ // clean LoraModulesString field
546+ m ["lora-modules" ] = m ["LoraModules" ]
547+ delete (m , "LoraModules" )
548+ delete (m , "LoraModulesString" )
549+
550+ // clean fake-metrics field
551+ if field , ok := m ["fake-metrics" ].(map [string ]interface {}); ok {
552+ delete (field , "LorasString" )
553+ }
554+
555+ // show in YAML
556+ cfgYAML , err = yaml .Marshal (m )
557+ if err != nil {
558+ return fmt .Errorf ("failed to marshal configuration to YAML: %w" , err )
559+ }
560+ s .logger .Info ("Configuration:" , "" , string (cfgYAML ))
561+ return nil
562+ }
563+
564+ func (s * VllmSimulator ) getCurrFactor () float64 {
565+ if s .config .MaxNumSeqs <= 1 {
566+ return 1.0
567+ }
568+ return 1 + (s .config .TimeFactorUnderLoad - 1 )* float64 (s .nRunningReqs - 1 )/ float64 (s .config .MaxNumSeqs - 1 )
569+ }
570+
571+ func (s * VllmSimulator ) GetTimeToFirstToken () int {
572+ return int (float64 (s .config .TimeToFirstToken ) * s .getCurrFactor ())
573+ }
574+
575+ func (s * VllmSimulator ) GetPrefillOverhead () int {
576+ return int (float64 (s .config .PrefillOverhead ) * s .getCurrFactor ())
577+ }
578+
579+ func (s * VllmSimulator ) GetPrefillTimePerToken () int {
580+ return int (float64 (s .config .PrefillTimePerToken ) * s .getCurrFactor ())
581+ }
582+
583+ func (s * VllmSimulator ) GetInterTokenLatency () int {
584+ return int (float64 (s .config .InterTokenLatency ) * s .getCurrFactor ())
585+ }
586+ >> >> >> > 482434 e (Show config in yaml )
0 commit comments