From 2665b38e4a9ea692b645dad1b2f3986e11d7cec2 Mon Sep 17 00:00:00 2001 From: Qifan Deng Date: Thu, 21 Aug 2025 14:33:36 +1000 Subject: [PATCH 1/7] Show final config in simulator default logger at Info level Signed-off-by: Qifan Deng --- pkg/llm-d-inference-sim/simulator.go | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/pkg/llm-d-inference-sim/simulator.go b/pkg/llm-d-inference-sim/simulator.go index da5f53e1..cfdc0b3b 100644 --- a/pkg/llm-d-inference-sim/simulator.go +++ b/pkg/llm-d-inference-sim/simulator.go @@ -114,8 +114,14 @@ func (s *VllmSimulator) Start(ctx context.Context) error { if err != nil { return err } + s.config = config + err = s.showConfig(s.logger) + if err != nil { + return err + } + for _, lora := range config.LoraModules { s.loraAdaptors.Store(lora.Name, "") } @@ -708,3 +714,17 @@ func (s *VllmSimulator) getDisplayedModelName(reqModel string) string { } return s.config.ServedModelNames[0] } + +func (s *VllmSimulator) showConfig(tgtLgr logr.Logger) error { + if tgtLgr == logr.Discard() { + err := fmt.Errorf("target logger is nil, cannot show configuration") + return err + } + config := s.config + cfgJSON, err := json.MarshalIndent(config, "", " ") + if err != nil { + return fmt.Errorf("failed to marshal configuration to JSON: %w", err) + } + tgtLgr.Info("Final simulator configuration:", "config", string(cfgJSON)) + return nil +} From 9909b66598133489fe5c47d0fe896202525d2564 Mon Sep 17 00:00:00 2001 From: Qifan Deng Date: Thu, 21 Aug 2025 20:26:54 +1000 Subject: [PATCH 2/7] Remove unnecessary local var and update show config prompt Signed-off-by: Qifan Deng --- pkg/llm-d-inference-sim/simulator.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pkg/llm-d-inference-sim/simulator.go b/pkg/llm-d-inference-sim/simulator.go index cfdc0b3b..ed915286 100644 --- a/pkg/llm-d-inference-sim/simulator.go +++ b/pkg/llm-d-inference-sim/simulator.go @@ -720,11 +720,10 @@ func (s *VllmSimulator) showConfig(tgtLgr 
logr.Logger) error { err := fmt.Errorf("target logger is nil, cannot show configuration") return err } - config := s.config - cfgJSON, err := json.MarshalIndent(config, "", " ") + cfgJSON, err := json.MarshalIndent(s.config, "", " ") if err != nil { return fmt.Errorf("failed to marshal configuration to JSON: %w", err) } - tgtLgr.Info("Final simulator configuration:", "config", string(cfgJSON)) + tgtLgr.Info("Configuration:", "", string(cfgJSON)) return nil } From 4b353e26aae528ed566fa39ae776f2521ce58112 Mon Sep 17 00:00:00 2001 From: Qifan Deng Date: Fri, 22 Aug 2025 00:40:55 +1000 Subject: [PATCH 3/7] Resolve conflict due to new arg of zmq max retries Signed-off-by: Qifan Deng --- pkg/common/config.go | 67 ++++++++++++++++++++++---------------------- 1 file changed, 34 insertions(+), 33 deletions(-) diff --git a/pkg/common/config.go b/pkg/common/config.go index 5f7357c3..3d5f6ac1 100644 --- a/pkg/common/config.go +++ b/pkg/common/config.go @@ -39,100 +39,101 @@ const ( type Configuration struct { // Port defines on which port the simulator runs - Port int `yaml:"port"` + Port int `yaml:"port" json:"port"` // Model defines the current base model name - Model string `yaml:"model"` + Model string `yaml:"model" json:"model"` // ServedModelNames is one or many model names exposed by the API - ServedModelNames []string `yaml:"served-model-name"` + ServedModelNames []string `yaml:"served-model-name" json:"served-model-name"` // MaxLoras defines maximum number of loaded LoRAs - MaxLoras int `yaml:"max-loras"` + MaxLoras int `yaml:"max-loras" json:"max-loras"` // MaxCPULoras defines maximum number of LoRAs to store in CPU memory - MaxCPULoras int `yaml:"max-cpu-loras"` + MaxCPULoras int `yaml:"max-cpu-loras" json:"max-cpu-loras"` // MaxNumSeqs is maximum number of sequences per iteration (the maximum // number of inference requests that could be processed at the same time) - MaxNumSeqs int `yaml:"max-num-seqs"` + MaxNumSeqs int `yaml:"max-num-seqs" json:"max-num-seqs"` // 
MaxModelLen is the model's context window, the maximum number of tokens // in a single request including input and output. Default value is 1024. - MaxModelLen int `yaml:"max-model-len"` + MaxModelLen int `yaml:"max-model-len" json:"max-model-len"` // LoraModulesString is a list of LoRA adapters as strings - LoraModulesString []string `yaml:"lora-modules"` + LoraModulesString []string `yaml:"lora-modules" json:"lora-modules"` // LoraModules is a list of LoRA adapters LoraModules []LoraModule // TimeToFirstToken time before the first token will be returned, in milliseconds - TimeToFirstToken int `yaml:"time-to-first-token"` + TimeToFirstToken int `yaml:"time-to-first-token" json:"time-to-first-token"` // TimeToFirstTokenStdDev standard deviation for time before the first token will be returned, // in milliseconds, optional, default is 0, can't be more than 30% of TimeToFirstToken, will not // cause the actual time to first token to differ by more than 70% from TimeToFirstToken - TimeToFirstTokenStdDev int `yaml:"time-to-first-token-std-dev"` + TimeToFirstTokenStdDev int `yaml:"time-to-first-token-std-dev" json:"time-to-first-token-std-dev"` // InterTokenLatency time between generated tokens, in milliseconds - InterTokenLatency int `yaml:"inter-token-latency"` + InterTokenLatency int `yaml:"inter-token-latency" json:"inter-token-latency"` // InterTokenLatencyStdDev standard deviation for time between generated tokens, in milliseconds, // optional, default is 0, can't be more than 30% of InterTokenLatency, will not cause the actual // inter token latency to differ by more than 70% from InterTokenLatency - InterTokenLatencyStdDev int `yaml:"inter-token-latency-std-dev"` + InterTokenLatencyStdDev int `yaml:"inter-token-latency-std-dev" json:"inter-token-latency-std-dev"` // KVCacheTransferLatency time to "transfer" kv-cache from another vLLM instance in case P/D is activated, // in milliseconds - KVCacheTransferLatency int `yaml:"kv-cache-transfer-latency"` + 
KVCacheTransferLatency int `yaml:"kv-cache-transfer-latency" json:"kv-cache-transfer-latency"` // KVCacheTransferLatencyStdDev standard deviation for time to "transfer" kv-cache from another // vLLM instance in case P/D is activated, in milliseconds, optional, default is 0, can't be more // than 30% of KVCacheTransferLatency, will not cause the actual latency to differ by more than 70% from // KVCacheTransferLatency - KVCacheTransferLatencyStdDev int `yaml:"kv-cache-transfer-latency-std-dev"` + KVCacheTransferLatencyStdDev int `yaml:"kv-cache-transfer-latency-std-dev" json:"kv-cache-transfer-latency-std-dev"` // Mode defines the simulator response generation mode, valid values: echo, random - Mode string `yaml:"mode"` + Mode string `yaml:"mode" json:"mode"` // Seed defines random seed for operations - Seed int64 `yaml:"seed"` + Seed int64 `yaml:"seed" json:"seed"` // MaxToolCallIntegerParam defines the maximum possible value of integer parameters in a tool call, // optional, defaults to 100 - MaxToolCallIntegerParam int `yaml:"max-tool-call-integer-param"` + MaxToolCallIntegerParam int `yaml:"max-tool-call-integer-param" json:"max-tool-call-integer-param"` // MinToolCallIntegerParam defines the minimum possible value of integer parameters in a tool call, // optional, defaults to 0 - MinToolCallIntegerParam int `yaml:"min-tool-call-integer-param"` + MinToolCallIntegerParam int `yaml:"min-tool-call-integer-param" json:"min-tool-call-integer-param"` // MaxToolCallNumberParam defines the maximum possible value of number (float) parameters in a tool call, // optional, defaults to 100 - MaxToolCallNumberParam float64 `yaml:"max-tool-call-number-param"` + MaxToolCallNumberParam float64 `yaml:"max-tool-call-number-param" json:"max-tool-call-number-param"` // MinToolCallNumberParam defines the minimum possible value of number (float) parameters in a tool call, // optional, defaults to 0 - MinToolCallNumberParam float64 `yaml:"min-tool-call-number-param"` + 
MinToolCallNumberParam float64 `yaml:"min-tool-call-number-param" json:"min-tool-call-number-param"` // MaxToolCallArrayParamLength defines the maximum possible length of array parameters in a tool call, // optional, defaults to 5 - MaxToolCallArrayParamLength int `yaml:"max-tool-call-array-param-length"` + MaxToolCallArrayParamLength int `yaml:"max-tool-call-array-param-length" json:"max-tool-call-array-param-length"` // MinToolCallArrayParamLength defines the minimum possible length of array parameters in a tool call, // optional, defaults to 1 - MinToolCallArrayParamLength int `yaml:"min-tool-call-array-param-length"` + MinToolCallArrayParamLength int `yaml:"min-tool-call-array-param-length" json:"min-tool-call-array-param-length"` // ToolCallNotRequiredParamProbability is the probability to add a parameter, that is not required, // in a tool call, optional, defaults to 50 - ToolCallNotRequiredParamProbability int `yaml:"tool-call-not-required-param-probability"` + ToolCallNotRequiredParamProbability int `yaml:"tool-call-not-required-param-probability" json:"tool-call-not-required-param-probability"` // ObjectToolCallNotRequiredParamProbability is the probability to add a field, that is not required, // in an object in a tool call, optional, defaults to 50 - ObjectToolCallNotRequiredParamProbability int `yaml:"object-tool-call-not-required-field-probability"` + ObjectToolCallNotRequiredParamProbability int `yaml:"object-tool-call-not-required-field-probability" json:"object-tool-call-not-required-field-probability"` // EnableKVCache defines if kv cache feature will be enabled - EnableKVCache bool `yaml:"enable-kvcache"` + EnableKVCache bool `yaml:"enable-kvcache" json:"enable-kvcache"` // KVCacheSize is the maximum number of token blocks in kv cache, the default value is 1024 - KVCacheSize int `yaml:"kv-cache-size"` + KVCacheSize int `yaml:"kv-cache-size" json:"kv-cache-size"` // TokenizersCacheDir is the directory for caching tokenizers - TokenizersCacheDir 
string `yaml:"tokenizers-cache-dir"` + TokenizersCacheDir string `yaml:"tokenizers-cache-dir" json:"tokenizers-cache-dir"` // TokenBlockSize is token block size for contiguous chunks of tokens, possible values: 8,16,32,64,128, defaults to 16 - TokenBlockSize int `yaml:"block-size"` + TokenBlockSize int `yaml:"block-size" json:"block-size"` // HashSeed is the seed for hash generation (if not set, is read from PYTHONHASHSEED environment variable) - HashSeed string `yaml:"hash-seed"` + HashSeed string `yaml:"hash-seed" json:"hash-seed"` // ZMQEndpoint is the ZMQ address to publish events, the default value is tcp://localhost:5557 - ZMQEndpoint string `yaml:"zmq-endpoint"` + ZMQEndpoint string `yaml:"zmq-endpoint" json:"zmq-endpoint"` // ZMQMaxConnectAttempts defines the maximum number (10) of retries when ZMQ connection fails - ZMQMaxConnectAttempts uint `yaml:"zmq-max-connect-attempts"` + ZMQMaxConnectAttempts uint `yaml:"zmq-max-connect-attempts" json:"zmq-max-connect-attempts"` + // EventBatchSize is the maximum number of kv-cache events to be sent together, defaults to 16 - EventBatchSize int `yaml:"event-batch-size"` + EventBatchSize int `yaml:"event-batch-size" json:"event-batch-size"` // FakeMetrics is a set of metrics to send to Prometheus instead of the real data - FakeMetrics *Metrics `yaml:"fake-metrics"` + FakeMetrics *Metrics `yaml:"fake-metrics" json:"fake-metrics"` } type Metrics struct { From 92863cc351c1c4f955de4d3ffb7a2b81b121dfde Mon Sep 17 00:00:00 2001 From: Qifan Deng Date: Fri, 22 Aug 2025 00:34:53 +1000 Subject: [PATCH 4/7] Clean fields when show final configuration Signed-off-by: Qifan Deng --- pkg/llm-d-inference-sim/simulator.go | 39 +++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/pkg/llm-d-inference-sim/simulator.go b/pkg/llm-d-inference-sim/simulator.go index ed915286..f3b974c1 100644 --- a/pkg/llm-d-inference-sim/simulator.go +++ b/pkg/llm-d-inference-sim/simulator.go @@ -720,7 +720,44 @@ func (s 
*VllmSimulator) showConfig(tgtLgr logr.Logger) error { err := fmt.Errorf("target logger is nil, cannot show configuration") return err } - cfgJSON, err := json.MarshalIndent(s.config, "", " ") + cfgJSON, err := json.Marshal(s.config) + if err != nil { + return fmt.Errorf("failed to marshal configuration to JSON: %w", err) + } + + // clean LoraModulesString field + var m map[string]interface{} + err = json.Unmarshal(cfgJSON, &m) + if err != nil { + return fmt.Errorf("failed to unmarshal JSON to map: %w", err) + } + m["lora-modules"] = m["LoraModules"] + delete(m, "LoraModules") + if m["lora-modules"] == nil { + m["lora-modules"] = "" + } + delete(m, "LoraModulesString") + + // clean fake-metrics field + if m["fake-metrics"] != nil { + var fakeMetricsM map[string]interface{} + fakeMetricsJSON, err := json.Marshal(m["fake-metrics"]) + if err != nil { + return fmt.Errorf("failed to marshal fake-metrics to JSON: %w", err) + } + err = json.Unmarshal(fakeMetricsJSON, &fakeMetricsM) + if err != nil { + return fmt.Errorf("failed to unmarshal fake-metrics to map: %w", err) + } + delete(fakeMetricsM, "LorasString") + // set fake-metrics + m["fake-metrics"] = fakeMetricsM + } else { + m["fake-metrics"] = "" + } + + // show in JSON + cfgJSON, err = json.MarshalIndent(m, "", " ") if err != nil { return fmt.Errorf("failed to marshal configuration to JSON: %w", err) } From e6d578de5636e5e6e7caa20d4b2865684e28bf0c Mon Sep 17 00:00:00 2001 From: Qifan Deng Date: Sun, 24 Aug 2025 16:54:36 +1000 Subject: [PATCH 5/7] Simplify function syntax Signed-off-by: Qifan Deng --- pkg/llm-d-inference-sim/simulator.go | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/pkg/llm-d-inference-sim/simulator.go b/pkg/llm-d-inference-sim/simulator.go index f3b974c1..fc163407 100644 --- a/pkg/llm-d-inference-sim/simulator.go +++ b/pkg/llm-d-inference-sim/simulator.go @@ -733,27 +733,11 @@ func (s *VllmSimulator) showConfig(tgtLgr logr.Logger) error { } 
m["lora-modules"] = m["LoraModules"] delete(m, "LoraModules") - if m["lora-modules"] == nil { - m["lora-modules"] = "" - } delete(m, "LoraModulesString") // clean fake-metrics field - if m["fake-metrics"] != nil { - var fakeMetricsM map[string]interface{} - fakeMetricsJSON, err := json.Marshal(m["fake-metrics"]) - if err != nil { - return fmt.Errorf("failed to marshal fake-metrics to JSON: %w", err) - } - err = json.Unmarshal(fakeMetricsJSON, &fakeMetricsM) - if err != nil { - return fmt.Errorf("failed to unmarshal fake-metrics to map: %w", err) - } - delete(fakeMetricsM, "LorasString") - // set fake-metrics - m["fake-metrics"] = fakeMetricsM - } else { - m["fake-metrics"] = "" + if field, ok := m["fake-metrics"].(map[string]interface{}); ok { + delete(field, "LorasString") } // show in JSON From 02c76c68d28219101bc4386cd4b069fa2df92c88 Mon Sep 17 00:00:00 2001 From: Qifan Deng Date: Sun, 24 Aug 2025 17:20:34 +1000 Subject: [PATCH 6/7] Fix golangci-lint installation link in makefile Signed-off-by: Qifan Deng --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 7f4ea750..04691cf7 100644 --- a/Makefile +++ b/Makefile @@ -170,7 +170,7 @@ check-ginkgo: .PHONY: check-golangci-lint check-golangci-lint: @command -v golangci-lint >/dev/null 2>&1 || { \ - echo "❌ golangci-lint is not installed. Install from https://golangci-lint.run/usage/install/"; exit 1; } + echo "❌ golangci-lint is not installed. 
Install from https://golangci-lint.run/docs/welcome/install/"; exit 1; } .PHONY: check-container-tool check-container-tool: From c7b5458c50e398bfa669822c4b1c51b05faa7711 Mon Sep 17 00:00:00 2001 From: Qifan Deng Date: Sun, 24 Aug 2025 17:25:27 +1000 Subject: [PATCH 7/7] Fix err fmt when logger is invalid Signed-off-by: Qifan Deng --- pkg/llm-d-inference-sim/simulator.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/llm-d-inference-sim/simulator.go b/pkg/llm-d-inference-sim/simulator.go index fc163407..28df1ce5 100644 --- a/pkg/llm-d-inference-sim/simulator.go +++ b/pkg/llm-d-inference-sim/simulator.go @@ -20,6 +20,7 @@ package llmdinferencesim import ( "context" "encoding/json" + "errors" "fmt" "net" "os" @@ -717,8 +718,7 @@ func (s *VllmSimulator) getDisplayedModelName(reqModel string) string { func (s *VllmSimulator) showConfig(tgtLgr logr.Logger) error { if tgtLgr == logr.Discard() { - err := fmt.Errorf("target logger is nil, cannot show configuration") - return err + return errors.New("target logger is nil, cannot show configuration") } cfgJSON, err := json.Marshal(s.config) if err != nil {