Skip to content

Commit 1cdd97e

Browse files
authored
Show final config in simulator default logger at Info level (#154)
* Show final config in simulator default logger at Info level Signed-off-by: Qifan Deng <[email protected]> * Remove unnecessary local var and update show config prompt Signed-off-by: Qifan Deng <[email protected]> * Resolve conflict due to new arg of zmq max retries Signed-off-by: Qifan Deng <[email protected]> * Clean fields when showing final configuration Signed-off-by: Qifan Deng <[email protected]> * Simplify function syntax Signed-off-by: Qifan Deng <[email protected]> * Fix golangci-lint installation link in makefile Signed-off-by: Qifan Deng <[email protected]> * Fix err fmt when logger is invalid Signed-off-by: Qifan Deng <[email protected]> --------- Signed-off-by: Qifan Deng <[email protected]>
1 parent 859d8c2 commit 1cdd97e

File tree

3 files changed

+75
-34
lines changed

3 files changed

+75
-34
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ check-ginkgo:
170170
.PHONY: check-golangci-lint
171171
check-golangci-lint:
172172
@command -v golangci-lint >/dev/null 2>&1 || { \
173-
echo "❌ golangci-lint is not installed. Install from https://golangci-lint.run/usage/install/"; exit 1; }
173+
echo "❌ golangci-lint is not installed. Install from https://golangci-lint.run/docs/welcome/install/"; exit 1; }
174174

175175
.PHONY: check-container-tool
176176
check-container-tool:

pkg/common/config.go

Lines changed: 34 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -39,100 +39,101 @@ const (
3939

4040
type Configuration struct {
4141
// Port defines on which port the simulator runs
42-
Port int `yaml:"port"`
42+
Port int `yaml:"port" json:"port"`
4343
// Model defines the current base model name
44-
Model string `yaml:"model"`
44+
Model string `yaml:"model" json:"model"`
4545
// ServedModelNames is one or many model names exposed by the API
46-
ServedModelNames []string `yaml:"served-model-name"`
46+
ServedModelNames []string `yaml:"served-model-name" json:"served-model-name"`
4747
// MaxLoras defines maximum number of loaded LoRAs
48-
MaxLoras int `yaml:"max-loras"`
48+
MaxLoras int `yaml:"max-loras" json:"max-loras"`
4949
// MaxCPULoras defines maximum number of LoRAs to store in CPU memory
50-
MaxCPULoras int `yaml:"max-cpu-loras"`
50+
MaxCPULoras int `yaml:"max-cpu-loras" json:"max-cpu-loras"`
5151
// MaxNumSeqs is maximum number of sequences per iteration (the maximum
5252
// number of inference requests that could be processed at the same time)
53-
MaxNumSeqs int `yaml:"max-num-seqs"`
53+
MaxNumSeqs int `yaml:"max-num-seqs" json:"max-num-seqs"`
5454
// MaxModelLen is the model's context window, the maximum number of tokens
5555
// in a single request including input and output. Default value is 1024.
56-
MaxModelLen int `yaml:"max-model-len"`
56+
MaxModelLen int `yaml:"max-model-len" json:"max-model-len"`
5757
// LoraModulesString is a list of LoRA adapters as strings
58-
LoraModulesString []string `yaml:"lora-modules"`
58+
LoraModulesString []string `yaml:"lora-modules" json:"lora-modules"`
5959
// LoraModules is a list of LoRA adapters
6060
LoraModules []LoraModule
6161

6262
// TimeToFirstToken time before the first token will be returned, in milliseconds
63-
TimeToFirstToken int `yaml:"time-to-first-token"`
63+
TimeToFirstToken int `yaml:"time-to-first-token" json:"time-to-first-token"`
6464
// TimeToFirstTokenStdDev standard deviation for time before the first token will be returned,
6565
// in milliseconds, optional, default is 0, can't be more than 30% of TimeToFirstToken, will not
6666
// cause the actual time to first token to differ by more than 70% from TimeToFirstToken
67-
TimeToFirstTokenStdDev int `yaml:"time-to-first-token-std-dev"`
67+
TimeToFirstTokenStdDev int `yaml:"time-to-first-token-std-dev" json:"time-to-first-token-std-dev"`
6868
// InterTokenLatency time between generated tokens, in milliseconds
69-
InterTokenLatency int `yaml:"inter-token-latency"`
69+
InterTokenLatency int `yaml:"inter-token-latency" json:"inter-token-latency"`
7070
// InterTokenLatencyStdDev standard deviation for time between generated tokens, in milliseconds,
7171
// optional, default is 0, can't be more than 30% of InterTokenLatency, will not cause the actual
7272
// inter token latency to differ by more than 70% from InterTokenLatency
73-
InterTokenLatencyStdDev int `yaml:"inter-token-latency-std-dev"`
73+
InterTokenLatencyStdDev int `yaml:"inter-token-latency-std-dev" json:"inter-token-latency-std-dev"`
7474
// KVCacheTransferLatency time to "transfer" kv-cache from another vLLM instance in case P/D is activated,
7575
// in milliseconds
76-
KVCacheTransferLatency int `yaml:"kv-cache-transfer-latency"`
76+
KVCacheTransferLatency int `yaml:"kv-cache-transfer-latency" json:"kv-cache-transfer-latency"`
7777
// KVCacheTransferLatencyStdDev standard deviation for time to "transfer" kv-cache from another
7878
// vLLM instance in case P/D is activated, in milliseconds, optional, default is 0, can't be more
7979
// than 30% of KVCacheTransferLatency, will not cause the actual latency to differ by more than 70% from
8080
// KVCacheTransferLatency
81-
KVCacheTransferLatencyStdDev int `yaml:"kv-cache-transfer-latency-std-dev"`
81+
KVCacheTransferLatencyStdDev int `yaml:"kv-cache-transfer-latency-std-dev" json:"kv-cache-transfer-latency-std-dev"`
8282

8383
// Mode defines the simulator response generation mode, valid values: echo, random
84-
Mode string `yaml:"mode"`
84+
Mode string `yaml:"mode" json:"mode"`
8585
// Seed defines random seed for operations
86-
Seed int64 `yaml:"seed"`
86+
Seed int64 `yaml:"seed" json:"seed"`
8787

8888
// MaxToolCallIntegerParam defines the maximum possible value of integer parameters in a tool call,
8989
// optional, defaults to 100
90-
MaxToolCallIntegerParam int `yaml:"max-tool-call-integer-param"`
90+
MaxToolCallIntegerParam int `yaml:"max-tool-call-integer-param" json:"max-tool-call-integer-param"`
9191
// MinToolCallIntegerParam defines the minimum possible value of integer parameters in a tool call,
9292
// optional, defaults to 0
93-
MinToolCallIntegerParam int `yaml:"min-tool-call-integer-param"`
93+
MinToolCallIntegerParam int `yaml:"min-tool-call-integer-param" json:"min-tool-call-integer-param"`
9494
// MaxToolCallNumberParam defines the maximum possible value of number (float) parameters in a tool call,
9595
// optional, defaults to 100
96-
MaxToolCallNumberParam float64 `yaml:"max-tool-call-number-param"`
96+
MaxToolCallNumberParam float64 `yaml:"max-tool-call-number-param" json:"max-tool-call-number-param"`
9797
// MinToolCallNumberParam defines the minimum possible value of number (float) parameters in a tool call,
9898
// optional, defaults to 0
99-
MinToolCallNumberParam float64 `yaml:"min-tool-call-number-param"`
99+
MinToolCallNumberParam float64 `yaml:"min-tool-call-number-param" json:"min-tool-call-number-param"`
100100

101101
// MaxToolCallArrayParamLength defines the maximum possible length of array parameters in a tool call,
102102
// optional, defaults to 5
103-
MaxToolCallArrayParamLength int `yaml:"max-tool-call-array-param-length"`
103+
MaxToolCallArrayParamLength int `yaml:"max-tool-call-array-param-length" json:"max-tool-call-array-param-length"`
104104
// MinToolCallArrayParamLength defines the minimum possible length of array parameters in a tool call,
105105
// optional, defaults to 1
106-
MinToolCallArrayParamLength int `yaml:"min-tool-call-array-param-length"`
106+
MinToolCallArrayParamLength int `yaml:"min-tool-call-array-param-length" json:"min-tool-call-array-param-length"`
107107

108108
// ToolCallNotRequiredParamProbability is the probability to add a parameter, that is not required,
109109
// in a tool call, optional, defaults to 50
110-
ToolCallNotRequiredParamProbability int `yaml:"tool-call-not-required-param-probability"`
110+
ToolCallNotRequiredParamProbability int `yaml:"tool-call-not-required-param-probability" json:"tool-call-not-required-param-probability"`
111111
// ObjectToolCallNotRequiredParamProbability is the probability to add a field, that is not required,
112112
// in an object in a tool call, optional, defaults to 50
113-
ObjectToolCallNotRequiredParamProbability int `yaml:"object-tool-call-not-required-field-probability"`
113+
ObjectToolCallNotRequiredParamProbability int `yaml:"object-tool-call-not-required-field-probability" json:"object-tool-call-not-required-field-probability"`
114114

115115
// EnableKVCache defines if kv cache feature will be enabled
116-
EnableKVCache bool `yaml:"enable-kvcache"`
116+
EnableKVCache bool `yaml:"enable-kvcache" json:"enable-kvcache"`
117117
// KVCacheSize is the maximum number of token blocks in kv cache, the default value is 1024
118-
KVCacheSize int `yaml:"kv-cache-size"`
118+
KVCacheSize int `yaml:"kv-cache-size" json:"kv-cache-size"`
119119

120120
// TokenizersCacheDir is the directory for caching tokenizers
121-
TokenizersCacheDir string `yaml:"tokenizers-cache-dir"`
121+
TokenizersCacheDir string `yaml:"tokenizers-cache-dir" json:"tokenizers-cache-dir"`
122122
// TokenBlockSize is token block size for contiguous chunks of tokens, possible values: 8,16,32,64,128, defaults to 16
123-
TokenBlockSize int `yaml:"block-size"`
123+
TokenBlockSize int `yaml:"block-size" json:"block-size"`
124124
// HashSeed is the seed for hash generation (if not set, is read from PYTHONHASHSEED environment variable)
125-
HashSeed string `yaml:"hash-seed"`
125+
HashSeed string `yaml:"hash-seed" json:"hash-seed"`
126126

127127
// ZMQEndpoint is the ZMQ address to publish events, the default value is tcp://localhost:5557
128-
ZMQEndpoint string `yaml:"zmq-endpoint"`
128+
ZMQEndpoint string `yaml:"zmq-endpoint" json:"zmq-endpoint"`
129129
// ZMQMaxConnectAttempts defines the maximum number (10) of retries when ZMQ connection fails
130-
ZMQMaxConnectAttempts uint `yaml:"zmq-max-connect-attempts"`
130+
ZMQMaxConnectAttempts uint `yaml:"zmq-max-connect-attempts" json:"zmq-max-connect-attempts"`
131+
131132
// EventBatchSize is the maximum number of kv-cache events to be sent together, defaults to 16
132-
EventBatchSize int `yaml:"event-batch-size"`
133+
EventBatchSize int `yaml:"event-batch-size" json:"event-batch-size"`
133134

134135
// FakeMetrics is a set of metrics to send to Prometheus instead of the real data
135-
FakeMetrics *Metrics `yaml:"fake-metrics"`
136+
FakeMetrics *Metrics `yaml:"fake-metrics" json:"fake-metrics"`
136137
}
137138

138139
type Metrics struct {

pkg/llm-d-inference-sim/simulator.go

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ package llmdinferencesim
2020
import (
2121
"context"
2222
"encoding/json"
23+
"errors"
2324
"fmt"
2425
"net"
2526
"os"
@@ -114,8 +115,14 @@ func (s *VllmSimulator) Start(ctx context.Context) error {
114115
if err != nil {
115116
return err
116117
}
118+
117119
s.config = config
118120

121+
err = s.showConfig(s.logger)
122+
if err != nil {
123+
return err
124+
}
125+
119126
for _, lora := range config.LoraModules {
120127
s.loraAdaptors.Store(lora.Name, "")
121128
}
@@ -734,3 +741,36 @@ func (s *VllmSimulator) getDisplayedModelName(reqModel string) string {
734741
}
735742
return s.config.ServedModelNames[0]
736743
}
744+
745+
func (s *VllmSimulator) showConfig(tgtLgr logr.Logger) error {
746+
if tgtLgr == logr.Discard() {
747+
return errors.New("target logger is nil, cannot show configuration")
748+
}
749+
cfgJSON, err := json.Marshal(s.config)
750+
if err != nil {
751+
return fmt.Errorf("failed to marshal configuration to JSON: %w", err)
752+
}
753+
754+
// clean LoraModulesString field
755+
var m map[string]interface{}
756+
err = json.Unmarshal(cfgJSON, &m)
757+
if err != nil {
758+
return fmt.Errorf("failed to unmarshal JSON to map: %w", err)
759+
}
760+
m["lora-modules"] = m["LoraModules"]
761+
delete(m, "LoraModules")
762+
delete(m, "LoraModulesString")
763+
764+
// clean fake-metrics field
765+
if field, ok := m["fake-metrics"].(map[string]interface{}); ok {
766+
delete(field, "LorasString")
767+
}
768+
769+
// show in JSON
770+
cfgJSON, err = json.MarshalIndent(m, "", " ")
771+
if err != nil {
772+
return fmt.Errorf("failed to marshal configuration to JSON: %w", err)
773+
}
774+
tgtLgr.Info("Configuration:", "", string(cfgJSON))
775+
return nil
776+
}

0 commit comments

Comments
 (0)