@@ -39,98 +39,98 @@ const (
3939
4040type Configuration struct {
4141 // Port defines on which port the simulator runs
42- Port int `yaml:"port"`
42+ Port int `yaml:"port" json:"port" `
4343 // Model defines the current base model name
44- Model string `yaml:"model"`
44+ Model string `yaml:"model" json:"model" `
4545 // ServedModelNames is one or many model names exposed by the API
46- ServedModelNames []string `yaml:"served-model-name"`
46+ ServedModelNames []string `yaml:"served-model-name" json:"served-model-name" `
4747 // MaxLoras defines maximum number of loaded LoRAs
48- MaxLoras int `yaml:"max-loras"`
48+ MaxLoras int `yaml:"max-loras" json:"max-loras" `
4949 // MaxCPULoras defines maximum number of LoRAs to store in CPU memory
50- MaxCPULoras int `yaml:"max-cpu-loras"`
50+ MaxCPULoras int `yaml:"max-cpu-loras" json:"max-cpu-loras" `
5151 // MaxNumSeqs is maximum number of sequences per iteration (the maximum
5252 // number of inference requests that could be processed at the same time)
53- MaxNumSeqs int `yaml:"max-num-seqs"`
53+ MaxNumSeqs int `yaml:"max-num-seqs" json:"max-num-seqs" `
5454 // MaxModelLen is the model's context window, the maximum number of tokens
5555 // in a single request including input and output. Default value is 1024.
56- MaxModelLen int `yaml:"max-model-len"`
56+ MaxModelLen int `yaml:"max-model-len" json:"max-model-len" `
5757 // LoraModulesString is a list of LoRA adapters as strings
58- LoraModulesString []string `yaml:"lora-modules"`
58+ LoraModulesString []string `yaml:"lora-modules" json:"lora-modules" `
5959 // LoraModules is a list of LoRA adapters
6060 LoraModules []LoraModule
6161
6262 // TimeToFirstToken time before the first token will be returned, in milliseconds
63- TimeToFirstToken int `yaml:"time-to-first-token"`
63+ TimeToFirstToken int `yaml:"time-to-first-token" json:"time-to-first-token" `
6464 // TimeToFirstTokenStdDev standard deviation for time before the first token will be returned,
6565 // in milliseconds, optional, default is 0, can't be more than 30% of TimeToFirstToken, will not
6666 // cause the actual time to first token to differ by more than 70% from TimeToFirstToken
67- TimeToFirstTokenStdDev int `yaml:"time-to-first-token-std-dev"`
67+ TimeToFirstTokenStdDev int `yaml:"time-to-first-token-std-dev" json:"time-to-first-token-std-dev" `
6868 // InterTokenLatency time between generated tokens, in milliseconds
69- InterTokenLatency int `yaml:"inter-token-latency"`
69+ InterTokenLatency int `yaml:"inter-token-latency" json:"inter-token-latency" `
7070 // InterTokenLatencyStdDev standard deviation for time between generated tokens, in milliseconds,
7171 // optional, default is 0, can't be more than 30% of InterTokenLatency, will not cause the actual
7272 // inter token latency to differ by more than 70% from InterTokenLatency
73- InterTokenLatencyStdDev int `yaml:"inter-token-latency-std-dev"`
73+ InterTokenLatencyStdDev int `yaml:"inter-token-latency-std-dev" json:"inter-token-latency-std-dev" `
7474 // KVCacheTransferLatency time to "transfer" kv-cache from another vLLM instance in case P/D is activated,
7575 // in milliseconds
76- KVCacheTransferLatency int `yaml:"kv-cache-transfer-latency"`
76+ KVCacheTransferLatency int `yaml:"kv-cache-transfer-latency" json:"kv-cache-transfer-latency" `
7777 // KVCacheTransferLatencyStdDev standard deviation for time to "transfer" kv-cache from another
7878 // vLLM instance in case P/D is activated, in milliseconds, optional, default is 0, can't be more
7979 // than 30% of KVCacheTransferLatency, will not cause the actual latency to differ by more than 70% from
8080 // KVCacheTransferLatency
81- KVCacheTransferLatencyStdDev int `yaml:"kv-cache-transfer-latency-std-dev"`
81+ KVCacheTransferLatencyStdDev int `yaml:"kv-cache-transfer-latency-std-dev" json:"kv-cache-transfer-latency-std-dev" `
8282
8383 // Mode defines the simulator response generation mode, valid values: echo, random
84- Mode string `yaml:"mode"`
84+ Mode string `yaml:"mode" json:"mode" `
8585 // Seed defines random seed for operations
86- Seed int64 `yaml:"seed"`
86+ Seed int64 `yaml:"seed" json:"seed" `
8787
8888 // MaxToolCallIntegerParam defines the maximum possible value of integer parameters in a tool call,
8989 // optional, defaults to 100
90- MaxToolCallIntegerParam int `yaml:"max-tool-call-integer-param"`
90+ MaxToolCallIntegerParam int `yaml:"max-tool-call-integer-param" json:"max-tool-call-integer-param" `
9191 // MinToolCallIntegerParam defines the minimum possible value of integer parameters in a tool call,
9292 // optional, defaults to 0
93- MinToolCallIntegerParam int `yaml:"min-tool-call-integer-param"`
93+ MinToolCallIntegerParam int `yaml:"min-tool-call-integer-param" json:"min-tool-call-integer-param" `
9494 // MaxToolCallNumberParam defines the maximum possible value of number (float) parameters in a tool call,
9595 // optional, defaults to 100
96- MaxToolCallNumberParam float64 `yaml:"max-tool-call-number-param"`
96+ MaxToolCallNumberParam float64 `yaml:"max-tool-call-number-param" json:"max-tool-call-number-param" `
9797 // MinToolCallNumberParam defines the minimum possible value of number (float) parameters in a tool call,
9898 // optional, defaults to 0
99- MinToolCallNumberParam float64 `yaml:"min-tool-call-number-param"`
99+ MinToolCallNumberParam float64 `yaml:"min-tool-call-number-param" json:"min-tool-call-number-param" `
100100
101101 // MaxToolCallArrayParamLength defines the maximum possible length of array parameters in a tool call,
102102 // optional, defaults to 5
103- MaxToolCallArrayParamLength int `yaml:"max-tool-call-array-param-length"`
103+ MaxToolCallArrayParamLength int `yaml:"max-tool-call-array-param-length" json:"max-tool-call-array-param-length" `
104104 // MinToolCallArrayParamLength defines the minimum possible length of array parameters in a tool call,
105105 // optional, defaults to 1
106- MinToolCallArrayParamLength int `yaml:"min-tool-call-array-param-length"`
106+ MinToolCallArrayParamLength int `yaml:"min-tool-call-array-param-length" json:"min-tool-call-array-param-length" `
107107
108108 // ToolCallNotRequiredParamProbability is the probability to add a parameter, that is not required,
109109 // in a tool call, optional, defaults to 50
110- ToolCallNotRequiredParamProbability int `yaml:"tool-call-not-required-param-probability"`
110+ ToolCallNotRequiredParamProbability int `yaml:"tool-call-not-required-param-probability" json:"tool-call-not-required-param-probability" `
111111 // ObjectToolCallNotRequiredParamProbability is the probability to add a field, that is not required,
112112 // in an object in a tool call, optional, defaults to 50
113- ObjectToolCallNotRequiredParamProbability int `yaml:"object-tool-call-not-required-field-probability"`
113+ ObjectToolCallNotRequiredParamProbability int `yaml:"object-tool-call-not-required-field-probability" json:"object-tool-call-not-required-field-probability" `
114114
115115 // EnableKVCache defines if kv cache feature will be enabled
116- EnableKVCache bool `yaml:"enable-kvcache"`
116+ EnableKVCache bool `yaml:"enable-kvcache" json:"enable-kvcache" `
117117 // KVCacheSize is the maximum number of token blocks in kv cache, the default value is 1024
118- KVCacheSize int `yaml:"kv-cache-size"`
118+ KVCacheSize int `yaml:"kv-cache-size" json:"kv-cache-size" `
119119
120120 // TokenizersCacheDir is the directory for caching tokenizers
121- TokenizersCacheDir string `yaml:"tokenizers-cache-dir"`
121+ TokenizersCacheDir string `yaml:"tokenizers-cache-dir" json:"tokenizers-cache-dir" `
122122 // TokenBlockSize is token block size for contiguous chunks of tokens, possible values: 8,16,32,64,128, defaults to 16
123- TokenBlockSize int `yaml:"block-size"`
123+ TokenBlockSize int `yaml:"block-size" json:"block-size" `
124124 // HashSeed is the seed for hash generation (if not set, is read from PYTHONHASHSEED environment variable)
125- HashSeed string `yaml:"hash-seed"`
125+ HashSeed string `yaml:"hash-seed" json:"hash-seed" `
126126
127127 // ZMQEndpoint is the ZMQ address to publish events, the default value is tcp://localhost:5557
128- ZMQEndpoint string `yaml:"zmq-endpoint"`
128+ ZMQEndpoint string `yaml:"zmq-endpoint" json:"zmq-endpoint" `
129129 // EventBatchSize is the maximum number of kv-cache events to be sent together, defaults to 16
130- EventBatchSize int `yaml:"event-batch-size"`
130+ EventBatchSize int `yaml:"event-batch-size" json:"event-batch-size" `
131131
132132 // FakeMetrics is a set of metrics to send to Prometheus instead of the real data
133- FakeMetrics * Metrics `yaml:"fake-metrics"`
133+ FakeMetrics * Metrics `yaml:"fake-metrics" json:"fake-metrics" `
134134}
135135
136136type Metrics struct {
0 commit comments