@@ -64,10 +64,12 @@ var (
64
64
"The gRPC port used for communicating with Envoy proxy" )
65
65
grpcHealthPort = flag .Int (
66
66
"grpcHealthPort" ,
67
- 9003 ,
67
+ runserver . DefaultGrpcHealthPort ,
68
68
"The port used for gRPC liveness and readiness probes" )
69
69
metricsPort = flag .Int (
70
- "metricsPort" , 9090 , "The metrics port" )
70
+ "metricsPort" ,
71
+ runserver .DefaultMetricsPort ,
72
+ "The metrics port" )
71
73
destinationEndpointHintKey = flag .String (
72
74
"destinationEndpointHintKey" ,
73
75
runserver .DefaultDestinationEndpointHintKey ,
@@ -93,28 +95,47 @@ var (
93
95
"refreshPrometheusMetricsInterval" ,
94
96
runserver .DefaultRefreshPrometheusMetricsInterval ,
95
97
"interval to flush prometheus metrics" )
96
- logVerbosity = flag .Int ("v" , logging .DEFAULT , "number for the log level verbosity" )
98
+ logVerbosity = flag .Int (
99
+ "v" ,
100
+ logging .DEFAULT ,
101
+ "number for the log level verbosity" )
97
102
secureServing = flag .Bool (
98
- "secureServing" , runserver .DefaultSecureServing , "Enables secure serving. Defaults to true." )
99
- healthChecking = flag .Bool ("healthChecking" , runserver .DefaultHealthChecking , "Enables health checking" )
100
- certPath = flag .String (
101
- "certPath" , "" , "The path to the certificate for secure serving. The certificate and private key files " +
103
+ "secureServing" ,
104
+ runserver .DefaultSecureServing ,
105
+ "Enables secure serving. Defaults to true." )
106
+ healthChecking = flag .Bool (
107
+ "healthChecking" ,
108
+ runserver .DefaultHealthChecking ,
109
+ "Enables health checking" )
110
+ certPath = flag .String (
111
+ "certPath" ,
112
+ runserver .DefaultCertPath ,
113
+ "The path to the certificate for secure serving. The certificate and private key files " +
102
114
"are assumed to be named tls.crt and tls.key, respectively. If not set, and secureServing is enabled, " +
103
115
"then a self-signed certificate is used." )
104
116
// metric flags
105
- totalQueuedRequestsMetric = flag .String ("totalQueuedRequestsMetric" ,
106
- "vllm:num_requests_waiting" ,
117
+ totalQueuedRequestsMetric = flag .String (
118
+ "totalQueuedRequestsMetric" ,
119
+ runserver .DefaultTotalQueuedRequestsMetric ,
107
120
"Prometheus metric for the number of queued requests." )
108
- kvCacheUsagePercentageMetric = flag .String ("kvCacheUsagePercentageMetric" ,
109
- "vllm:gpu_cache_usage_perc" ,
121
+ kvCacheUsagePercentageMetric = flag .String (
122
+ "kvCacheUsagePercentageMetric" ,
123
+ runserver .DefaultKvCacheUsagePercentageMetric ,
110
124
"Prometheus metric for the fraction of KV-cache blocks currently in use (from 0 to 1)." )
111
125
// LoRA metrics
112
- loraInfoMetric = flag .String ("loraInfoMetric" ,
113
- "vllm:lora_requests_info" ,
126
+ loraInfoMetric = flag .String (
127
+ "loraInfoMetric" ,
128
+ runserver .DefaultLoraInfoMetric ,
114
129
"Prometheus metric for the LoRA info metrics (must be in vLLM label format)." )
115
130
// configuration flags
116
- configFile = flag .String ("configFile" , "" , "The path to the configuration file" )
117
- configText = flag .String ("configText" , "" , "The configuration specified as text, in lieu of a file" )
131
+ configFile = flag .String (
132
+ "configFile" ,
133
+ runserver .DefaultConfigFile ,
134
+ "The path to the configuration file" )
135
+ configText = flag .String (
136
+ "configText" ,
137
+ runserver .DefaultConfigText ,
138
+ "The configuration specified as text, in lieu of a file" )
118
139
119
140
setupLog = ctrl .Log .WithName ("setup" )
120
141
@@ -405,7 +426,7 @@ func validateFlags() error {
405
426
return fmt .Errorf ("required %q flag not set" , "poolName" )
406
427
}
407
428
if * configText != "" && * configFile != "" {
408
- return fmt .Errorf ("both the %s and %s flags can not be set at the same time" , "configText" , "configFile" )
429
+ return fmt .Errorf ("both the %q and %q flags can not be set at the same time" , "configText" , "configFile" )
409
430
}
410
431
411
432
return nil
0 commit comments