You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
"The gRPC port used for communicating with Envoy proxy")
80
-
grpcHealthPort=flag.Int(
81
-
"grpc-health-port",
82
-
runserver.DefaultGrpcHealthPort,
83
-
"The port used for gRPC liveness and readiness probes")
84
-
metricsPort=flag.Int(
85
-
"metrics-port",
86
-
runserver.DefaultMetricsPort,
87
-
"The metrics port")
88
-
enablePprof=flag.Bool(
89
-
"enable-pprof",
90
-
runserver.DefaultEnablePprof,
91
-
"Enables pprof handlers. Defaults to true. Set to false to disable pprof handlers.")
92
-
poolName=flag.String(
93
-
"pool-name",
94
-
runserver.DefaultPoolName,
95
-
"Name of the InferencePool this Endpoint Picker is associated with.")
96
-
poolGroup=flag.String(
97
-
"pool-group",
98
-
runserver.DefaultPoolGroup,
99
-
"group of the InferencePool this Endpoint Picker is associated with.")
100
-
poolNamespace=flag.String(
101
-
"pool-namespace",
102
-
runserver.DefaultPoolNamespace,
103
-
"Namespace of the InferencePool this Endpoint Picker is associated with.")
104
-
logVerbosity=flag.Int(
105
-
"v",
106
-
logging.DEFAULT,
107
-
"number for the log level verbosity")
108
-
secureServing=flag.Bool(
109
-
"secure-serving",
110
-
runserver.DefaultSecureServing,
111
-
"Enables secure serving. Defaults to true.")
112
-
healthChecking=flag.Bool(
113
-
"health-checking",
114
-
runserver.DefaultHealthChecking,
115
-
"Enables health checking")
116
-
certPath=flag.String(
117
-
"cert-path",
118
-
runserver.DefaultCertPath,
119
-
"The path to the certificate for secure serving. The certificate and private key files "+
120
-
"are assumed to be named tls.crt and tls.key, respectively. If not set, and secureServing is enabled, "+
121
-
"then a self-signed certificate is used.")
76
+
grpcPort=flag.Int("grpc-port", runserver.DefaultGrpcPort, "The gRPC port used for communicating with Envoy proxy")
77
+
grpcHealthPort=flag.Int("grpc-health-port", runserver.DefaultGrpcHealthPort, "The port used for gRPC liveness and readiness probes")
78
+
metricsPort=flag.Int("metrics-port", runserver.DefaultMetricsPort, "The metrics port")
79
+
enablePprof=flag.Bool("enable-pprof", runserver.DefaultEnablePprof, "Enables pprof handlers. Defaults to true. Set to false to disable pprof handlers.")
80
+
poolName=flag.String("pool-name", runserver.DefaultPoolName, "Name of the InferencePool this Endpoint Picker is associated with.")
81
+
poolGroup=flag.String("pool-group", runserver.DefaultPoolGroup, "group of the InferencePool this Endpoint Picker is associated with.")
82
+
poolNamespace=flag.String("pool-namespace", runserver.DefaultPoolNamespace, "Namespace of the InferencePool this Endpoint Picker is associated with.")
83
+
logVerbosity=flag.Int("v", logging.DEFAULT, "number for the log level verbosity")
84
+
secureServing=flag.Bool("secure-serving", runserver.DefaultSecureServing, "Enables secure serving. Defaults to true.")
85
+
healthChecking=flag.Bool("health-checking", runserver.DefaultHealthChecking, "Enables health checking")
86
+
certPath=flag.String("cert-path", runserver.DefaultCertPath, "The path to the certificate for secure serving. The certificate and private key files "+
87
+
"are assumed to be named tls.crt and tls.key, respectively. If not set, and secureServing is enabled, "+
88
+
"then a self-signed certificate is used.")
122
89
// metric flags
123
-
totalQueuedRequestsMetric=flag.String(
124
-
"total-queued-requests-metric",
125
-
runserver.DefaultTotalQueuedRequestsMetric,
126
-
"Prometheus metric for the number of queued requests.")
127
-
kvCacheUsagePercentageMetric=flag.String(
128
-
"kv-cache-usage-percentage-metric",
129
-
runserver.DefaultKvCacheUsagePercentageMetric,
130
-
"Prometheus metric for the fraction of KV-cache blocks currently in use (from 0 to 1).")
90
+
totalQueuedRequestsMetric=flag.String("total-queued-requests-metric", runserver.DefaultTotalQueuedRequestsMetric, "Prometheus metric for the number of queued requests.")
91
+
kvCacheUsagePercentageMetric=flag.String("kv-cache-usage-percentage-metric", runserver.DefaultKvCacheUsagePercentageMetric, "Prometheus metric for the fraction of KV-cache blocks currently in use (from 0 to 1).")
131
92
// LoRA metrics
132
-
loraInfoMetric=flag.String(
133
-
"lora-info-metric",
134
-
runserver.DefaultLoraInfoMetric,
135
-
"Prometheus metric for the LoRA info metrics (must be in vLLM label format).")
136
-
93
+
loraInfoMetric=flag.String("lora-info-metric", runserver.DefaultLoraInfoMetric, "Prometheus metric for the LoRA info metrics (must be in vLLM label format).")
"Duration after which metrics are considered stale. This is used to determine if a pod's metrics are fresh enough.")
95
+
refreshMetricsInterval=flag.Duration("refresh-metrics-interval", runserver.DefaultRefreshMetricsInterval, "interval to refresh metrics")
96
+
refreshPrometheusMetricsInterval=flag.Duration("refresh-prometheus-metrics-interval", runserver.DefaultRefreshPrometheusMetricsInterval, "interval to flush prometheus metrics")
97
+
metricsStalenessThreshold=flag.Duration("metrics-staleness-threshold", runserver.DefaultMetricsStalenessThreshold, "Duration after which metrics are considered stale. This is used to determine if a pod's metrics are fresh enough.")
149
98
// configuration flags
150
-
configFile=flag.String(
151
-
"config-file",
152
-
runserver.DefaultConfigFile,
153
-
"The path to the configuration file")
154
-
configText=flag.String(
155
-
"config-text",
156
-
runserver.DefaultConfigText,
157
-
"The configuration specified as text, in lieu of a file")
99
+
configFile=flag.String("config-file", runserver.DefaultConfigFile, "The path to the configuration file")
100
+
configText=flag.String("config-text", runserver.DefaultConfigText, "The configuration specified as text, in lieu of a file")
158
101
159
102
modelServerMetricsPort=flag.Int("model-server-metrics-port", 0, "Port to scrape metrics from pods. "+
160
103
"Default value will be set to the InferencePool.Spec.TargetPorts[0].Number if not set.")
161
104
modelServerMetricsPath=flag.String("model-server-metrics-path", "/metrics", "Path to scrape metrics from pods")
162
105
modelServerMetricsScheme=flag.String("model-server-metrics-scheme", "http", "Scheme to scrape metrics from pods")
163
106
modelServerMetricsHttpsInsecureSkipVerify=flag.Bool("model-server-metrics-https-insecure-skip-verify", true, "When using 'https' scheme for 'model-server-metrics-scheme', configure 'InsecureSkipVerify' (default to true)")
164
-
haEnableLeaderElection=flag.Bool(
165
-
"ha-enable-leader-election",
166
-
false,
167
-
"Enables leader election for high availability. When enabled, readiness probes will only pass on the leader.")
107
+
haEnableLeaderElection=flag.Bool("ha-enable-leader-election", false, "Enables leader election for high availability. When enabled, readiness probes will only pass on the leader.")
0 commit comments