@@ -7,12 +7,34 @@ inferenceExtension:
7
7
pullPolicy : Always
8
8
extProcPort : 9002
9
9
env : []
10
- enablePprof : true # Enable pprof handlers for profiling and debugging
10
+ enablePprof : true # Enable pprof handlers for profiling and debugging
11
11
modelServerMetricsPath : " /metrics"
12
12
modelServerMetricsScheme : " http"
13
13
modelServerMetricsHttpsInsecureSkipVerify : true
14
- # This is the plugins configuration file.
14
+ grpcPort : 9002
15
+ grpcHealthPort : 9003
16
+ metricsPort : 9090
17
+ destinationEndpointHintMetadataNamespace : " envoy.lb"
18
+ destinationEndpointHintKey : " x-gateway-destination-endpoint"
19
+ fairnessIDHeaderKey : " x-gateway-inference-fairness-id"
20
+ poolName : " "
21
+ poolNamespace : " default"
22
+ refreshMetricsInterval : " 50ms"
23
+ refreshPrometheusMetricsInterval : " 5s"
24
+ secureServing : true
25
+ healthChecking : false
26
+ totalQueuedRequestsMetric : " vllm:num_requests_waiting"
27
+ kvCacheUsagePercentageMetric : " vllm:gpu_cache_usage_perc"
28
+ loraInfoMetric : " vllm:lora_requests_info"
29
+ certPath : " "
30
+ configFile : " "
31
+ configText : " "
32
+ metricsStalenessThreshold : " 2s"
33
+
15
34
pluginsConfigFile : " default-plugins.yaml"
35
+ logVerbosity : 1
36
+
37
+ # pluginsConfigFile (above) is the plugins configuration file; a commented-out pluginsCustomConfig example follows.
16
38
# pluginsCustomConfig:
17
39
# custom-plugins.yaml: |
18
40
# apiVersion: inference.networking.x-k8s.io/v1alpha1
@@ -34,18 +56,18 @@ inferenceExtension:
34
56
# Example environment variables:
35
57
# env:
36
58
# KV_CACHE_SCORE_WEIGHT: "1"
37
-
38
59
# Define additional container ports (extraContainerPorts, below)
60
+ modelServerMetricsPort : 0
39
61
extraContainerPorts : []
40
62
# Define additional service ports
41
63
extraServicePorts : []
42
64
43
65
inferencePool : # Inference pool settings: target port, model server type, and model server label selector
44
66
targetPortNumber : 8000
45
67
modelServerType : vllm # options: vllm, triton-tensorrt-llm
46
- # modelServers: # REQUIRED
47
- # matchLabels:
48
- # app: vllm-llama3-8b-instruct
68
+ modelServers : # NOTE(review): was commented out and marked REQUIRED; confirm this example selector should be enabled by default
69
+ matchLabels : # presumably labels identifying the model server workloads; verify against the chart templates
70
+ app : vllm-llama3-8b-instruct
49
71
50
72
provider :
51
73
name : none
0 commit comments