@@ -104,80 +104,18 @@ metadata:
104
104
namespace : $E2E_NS
105
105
data :
106
106
default-plugins.yaml : |
107
- apiVersion: inference.networking.x-k8s.io/v1alpha1
108
- kind: EndpointPickerConfig
109
- plugins:
110
- - type: low-queue-filter
111
- parameters:
112
- threshold: 128
113
- - type: lora-affinity-filter
114
- parameters:
115
- threshold: 0.999
116
- - type: least-queue-filter
117
- - type: least-kv-cache-filter
118
- - type: decision-tree-filter
119
- name: low-latency-filter
120
- parameters:
121
- current:
122
- pluginRef: low-queue-filter
123
- nextOnSuccess:
124
- decisionTree:
125
- current:
126
- pluginRef: lora-affinity-filter
127
- nextOnSuccessOrFailure:
128
- decisionTree:
129
- current:
130
- pluginRef: least-queue-filter
131
- nextOnSuccessOrFailure:
132
- decisionTree:
133
- current:
134
- pluginRef: least-kv-cache-filter
135
- nextOnFailure:
136
- decisionTree:
137
- current:
138
- pluginRef: least-queue-filter
139
- nextOnSuccessOrFailure:
140
- decisionTree:
141
- current:
142
- pluginRef: lora-affinity-filter
143
- nextOnSuccessOrFailure:
144
- decisionTree:
145
- current:
146
- pluginRef: least-kv-cache-filter
147
- - type: random-picker
148
- parameters:
149
- maxNumOfEndpoints: 1
150
- - type: single-profile-handler
151
- schedulingProfiles:
152
- - name: default
153
- plugins:
154
- - pluginRef: low-latency-filter
155
- - pluginRef: random-picker
156
- plugins-v2.yaml : |
157
107
apiVersion: inference.networking.x-k8s.io/v1alpha1
158
108
kind: EndpointPickerConfig
159
109
plugins:
160
110
- type: queue-scorer
161
111
- type: kv-cache-utilization-scorer
162
112
- type: prefix-cache-scorer
163
- parameters:
164
- hashBlockSize: 64
165
- maxPrefixBlocksToMatch: 256
166
- lruCapacityPerServer: 31250
167
- - type: max-score-picker
168
- parameters:
169
- maxNumOfEndpoints: 1
170
- - type: single-profile-handler
171
113
schedulingProfiles:
172
114
- name: default
173
115
plugins:
174
116
- pluginRef: queue-scorer
175
- weight: 1
176
117
- pluginRef: kv-cache-utilization-scorer
177
- weight: 1
178
118
- pluginRef: prefix-cache-scorer
179
- weight: 1
180
- - pluginRef: max-score-picker
181
119
---
182
120
kind : Role
183
121
apiVersion : rbac.authorization.k8s.io/v1
0 commit comments