File tree Expand file tree Collapse file tree 7 files changed +59
-18
lines changed
components/inference-gateway
environments/dev/kubernetes-kgateway Expand file tree Collapse file tree 7 files changed +59
-18
lines changed Original file line number Diff line number Diff line change 19
19
containers :
20
20
- name : epp
21
21
image : ghcr.io/llm-d/llm-d-inference-scheduler:latest
22
- imagePullPolicy : Always
22
+ imagePullPolicy : IfNotPresent
23
23
args :
24
24
- -poolName
25
25
- " ${POOL_NAME}"
Original file line number Diff line number Diff line change @@ -4,10 +4,6 @@ apiVersion: inference.networking.x-k8s.io/v1alpha1
4
4
kind : EndpointPickerConfig
5
5
plugins :
6
6
- type : prefix-cache-scorer
7
- parameters :
8
- hashBlockSize : 5
9
- maxPrefixBlocksToMatch : 256
10
- lruCapacityPerServer : 31250
11
7
- type : decode-filter
12
8
- type : max-score-picker
13
9
- type : single-profile-handler
@@ -17,4 +13,4 @@ schedulingProfiles:
17
13
- pluginRef : decode-filter
18
14
- pluginRef : max-score-picker
19
15
- pluginRef : prefix-cache-scorer
20
- weight : 50
16
+ weight : 2
Original file line number Diff line number Diff line change @@ -4,27 +4,20 @@ kind: EndpointPickerConfig
4
4
plugins :
5
5
- type : prefill-header-handler
6
6
- type : prefix-cache-scorer
7
- parameters :
8
- hashBlockSize : 5
9
- maxPrefixBlocksToMatch : 256
10
- lruCapacityPerServer : 31250
11
7
- type : prefill-filter
12
8
- type : decode-filter
13
9
- type : max-score-picker
14
10
- type : pd-profile-handler
15
- parameters :
16
- threshold : 10
17
- hashBlockSize : 5
18
11
schedulingProfiles :
19
12
- name : prefill
20
13
plugins :
21
14
- pluginRef : prefill-filter
22
15
- pluginRef : max-score-picker
23
16
- pluginRef : prefix-cache-scorer
24
- weight : 50
17
+ weight : 2
25
18
- name : decode
26
19
plugins :
27
20
- pluginRef : decode-filter
28
21
- pluginRef : max-score-picker
29
22
- pluginRef : prefix-cache-scorer
30
- weight : 50
23
+ weight : 2
Original file line number Diff line number Diff line change
1
+ # Sample EPP configuration for running without P/D
2
+ # with small hash block size for simulation purposes
3
+ apiVersion : inference.networking.x-k8s.io/v1alpha1
4
+ kind : EndpointPickerConfig
5
+ plugins :
6
+ - type : prefix-cache-scorer
7
+ parameters :
8
+ hashBlockSize : 5
9
+ maxPrefixBlocksToMatch : 256
10
+ lruCapacityPerServer : 31250
11
+ - type : decode-filter
12
+ - type : max-score-picker
13
+ - type : single-profile-handler
14
+ schedulingProfiles :
15
+ - name : default
16
+ plugins :
17
+ - pluginRef : decode-filter
18
+ - pluginRef : max-score-picker
19
+ - pluginRef : prefix-cache-scorer
20
+ weight : 2
Original file line number Diff line number Diff line change
1
+ # Sample EPP configuration for running with P/D
2
+ # Use with small hash block size for simulation purposes
3
+ apiVersion : inference.networking.x-k8s.io/v1alpha1
4
+ kind : EndpointPickerConfig
5
+ plugins :
6
+ - type : prefill-header-handler
7
+ - type : prefix-cache-scorer
8
+ parameters :
9
+ hashBlockSize : 5
10
+ maxPrefixBlocksToMatch : 256
11
+ lruCapacityPerServer : 31250
12
+ - type : prefill-filter
13
+ - type : decode-filter
14
+ - type : max-score-picker
15
+ - type : pd-profile-handler
16
+ parameters :
17
+ threshold : 10
18
+ hashBlockSize : 5
19
+ schedulingProfiles :
20
+ - name : prefill
21
+ plugins :
22
+ - pluginRef : prefill-filter
23
+ - pluginRef : max-score-picker
24
+ - pluginRef : prefix-cache-scorer
25
+ weight : 2
26
+ - name : decode
27
+ plugins :
28
+ - pluginRef : decode-filter
29
+ - pluginRef : max-score-picker
30
+ - pluginRef : prefix-cache-scorer
31
+ weight : 2
Original file line number Diff line number Diff line change 8
8
containers :
9
9
- name : epp
10
10
image : ${EPP_IMAGE}:${EPP_TAG}
11
+ imagePullPolicy : Always
11
12
args :
12
13
- -poolName
13
14
- ${POOL_NAME}
Original file line number Diff line number Diff line change @@ -58,9 +58,9 @@ export VLLM_REPLICA_COUNT_P="${VLLM_REPLICA_COUNT_P:-1}"
58
58
export VLLM_REPLICA_COUNT_D=" ${VLLM_REPLICA_COUNT_D:- 2} "
59
59
60
60
if [ " ${PD_ENABLED} " != " \" true\" " ]; then
61
- DEFAULT_EPP_CONFIG=" deploy/config/epp-config.yaml"
61
+ DEFAULT_EPP_CONFIG=" deploy/config/sim- epp-config.yaml"
62
62
else
63
- DEFAULT_EPP_CONFIG=" deploy/config/pd-epp-config.yaml"
63
+ DEFAULT_EPP_CONFIG=" deploy/config/sim- pd-epp-config.yaml"
64
64
fi
65
65
export EPP_CONFIG=" ${EPP_CONFIG:- ${DEFAULT_EPP_CONFIG} } "
66
66
# ------------------------------------------------------------------------------
@@ -170,7 +170,7 @@ if [ "${PD_ENABLED}" != "\"true\"" ]; then
170
170
KUSTOMIZE_DIR=" deploy/environments/dev/kind-istio"
171
171
else
172
172
KUSTOMIZE_DIR=" deploy/environments/dev/kind-istio-pd"
173
- fi
173
+ fi
174
174
175
175
kubectl --context ${KUBE_CONTEXT} create configmap epp-config --from-file=epp-config.yaml=${EPP_CONFIG}
176
176
You can’t perform that action at this time.
0 commit comments