File tree Expand file tree Collapse file tree 7 files changed +59
-18
lines changed
components/inference-gateway
environments/dev/kubernetes-kgateway Expand file tree Collapse file tree 7 files changed +59
-18
lines changed Original file line number Diff line number Diff line change 1919 containers :
2020 - name : epp
2121 image : ghcr.io/llm-d/llm-d-inference-scheduler:latest
22- imagePullPolicy : Always
22+ imagePullPolicy : IfNotPresent
2323 args :
2424 - -poolName
2525 - " ${POOL_NAME}"
Original file line number Diff line number Diff line change @@ -4,10 +4,6 @@ apiVersion: inference.networking.x-k8s.io/v1alpha1
44kind : EndpointPickerConfig
55plugins :
66- type : prefix-cache-scorer
7- parameters :
8- hashBlockSize : 5
9- maxPrefixBlocksToMatch : 256
10- lruCapacityPerServer : 31250
117- type : decode-filter
128- type : max-score-picker
139- type : single-profile-handler
@@ -17,4 +13,4 @@ schedulingProfiles:
1713 - pluginRef : decode-filter
1814 - pluginRef : max-score-picker
1915 - pluginRef : prefix-cache-scorer
20- weight : 50
16+ weight : 2
Original file line number Diff line number Diff line change @@ -4,27 +4,20 @@ kind: EndpointPickerConfig
44plugins :
55- type : prefill-header-handler
66- type : prefix-cache-scorer
7- parameters :
8- hashBlockSize : 5
9- maxPrefixBlocksToMatch : 256
10- lruCapacityPerServer : 31250
117- type : prefill-filter
128- type : decode-filter
139- type : max-score-picker
1410- type : pd-profile-handler
15- parameters :
16- threshold : 10
17- hashBlockSize : 5
1811schedulingProfiles :
1912- name : prefill
2013 plugins :
2114 - pluginRef : prefill-filter
2215 - pluginRef : max-score-picker
2316 - pluginRef : prefix-cache-scorer
24- weight : 50
17+ weight : 2
2518- name : decode
2619 plugins :
2720 - pluginRef : decode-filter
2821 - pluginRef : max-score-picker
2922 - pluginRef : prefix-cache-scorer
30- weight : 50
23+ weight : 2
Original file line number Diff line number Diff line change 1+ # Sample EPP configuration for running without P/D
2+ # with small hash block size for simulation purposes
3+ apiVersion : inference.networking.x-k8s.io/v1alpha1
4+ kind : EndpointPickerConfig
5+ plugins :
6+ - type : prefix-cache-scorer
7+ parameters :
8+ hashBlockSize : 5
9+ maxPrefixBlocksToMatch : 256
10+ lruCapacityPerServer : 31250
11+ - type : decode-filter
12+ - type : max-score-picker
13+ - type : single-profile-handler
14+ schedulingProfiles :
15+ - name : default
16+ plugins :
17+ - pluginRef : decode-filter
18+ - pluginRef : max-score-picker
19+ - pluginRef : prefix-cache-scorer
20+ weight : 2
Original file line number Diff line number Diff line change 1+ # Sample EPP configuration for running with P/D
2+ # Use with small hash block size for simulation purposes
3+ apiVersion : inference.networking.x-k8s.io/v1alpha1
4+ kind : EndpointPickerConfig
5+ plugins :
6+ - type : prefill-header-handler
7+ - type : prefix-cache-scorer
8+ parameters :
9+ hashBlockSize : 5
10+ maxPrefixBlocksToMatch : 256
11+ lruCapacityPerServer : 31250
12+ - type : prefill-filter
13+ - type : decode-filter
14+ - type : max-score-picker
15+ - type : pd-profile-handler
16+ parameters :
17+ threshold : 10
18+ hashBlockSize : 5
19+ schedulingProfiles :
20+ - name : prefill
21+ plugins :
22+ - pluginRef : prefill-filter
23+ - pluginRef : max-score-picker
24+ - pluginRef : prefix-cache-scorer
25+ weight : 2
26+ - name : decode
27+ plugins :
28+ - pluginRef : decode-filter
29+ - pluginRef : max-score-picker
30+ - pluginRef : prefix-cache-scorer
31+ weight : 2
Original file line number Diff line number Diff line change 88 containers :
99 - name : epp
1010 image : ${EPP_IMAGE}:${EPP_TAG}
11+ imagePullPolicy : Always
1112 args :
1213 - -poolName
1314 - ${POOL_NAME}
Original file line number Diff line number Diff line change @@ -58,9 +58,9 @@ export VLLM_REPLICA_COUNT_P="${VLLM_REPLICA_COUNT_P:-1}"
5858export VLLM_REPLICA_COUNT_D=" ${VLLM_REPLICA_COUNT_D:- 2} "
5959
6060if [ " ${PD_ENABLED} " != " \" true\" " ]; then
61- DEFAULT_EPP_CONFIG=" deploy/config/epp-config.yaml"
61+ DEFAULT_EPP_CONFIG=" deploy/config/sim- epp-config.yaml"
6262else
63- DEFAULT_EPP_CONFIG=" deploy/config/pd-epp-config.yaml"
63+ DEFAULT_EPP_CONFIG=" deploy/config/sim- pd-epp-config.yaml"
6464fi
6565export EPP_CONFIG=" ${EPP_CONFIG:- ${DEFAULT_EPP_CONFIG} } "
6666# ------------------------------------------------------------------------------
@@ -170,7 +170,7 @@ if [ "${PD_ENABLED}" != "\"true\"" ]; then
170170 KUSTOMIZE_DIR=" deploy/environments/dev/kind-istio"
171171else
172172 KUSTOMIZE_DIR=" deploy/environments/dev/kind-istio-pd"
173- fi
173+ fi
174174
175175kubectl --context ${KUBE_CONTEXT} create configmap epp-config --from-file=epp-config.yaml=${EPP_CONFIG}
176176
You can’t perform that action at this time.
0 commit comments