Skip to content

Commit 963c084

Browse files
authored
build: change epp-config default yamls and image pull policy (#249)
* build: change epp-config default yaml * build: change imagePullPolicy to IfNotPresent for simulation Signed-off-by: Kfir Toledo <[email protected]> --------- Signed-off-by: Kfir Toledo <[email protected]>
1 parent 7ea73b7 commit 963c084

File tree

7 files changed

+59
-18
lines changed

7 files changed

+59
-18
lines changed

deploy/components/inference-gateway/deployments.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ spec:
1919
containers:
2020
- name: epp
2121
image: ghcr.io/llm-d/llm-d-inference-scheduler:latest
22-
imagePullPolicy: Always
22+
imagePullPolicy: IfNotPresent
2323
args:
2424
- -poolName
2525
- "${POOL_NAME}"

deploy/config/epp-config.yaml

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,6 @@ apiVersion: inference.networking.x-k8s.io/v1alpha1
44
kind: EndpointPickerConfig
55
plugins:
66
- type: prefix-cache-scorer
7-
parameters:
8-
hashBlockSize: 5
9-
maxPrefixBlocksToMatch: 256
10-
lruCapacityPerServer: 31250
117
- type: decode-filter
128
- type: max-score-picker
139
- type: single-profile-handler
@@ -17,4 +13,4 @@ schedulingProfiles:
1713
- pluginRef: decode-filter
1814
- pluginRef: max-score-picker
1915
- pluginRef: prefix-cache-scorer
20-
weight: 50
16+
weight: 2

deploy/config/pd-epp-config.yaml

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,27 +4,20 @@ kind: EndpointPickerConfig
44
plugins:
55
- type: prefill-header-handler
66
- type: prefix-cache-scorer
7-
parameters:
8-
hashBlockSize: 5
9-
maxPrefixBlocksToMatch: 256
10-
lruCapacityPerServer: 31250
117
- type: prefill-filter
128
- type: decode-filter
139
- type: max-score-picker
1410
- type: pd-profile-handler
15-
parameters:
16-
threshold: 10
17-
hashBlockSize: 5
1811
schedulingProfiles:
1912
- name: prefill
2013
plugins:
2114
- pluginRef: prefill-filter
2215
- pluginRef: max-score-picker
2316
- pluginRef: prefix-cache-scorer
24-
weight: 50
17+
weight: 2
2518
- name: decode
2619
plugins:
2720
- pluginRef: decode-filter
2821
- pluginRef: max-score-picker
2922
- pluginRef: prefix-cache-scorer
30-
weight: 50
23+
weight: 2

deploy/config/sim-epp-config.yaml

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Sample EPP configuration for running without P/D
2+
# with small hash block size for simulation purposes
3+
apiVersion: inference.networking.x-k8s.io/v1alpha1
4+
kind: EndpointPickerConfig
5+
plugins:
6+
- type: prefix-cache-scorer
7+
parameters:
8+
hashBlockSize: 5
9+
maxPrefixBlocksToMatch: 256
10+
lruCapacityPerServer: 31250
11+
- type: decode-filter
12+
- type: max-score-picker
13+
- type: single-profile-handler
14+
schedulingProfiles:
15+
- name: default
16+
plugins:
17+
- pluginRef: decode-filter
18+
- pluginRef: max-score-picker
19+
- pluginRef: prefix-cache-scorer
20+
weight: 2

deploy/config/sim-pd-epp-config.yaml

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# Sample EPP configuration for running with P/D
2+
# Use with small hash block size for simulation purposes
3+
apiVersion: inference.networking.x-k8s.io/v1alpha1
4+
kind: EndpointPickerConfig
5+
plugins:
6+
- type: prefill-header-handler
7+
- type: prefix-cache-scorer
8+
parameters:
9+
hashBlockSize: 5
10+
maxPrefixBlocksToMatch: 256
11+
lruCapacityPerServer: 31250
12+
- type: prefill-filter
13+
- type: decode-filter
14+
- type: max-score-picker
15+
- type: pd-profile-handler
16+
parameters:
17+
threshold: 10
18+
hashBlockSize: 5
19+
schedulingProfiles:
20+
- name: prefill
21+
plugins:
22+
- pluginRef: prefill-filter
23+
- pluginRef: max-score-picker
24+
- pluginRef: prefix-cache-scorer
25+
weight: 2
26+
- name: decode
27+
plugins:
28+
- pluginRef: decode-filter
29+
- pluginRef: max-score-picker
30+
- pluginRef: prefix-cache-scorer
31+
weight: 2

deploy/environments/dev/kubernetes-kgateway/patch-deployments.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ spec:
88
containers:
99
- name: epp
1010
image: ${EPP_IMAGE}:${EPP_TAG}
11+
imagePullPolicy: Always
1112
args:
1213
- -poolName
1314
- ${POOL_NAME}

scripts/kind-dev-env.sh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,9 +58,9 @@ export VLLM_REPLICA_COUNT_P="${VLLM_REPLICA_COUNT_P:-1}"
5858
export VLLM_REPLICA_COUNT_D="${VLLM_REPLICA_COUNT_D:-2}"
5959

6060
if [ "${PD_ENABLED}" != "\"true\"" ]; then
61-
DEFAULT_EPP_CONFIG="deploy/config/epp-config.yaml"
61+
DEFAULT_EPP_CONFIG="deploy/config/sim-epp-config.yaml"
6262
else
63-
DEFAULT_EPP_CONFIG="deploy/config/pd-epp-config.yaml"
63+
DEFAULT_EPP_CONFIG="deploy/config/sim-pd-epp-config.yaml"
6464
fi
6565
export EPP_CONFIG="${EPP_CONFIG:-${DEFAULT_EPP_CONFIG}}"
6666
# ------------------------------------------------------------------------------
@@ -170,7 +170,7 @@ if [ "${PD_ENABLED}" != "\"true\"" ]; then
170170
KUSTOMIZE_DIR="deploy/environments/dev/kind-istio"
171171
else
172172
KUSTOMIZE_DIR="deploy/environments/dev/kind-istio-pd"
173-
fi
173+
fi
174174

175175
kubectl --context ${KUBE_CONTEXT} create configmap epp-config --from-file=epp-config.yaml=${EPP_CONFIG}
176176

0 commit comments

Comments
 (0)