
Commit 79b113f

[Standup] Enable the use of gateway provided by RHOAI (#646)
Two environment variables require changes:

```
LLMDBENCH_VLLM_MODELSERVICE_GATEWAY_CLASS_NAME=data-science-gateway-class
LLMDBENCH_VLLM_MODELSERVICE_INFERENCEPOOL_API=inference.networking.x-k8s.io/v1alpha2
```

The resulting gateway exposes only port `443` (https). `run.sh` was also adjusted so that reachability can be detected across different ports and protocols.

Signed-off-by: maugustosilva <maugusto.silva@gmail.com>
1 parent 4b1afb7 commit 79b113f
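To stand up against the RHOAI-provided gateway, it is enough to export the two variables from the commit message before running the setup scripts. A minimal sketch (the surrounding scenario and invocation are assumptions, not part of this commit):

```shell
# Hypothetical usage: select the RHOAI gateway class and the x-k8s InferencePool API
# before invoking the standup/run scripts.
export LLMDBENCH_VLLM_MODELSERVICE_GATEWAY_CLASS_NAME=data-science-gateway-class
export LLMDBENCH_VLLM_MODELSERVICE_INFERENCEPOOL_API=inference.networking.x-k8s.io/v1alpha2
```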

9 files changed: +91, -20 lines


scenarios/examples/gpu.sh

Lines changed: 3 additions & 1 deletion
```diff
@@ -28,8 +28,10 @@
 ######export LLMDBENCH_DEPLOY_METHODS=standalone
 #export LLMDBENCH_DEPLOY_METHODS=modelservice
 
-#export LLMDBENCH_VLLM_MODELSERVICE_GATEWAY_CLASS_NAME=istio
+#export LLMDBENCH_VLLM_MODELSERVICE_GATEWAY_CLASS_NAME=data-science-gateway-class
+#export LLMDBENCH_VLLM_MODELSERVICE_INFERENCEPOOL_API=inference.networking.x-k8s.io/v1alpha2
 
+#export LLMDBENCH_VLLM_MODELSERVICE_MULTINODE=true
 
 # Affinity to select node with appropriate accelerator (leave uncommented to automatically detect GPU... WILL WORK FOR OpenShift, Kubernetes and GKE)
 #export LLMDBENCH_VLLM_COMMON_AFFINITY=nvidia.com/gpu.product:NVIDIA-H100-80GB-HBM3 # OpenShift
```

setup/env.sh

Lines changed: 2 additions & 1 deletion
```diff
@@ -18,7 +18,7 @@ export LLMDBENCH_LLMD_ROUTINGSIDECAR_IMAGE_TAG=${LLMDBENCH_LLMD_ROUTINGSIDECAR_I
 export LLMDBENCH_VLLM_STANDALONE_IMAGE_TAG=${LLMDBENCH_VLLM_STANDALONE_IMAGE_TAG:-auto}
 export LLMDBENCH_GATEWAY_PROVIDER_KGATEWAY_CHART_VERSION=${LLMDBENCH_GATEWAY_PROVIDER_KGATEWAY_CHART_VERSION:-"v2.1.1"}
 export LLMDBENCH_GATEWAY_PROVIDER_ISTIO_CHART_VERSION=${LLMDBENCH_GATEWAY_PROVIDER_ISTIO_CHART_VERSION:-"1.28.1"}
-export LLMDBENCH_VLLM_INFRA_CHART_VERSION=${LLMDBENCH_VLLM_INFRA_CHART_VERSION:-v1.3.5}
+export LLMDBENCH_VLLM_INFRA_CHART_VERSION=${LLMDBENCH_VLLM_INFRA_CHART_VERSION:-v1.3.8}
 export LLMDBENCH_GATEWAY_API_CRD_REVISION=${LLMDBENCH_GATEWAY_API_CRD_REVISION:-"v1.3.0"}
 export LLMDBENCH_WVA_CHART_VERSION="${LLMDBENCH_WVA_CHART_VERSION:-0.4.2}"
 #FIXME: oci helm repos do not output a list of versions. Use "skopeo list-tags docker://registry.k8s.io/gateway-api-inference-extension/charts/inferencepool"
@@ -196,6 +196,7 @@ export LLMDBENCH_VLLM_MODELSERVICE_HELM_REPOSITORY=${LLMDBENCH_VLLM_MODELSERVICE
 export LLMDBENCH_VLLM_MODELSERVICE_HELM_REPOSITORY_URL=${LLMDBENCH_VLLM_MODELSERVICE_HELM_REPOSITORY_URL:-"https://llm-d-incubation.github.io/llm-d-modelservice/"}
 export LLMDBENCH_VLLM_MODELSERVICE_URI_PROTOCOL=${LLMDBENCH_VLLM_MODELSERVICE_URI_PROTOCOL:-"pvc"}
 export LLMDBENCH_VLLM_MODELSERVICE_GATEWAY_CLASS_NAME=${LLMDBENCH_VLLM_MODELSERVICE_GATEWAY_CLASS_NAME:-"istio"}
+export LLMDBENCH_VLLM_MODELSERVICE_INFERENCEPOOL_API=${LLMDBENCH_VLLM_MODELSERVICE_INFERENCEPOOL_API:-"inference.networking.k8s.io/v1"}
 export LLMDBENCH_VLLM_MODELSERVICE_GATEWAY_SERVICE_TYPE=${LLMDBENCH_VLLM_MODELSERVICE_GATEWAY_SERVICE_TYPE:-NodePort}
 export LLMDBENCH_VLLM_MODELSERVICE_ROUTE=${LLMDBENCH_VLLM_MODELSERVICE_ROUTE:-false}
 # Endpoint Picker Parameters
```
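All of these are `${VAR:-default}` assignments, so env.sh only supplies defaults and any value already exported by the caller takes precedence. A minimal sketch of that behavior:

```shell
# ${VAR:-default}: keep the caller's value when set, otherwise fall back to the default.
export LLMDBENCH_VLLM_MODELSERVICE_INFERENCEPOOL_API=${LLMDBENCH_VLLM_MODELSERVICE_INFERENCEPOOL_API:-"inference.networking.k8s.io/v1"}
echo "${LLMDBENCH_VLLM_MODELSERVICE_INFERENCEPOOL_API}"  # prints the default unless it was already exported
```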

setup/functions.py

Lines changed: 13 additions & 6 deletions
```diff
@@ -1854,16 +1854,20 @@ def get_model_name_from_pod(api: pykube.HTTPClient,
     if not ip :
         return "empty", "N/A"
 
-    pod_name = f"testinference-pod-{get_rand_string()}"
-    if "http://" not in ip:
-        ip = "http://" + ip
+
+    protocol = 'http'
+    if port == '443' :
+        protocol = 'https'
+    if f"{protocol}://" not in ip:
+        ip = f"{protocol}://" + ip
     if ip.count(":") == 1:
         ip = ip + ":" + port
     ip = ip + "/v1/models"
-    curl_command = f"curl --no-progress-meter {ip}"
-    full_command = ["/bin/bash", "-c", f"curl --no-progress-meter {ip}"]
+    curl_command = f"curl -k --no-progress-meter {ip}"
+    full_command = ["/bin/bash", "-c", f"{curl_command}"]
 
     while current_attempts <= total_attempts :
+        pod_name = f"testinference-pod-{get_rand_string()}"
         pod_manifest = client.V1Pod(
             metadata=client.V1ObjectMeta(name=pod_name, namespace=ev['vllm_common_namespace'], labels={"llm-d.ai/id": f"{pod_name}"}),
             spec=client.V1PodSpec(
@@ -1965,7 +1969,10 @@ def wait_for_pods_created_running_ready(api_client, ev: dict, component_nr: int,
         label_selector=f"llm-d.ai/model={ev['deploy_current_model_id_label']},llm-d.ai/role={component}"
         silent = False
     elif component in [ "gateway" ] :
-        label_selector = f"app.kubernetes.io/name=llm-d-infra"
+        if ev['vllm_modelservice_gateway_class_name'] == "data-science-gateway-class":
+            label_selector = f"gateway.istio.io/managed=istio.io-gateway-controller"
+        else :
+            label_selector = f"app.kubernetes.io/name=llm-d-infra"
         silent = False
     elif component in [ "inferencepool" ] :
         label_selector = f"inferencepool={ev['deploy_current_model_id_label']}-gaie-epp"
```
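The probe now derives the protocol from the target port (443 implies HTTPS) and passes `-k` so curl accepts the gateway's self-signed certificate. A condensed shell sketch of that logic, with placeholder host/port values and without the in-cluster pod launch:

```shell
# GATEWAY_HOST and GATEWAY_PORT are placeholders for the values discovered by the setup code.
GATEWAY_HOST=${GATEWAY_HOST:-gateway.example.internal}
GATEWAY_PORT=${GATEWAY_PORT:-443}

protocol=http
if [[ "${GATEWAY_PORT}" == "443" ]]; then
  protocol=https
fi

# -k tolerates certificates that are not signed by a trusted CA.
curl -k --no-progress-meter "${protocol}://${GATEWAY_HOST}:${GATEWAY_PORT}/v1/models"
```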

setup/functions.sh

Lines changed: 2 additions & 2 deletions
```diff
@@ -621,7 +621,7 @@ function get_model_name_from_pod {
   local url=$3
   local port=$4
 
-  has_protocol=$(echo $url | grep "http://" || true)
+  has_protocol=$(echo $url | grep -E "http://|https://" || true)
   if [[ -z $has_protocol ]]; then
     local url="http://$url"
   fi
@@ -633,7 +633,7 @@ function get_model_name_from_pod {
   # --- END: Corrected Port Logic ---
 
   local url=$url/v1/models
-  local response=$(llmdbench_execute_cmd "${LLMDBENCH_CONTROL_KCMD} run testinference-pod-$(get_rand_string) -n $namespace --attach --restart=Never --rm --image=$image --quiet --command -- bash -c \"curl --no-progress-meter $url\"" ${LLMDBENCH_CONTROL_DRY_RUN} 0 0 2 0)
+  local response=$(llmdbench_execute_cmd "${LLMDBENCH_CONTROL_KCMD} run testinference-pod-$(get_rand_string) -n $namespace --attach --restart=Never --rm --image=$image --quiet --command -- bash -c \"curl -k --no-progress-meter $url\"" ${LLMDBENCH_CONTROL_DRY_RUN} 0 0 2 0)
   is_jq=$(echo $response | jq -r . || true)
 
   if [[ -z $is_jq ]]; then
```
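The shell variant performs the same probe from a throwaway pod inside the cluster. A rough equivalent with plain kubectl (namespace, image, and URL are placeholders; the real function builds them from the environment):

```shell
# NAMESPACE, IMAGE and URL are placeholders.
kubectl run "testinference-pod-${RANDOM}" \
  --namespace "${NAMESPACE}" \
  --image "${IMAGE}" \
  --attach --rm --restart=Never --quiet \
  --command -- bash -c "curl -k --no-progress-meter ${URL}/v1/models"
```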

setup/run.sh

Lines changed: 8 additions & 2 deletions
```diff
@@ -270,6 +270,7 @@ for method in ${LLMDBENCH_DEPLOY_METHODS//,/ }; do
   export LLMDBENCH_HARNESS_STACK_ENDPOINT_LAUNCHER_PORT=
   export LLMDBENCH_HARNESS_STACK_ENDPOINT_LAUNCHER_VLLM_PORT=
   export LLMDBENCH_VLLM_FQDN=".${LLMDBENCH_VLLM_COMMON_NAMESPACE}${LLMDBENCH_VLLM_COMMON_FQDN}"
+  export LLMDBENCH_HARNESS_STACK_ENDPOINT_PROTOCOL=http
 
   if [[ $LLMDBENCH_CONTROL_ENVIRONMENT_TYPE_STANDALONE_ACTIVE -eq 1 ]]; then
     export LLMDBENCH_CONTROL_ENV_VAR_LIST_TO_POD="LLMDBENCH_RUN_EXPERIMENT|LLMDBENCH_BASE64_CONTEXT_CONTENTS|^LLMDBENCH_VLLM_COMMON|^LLMDBENCH_VLLM_STANDALONE|^LLMDBENCH_DEPLOY|^LLMDBENCH_HARNESS|^LLMDBENCH_RUN"
@@ -291,6 +292,11 @@ for method in ${LLMDBENCH_DEPLOY_METHODS//,/ }; do
       export LLMDBENCH_HARNESS_STACK_ENDPOINT_NAME=${LLMDBENCH_HARNESS_STACK_ENDPOINT_NAME}${LLMDBENCH_VLLM_FQDN}
     fi
     export LLMDBENCH_HARNESS_STACK_ENDPOINT_PORT=80
+    _listener_name=$(echo $LLMDBENCH_HARNESS_STACK_ENDPOINT_INFO | jq -r '.items[0].spec.listeners[0].name')
+    if [[ ${_listener_name} == "https" ]]; then
+      export LLMDBENCH_HARNESS_STACK_ENDPOINT_PORT=443
+      export LLMDBENCH_HARNESS_STACK_ENDPOINT_PROTOCOL=https
+    fi
     export LLMDBENCH_HARNESS_STACK_ENDPOINT_LAUNCHER_PORT=81
     export LLMDBENCH_HARNESS_STACK_ENDPOINT_LAUNCHER_VLLM_PORT=82
   fi
@@ -304,7 +310,7 @@ for method in ${LLMDBENCH_DEPLOY_METHODS//,/ }; do
     export LLMDBENCH_HARNESS_STACK_TYPE=vllm-prod
     export LLMDBENCH_HARNESS_STACK_ENDPOINT_NAME=$(${LLMDBENCH_CONTROL_KCMD} --namespace "$LLMDBENCH_VLLM_COMMON_NAMESPACE" get service --no-headers | awk '{print $1}' | grep ${LLMDBENCH_DEPLOY_METHODS} || true)
     if [[ ! -z $LLMDBENCH_HARNESS_STACK_ENDPOINT_NAME ]]; then
-      for i in default http; do
+      for i in default http https; do
        export LLMDBENCH_HARNESS_STACK_ENDPOINT_PORT=$(${LLMDBENCH_CONTROL_KCMD} --namespace "$LLMDBENCH_VLLM_COMMON_NAMESPACE" get service/$LLMDBENCH_HARNESS_STACK_ENDPOINT_NAME --no-headers -o json | jq -r ".spec.ports[] | select(.name == \"$i\") | .port")
        if [[ ! -z $LLMDBENCH_HARNESS_STACK_ENDPOINT_PORT ]]; then
          break
@@ -368,7 +374,7 @@ for method in ${LLMDBENCH_DEPLOY_METHODS//,/ }; do
     exit 1
   fi
 
-  export LLMDBENCH_HARNESS_STACK_ENDPOINT_URL="http://${LLMDBENCH_HARNESS_STACK_ENDPOINT_NAME}:${LLMDBENCH_HARNESS_STACK_ENDPOINT_PORT}"
+  export LLMDBENCH_HARNESS_STACK_ENDPOINT_URL="${LLMDBENCH_HARNESS_STACK_ENDPOINT_PROTOCOL}://${LLMDBENCH_HARNESS_STACK_ENDPOINT_NAME}:${LLMDBENCH_HARNESS_STACK_ENDPOINT_PORT}"
 
   export LLMDBENCH_HARNESS_STACK_ENDPOINT_LAUNCHER_URL="http://${LLMDBENCH_HARNESS_STACK_ENDPOINT_NAME}:${LLMDBENCH_HARNESS_STACK_ENDPOINT_LAUNCHER_PORT}"
   export LLMDBENCH_HARNESS_STACK_ENDPOINT_LAUNCHER_VLLM_URL="http://${LLMDBENCH_HARNESS_STACK_ENDPOINT_NAME}:${LLMDBENCH_HARNESS_STACK_ENDPOINT_LAUNCHER_VLLM_PORT}"
```
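run.sh now inspects the gateway's first listener: when it is named `https`, the endpoint port becomes 443 and the URL protocol switches to https. A hedged sketch of the same check performed directly against a Gateway resource (gateway name and namespace are placeholders):

```shell
# GATEWAY_NAME and NAMESPACE are placeholders.
listener=$(kubectl get gateway "${GATEWAY_NAME}" -n "${NAMESPACE}" -o json \
  | jq -r '.spec.listeners[0].name')

endpoint_port=80
endpoint_protocol=http
if [[ "${listener}" == "https" ]]; then
  endpoint_port=443
  endpoint_protocol=https
fi
echo "endpoint will be probed as ${endpoint_protocol}://<gateway-address>:${endpoint_port}"
```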

setup/steps/07_deploy_setup.py

Lines changed: 43 additions & 0 deletions
```diff
@@ -46,6 +46,49 @@ def gateway_values(provider : str, host: str, service: str) -> str:
     enabled: true
 """
 
+    elif provider == "data-science-gateway-class" :
+        return f"""gateway:
+  gatewayClassName: data-science-gateway-class
+  labels:
+    istio.io/rev: openshift-gateway
+    platform.opendatahub.io/part-of: gatewayconfig
+
+  listeners:
+  - name: https
+    port: 443
+    protocol: HTTPS
+    allowedRoutes:
+      namespaces:
+        from: All
+    tls:
+      mode: Terminate
+      certificateRefs:
+      - group: ""
+        kind: Secret
+        name: data-science-gateway-service-tls
+        namespace: openshift-ingress
+
+  destinationRule:
+    enabled: true
+    trafficPolicy:
+      connectionPool:
+        http:
+          http1MaxPendingRequests: 256000
+          maxRequestsPerConnection: 256000
+          http2MaxRequests: 256000
+          idleTimeout: "900s"
+        tcp:
+          maxConnections: 256000
+          maxConnectionDuration: "1800s"
+          connectTimeout: "900s"
+
+tls:
+  referenceGrant:
+    enabled: true
+    secretNamespace: openshift-ingress
+    secretName: data-science-gateway-service-tls
+"""
+
 
     elif provider == "gke":
         return f"""gateway:
  gatewayClassName: gke-l7-regional-external-managed
```
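The HTTPS listener terminates TLS with a certificate stored in `openshift-ingress`, which the ReferenceGrant values make reachable from the gateway's namespace. A quick sanity check that the referenced secret exists (assumes kubectl access to the cluster):

```shell
# Names come from the values block above.
kubectl get secret data-science-gateway-service-tls -n openshift-ingress
```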

setup/steps/08_deploy_gaie.py

Lines changed: 2 additions & 2 deletions
```diff
@@ -23,7 +23,7 @@
 )
 
 def provider(provider: str) -> str:
-    if provider == "gke" or provider == "openshift-default" or provider == "istio":
+    if provider == "gke" or provider == "istio":
         return provider
     return "none"
 
@@ -165,7 +165,7 @@ def main():
 inferencePool:
   targetPortNumber: {ev['vllm_common_inference_port']}
   modelServerType: vllm
-  apiVersion: "inference.networking.k8s.io/v1"
+  apiVersion: "{ev['vllm_modelservice_inferencepool_api']}"
   modelServers:
     matchLabels:
       llm-d.ai/inferenceServing: "true"
```
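The InferencePool apiVersion is now taken from `LLMDBENCH_VLLM_MODELSERVICE_INFERENCEPOOL_API`, so it can be matched to whichever CRD version the cluster actually serves. One way to check, assuming kubectl access:

```shell
# List InferencePool resources for both API groups; set the variable to the one that exists.
kubectl api-resources --api-group=inference.networking.k8s.io
kubectl api-resources --api-group=inference.networking.x-k8s.io
```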

setup/steps/09_deploy_via_modelservice.py

Lines changed: 2 additions & 2 deletions
```diff
@@ -277,7 +277,7 @@ def define_httproute(
       name: infra-{release}-inference-gateway
   rules:
   - backendRefs:
-    - group: inference.networking.k8s.io
+    - group: {ev['vllm_modelservice_inferencepool_api'].split('/')[0]}
      kind: InferencePool
      name: {model_id_label}-gaie
      port: {service_port}
@@ -300,7 +300,7 @@ def define_httproute(
    if single_model:
        manifest = f"""{manifest}
  - backendRefs:
-    - group: inference.networking.k8s.io
+    - group: {ev['vllm_modelservice_inferencepool_api'].split('/')[0]}
      kind: InferencePool
      name: {model_id_label}-gaie
      port: {service_port}
```
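The HTTPRoute backendRef `group` is just the API string up to the `/`, matching what `split('/')[0]` produces in the diff. The same split as a shell one-liner:

```shell
api="inference.networking.x-k8s.io/v1alpha2"
echo "${api%%/*}"   # -> inference.networking.x-k8s.io
```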

setup/steps/10_smoketest.py

Lines changed: 16 additions & 4 deletions
```diff
@@ -8,6 +8,7 @@
 import pykube
 import ipaddress
 
+
 # Add project root to path for imports
 current_file = Path(__file__).resolve()
 project_root = current_file.parents[1]
@@ -44,6 +45,8 @@ def check_deployment(api: pykube.HTTPClient, client: any, ev: dict):
     service_hostname = "N/A"
     service_name = "N/A"
 
+    gateway_port = "80"
+
     if is_standalone_deployment(ev):
         pod_string = "standalone"
         try:
@@ -70,6 +73,15 @@ def check_deployment(api: pykube.HTTPClient, client: any, ev: dict):
                 plural="gateways"
             )
             for service in gateways['items']:
+
+                for mf in service["metadata"]["managedFields"] :
+                    if 'fieldsV1' in mf :
+                        if 'f:status' in mf['fieldsV1'] :
+                            if 'f:listeners' in mf['fieldsV1']['f:status'] :
+                                for k in mf['fieldsV1']['f:status']['f:listeners'].keys() :
+                                    if k.count('https') :
+                                        gateway_port = "443"
+
                 if service['metadata']['name'] == f"infra-{ev.get('vllm_modelservice_release', '')}-inference-gateway":
                     service_name = service['metadata']['name']
                     if "addresses" in service["status"] :
@@ -151,12 +163,12 @@ def check_deployment(api: pykube.HTTPClient, client: any, ev: dict):
         return 1
 
     announce(f"✅ All pods respond successfully")
-    announce(f"🚀 Testing service/gateway \"{service_ip}\" (port 80)...")
+    announce(f"🚀 Testing service/gateway \"{service_ip}\" (port {gateway_port})...")
 
     if dry_run:
         announce(f"✅ [DRY RUN] Service responds successfully ({current_model})")
     else:
-        received_model_name, curl_command_used = get_model_name_from_pod(api, client, ev, service_ip, "80")
+        received_model_name, curl_command_used = get_model_name_from_pod(api, client, ev, service_ip, gateway_port)
         if received_model_name == current_model:
             announce(f"✅ Service responds successfully ({received_model_name})")
         else:
@@ -187,9 +199,9 @@ def check_deployment(api: pykube.HTTPClient, client: any, ev: dict):
     if ev['control_deploy_is_openshift'] == "1" and route_url:
         announce(f"🚀 Testing external route \"{route_url}\"...")
         if is_standalone_deployment(ev):
-            received_model_name, curl_command_used = get_model_name_from_pod(api, client, ev, route_url, '80')
+            received_model_name, curl_command_used = get_model_name_from_pod(api, client, ev, route_url, '443')
         else:
-            received_model_name, curl_command_used = get_model_name_from_pod(api, client, ev, route_url, '80')
+            received_model_name, curl_command_used = get_model_name_from_pod(api, client, ev, route_url, '443')
         if received_model_name == current_model:
             announce(f"✅ External route responds successfully ({received_model_name})")
         else:
```
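The smoke test walks the Gateway's `managedFields` to see whether an `https` listener was applied and, if so, probes the gateway on port 443; the external OpenShift route is now probed on 443 as well. A sketch of an equivalent check from the CLI that reads `spec.listeners` instead of `managedFields` (release name and namespace are placeholders):

```shell
# RELEASE and NAMESPACE are placeholders.
kubectl get gateway "infra-${RELEASE}-inference-gateway" -n "${NAMESPACE}" -o json \
  | jq -r '.spec.listeners[] | select(.protocol == "HTTPS") | .port'
```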
