
Commit 79b113f

[Standup] Enable the use of gateway provided by RHOAI (#646)
Two environment variables require changes:

```
LLMDBENCH_VLLM_MODELSERVICE_GATEWAY_CLASS_NAME=data-science-gateway-class
LLMDBENCH_VLLM_MODELSERVICE_INFERENCEPOOL_API=inference.networking.x-k8s.io/v1alpha2
```

The resulting gateway exposes only port `443` (https). `run.sh` was also adjusted so that reachability can be detected across different ports and protocols.

Signed-off-by: maugustosilva <maugusto.silva@gmail.com>
1 parent 4b1afb7 commit 79b113f
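To stand up against the RHOAI-provided gateway, it is enough to export the two variables from the commit message before running the setup scripts. A minimal sketch (the surrounding scenario and invocation are assumptions, not part of this commit):

```shell
# Hypothetical usage: select the RHOAI gateway class and the x-k8s InferencePool API
# before invoking the standup/run scripts.
export LLMDBENCH_VLLM_MODELSERVICE_GATEWAY_CLASS_NAME=data-science-gateway-class
export LLMDBENCH_VLLM_MODELSERVICE_INFERENCEPOOL_API=inference.networking.x-k8s.io/v1alpha2
```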

9 files changed: +91, -20 lines


scenarios/examples/gpu.sh

Lines changed: 3 additions & 1 deletion
```diff
@@ -28,8 +28,10 @@
 ######export LLMDBENCH_DEPLOY_METHODS=standalone
 #export LLMDBENCH_DEPLOY_METHODS=modelservice
 
-#export LLMDBENCH_VLLM_MODELSERVICE_GATEWAY_CLASS_NAME=istio
+#export LLMDBENCH_VLLM_MODELSERVICE_GATEWAY_CLASS_NAME=data-science-gateway-class
+#export LLMDBENCH_VLLM_MODELSERVICE_INFERENCEPOOL_API=inference.networking.x-k8s.io/v1alpha2
 
+#export LLMDBENCH_VLLM_MODELSERVICE_MULTINODE=true
 
 # Affinity to select node with appropriate accelerator (leave uncommented to automatically detect GPU... WILL WORK FOR OpenShift, Kubernetes and GKE)
 #export LLMDBENCH_VLLM_COMMON_AFFINITY=nvidia.com/gpu.product:NVIDIA-H100-80GB-HBM3 # OpenShift
```

setup/env.sh

Lines changed: 2 additions & 1 deletion
```diff
@@ -18,7 +18,7 @@ export LLMDBENCH_LLMD_ROUTINGSIDECAR_IMAGE_TAG=${LLMDBENCH_LLMD_ROUTINGSIDECAR_I
 export LLMDBENCH_VLLM_STANDALONE_IMAGE_TAG=${LLMDBENCH_VLLM_STANDALONE_IMAGE_TAG:-auto}
 export LLMDBENCH_GATEWAY_PROVIDER_KGATEWAY_CHART_VERSION=${LLMDBENCH_GATEWAY_PROVIDER_KGATEWAY_CHART_VERSION:-"v2.1.1"}
 export LLMDBENCH_GATEWAY_PROVIDER_ISTIO_CHART_VERSION=${LLMDBENCH_GATEWAY_PROVIDER_ISTIO_CHART_VERSION:-"1.28.1"}
-export LLMDBENCH_VLLM_INFRA_CHART_VERSION=${LLMDBENCH_VLLM_INFRA_CHART_VERSION:-v1.3.5}
+export LLMDBENCH_VLLM_INFRA_CHART_VERSION=${LLMDBENCH_VLLM_INFRA_CHART_VERSION:-v1.3.8}
 export LLMDBENCH_GATEWAY_API_CRD_REVISION=${LLMDBENCH_GATEWAY_API_CRD_REVISION:-"v1.3.0"}
 export LLMDBENCH_WVA_CHART_VERSION="${LLMDBENCH_WVA_CHART_VERSION:-0.4.2}"
 #FIXME: oci helm repos do not output a list of versions. Use "skopeo list-tags docker://registry.k8s.io/gateway-api-inference-extension/charts/inferencepool"
@@ -196,6 +196,7 @@ export LLMDBENCH_VLLM_MODELSERVICE_HELM_REPOSITORY=${LLMDBENCH_VLLM_MODELSERVICE
 export LLMDBENCH_VLLM_MODELSERVICE_HELM_REPOSITORY_URL=${LLMDBENCH_VLLM_MODELSERVICE_HELM_REPOSITORY_URL:-"https://llm-d-incubation.github.io/llm-d-modelservice/"}
 export LLMDBENCH_VLLM_MODELSERVICE_URI_PROTOCOL=${LLMDBENCH_VLLM_MODELSERVICE_URI_PROTOCOL:-"pvc"}
 export LLMDBENCH_VLLM_MODELSERVICE_GATEWAY_CLASS_NAME=${LLMDBENCH_VLLM_MODELSERVICE_GATEWAY_CLASS_NAME:-"istio"}
+export LLMDBENCH_VLLM_MODELSERVICE_INFERENCEPOOL_API=${LLMDBENCH_VLLM_MODELSERVICE_INFERENCEPOOL_API:-"inference.networking.k8s.io/v1"}
 export LLMDBENCH_VLLM_MODELSERVICE_GATEWAY_SERVICE_TYPE=${LLMDBENCH_VLLM_MODELSERVICE_GATEWAY_SERVICE_TYPE:-NodePort}
 export LLMDBENCH_VLLM_MODELSERVICE_ROUTE=${LLMDBENCH_VLLM_MODELSERVICE_ROUTE:-false}
 # Endpoint Picker Parameters
```
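All of these are `${VAR:-default}` assignments, so env.sh only supplies defaults and any value already exported by the caller takes precedence. A minimal sketch of that behavior:

```shell
# ${VAR:-default}: keep the caller's value when set, otherwise fall back to the default.
export LLMDBENCH_VLLM_MODELSERVICE_INFERENCEPOOL_API=${LLMDBENCH_VLLM_MODELSERVICE_INFERENCEPOOL_API:-"inference.networking.k8s.io/v1"}
echo "${LLMDBENCH_VLLM_MODELSERVICE_INFERENCEPOOL_API}"  # prints the default unless it was already exported
```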

setup/functions.py

Lines changed: 13 additions & 6 deletions
```diff
@@ -1854,16 +1854,20 @@ def get_model_name_from_pod(api: pykube.HTTPClient,
     if not ip :
         return "empty", "N/A"
 
-    pod_name = f"testinference-pod-{get_rand_string()}"
-    if "http://" not in ip:
-        ip = "http://" + ip
+
+    protocol = 'http'
+    if port == '443' :
+        protocol = 'https'
+    if f"{protocol}://" not in ip:
+        ip = f"{protocol}://" + ip
     if ip.count(":") == 1:
         ip = ip + ":" + port
     ip = ip + "/v1/models"
-    curl_command = f"curl --no-progress-meter {ip}"
-    full_command = ["/bin/bash", "-c", f"curl --no-progress-meter {ip}"]
+    curl_command = f"curl -k --no-progress-meter {ip}"
+    full_command = ["/bin/bash", "-c", f"{curl_command}"]
 
     while current_attempts <= total_attempts :
+        pod_name = f"testinference-pod-{get_rand_string()}"
         pod_manifest = client.V1Pod(
             metadata=client.V1ObjectMeta(name=pod_name, namespace=ev['vllm_common_namespace'], labels={"llm-d.ai/id": f"{pod_name}"}),
             spec=client.V1PodSpec(
@@ -1965,7 +1969,10 @@ def wait_for_pods_created_running_ready(api_client, ev: dict, component_nr: int,
         label_selector=f"llm-d.ai/model={ev['deploy_current_model_id_label']},llm-d.ai/role={component}"
         silent = False
     elif component in [ "gateway" ] :
-        label_selector = f"app.kubernetes.io/name=llm-d-infra"
+        if ev['vllm_modelservice_gateway_class_name'] == "data-science-gateway-class":
+            label_selector = f"gateway.istio.io/managed=istio.io-gateway-controller"
+        else :
+            label_selector = f"app.kubernetes.io/name=llm-d-infra"
         silent = False
     elif component in [ "inferencepool" ] :
         label_selector = f"inferencepool={ev['deploy_current_model_id_label']}-gaie-epp"
```
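The probe now derives the protocol from the target port (443 implies HTTPS) and passes `-k` so curl accepts the gateway's self-signed certificate. A condensed shell sketch of that logic, with placeholder host/port values and without the in-cluster pod launch:

```shell
# GATEWAY_HOST and GATEWAY_PORT are placeholders for the values discovered by the setup code.
GATEWAY_HOST=${GATEWAY_HOST:-gateway.example.internal}
GATEWAY_PORT=${GATEWAY_PORT:-443}

protocol=http
if [[ "${GATEWAY_PORT}" == "443" ]]; then
  protocol=https
fi

# -k tolerates certificates that are not signed by a trusted CA.
curl -k --no-progress-meter "${protocol}://${GATEWAY_HOST}:${GATEWAY_PORT}/v1/models"
```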

setup/functions.sh

Lines changed: 2 additions & 2 deletions
```diff
@@ -621,7 +621,7 @@ function get_model_name_from_pod {
   local url=$3
   local port=$4
 
-  has_protocol=$(echo $url | grep "http://" || true)
+  has_protocol=$(echo $url | grep -E "http://|https://" || true)
   if [[ -z $has_protocol ]]; then
     local url="http://$url"
   fi
@@ -633,7 +633,7 @@ function get_model_name_from_pod {
   # --- END: Corrected Port Logic ---
 
   local url=$url/v1/models
-  local response=$(llmdbench_execute_cmd "${LLMDBENCH_CONTROL_KCMD} run testinference-pod-$(get_rand_string) -n $namespace --attach --restart=Never --rm --image=$image --quiet --command -- bash -c \"curl --no-progress-meter $url\"" ${LLMDBENCH_CONTROL_DRY_RUN} 0 0 2 0)
+  local response=$(llmdbench_execute_cmd "${LLMDBENCH_CONTROL_KCMD} run testinference-pod-$(get_rand_string) -n $namespace --attach --restart=Never --rm --image=$image --quiet --command -- bash -c \"curl -k --no-progress-meter $url\"" ${LLMDBENCH_CONTROL_DRY_RUN} 0 0 2 0)
   is_jq=$(echo $response | jq -r . || true)
 
   if [[ -z $is_jq ]]; then
```
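The shell variant performs the same probe from a throwaway pod inside the cluster. A rough equivalent with plain kubectl (namespace, image, and URL are placeholders; the real function builds them from the environment):

```shell
# NAMESPACE, IMAGE and URL are placeholders.
kubectl run "testinference-pod-${RANDOM}" \
  --namespace "${NAMESPACE}" \
  --image "${IMAGE}" \
  --attach --rm --restart=Never --quiet \
  --command -- bash -c "curl -k --no-progress-meter ${URL}/v1/models"
```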

setup/run.sh

Lines changed: 8 additions & 2 deletions
```diff
@@ -270,6 +270,7 @@ for method in ${LLMDBENCH_DEPLOY_METHODS//,/ }; do
   export LLMDBENCH_HARNESS_STACK_ENDPOINT_LAUNCHER_PORT=
   export LLMDBENCH_HARNESS_STACK_ENDPOINT_LAUNCHER_VLLM_PORT=
   export LLMDBENCH_VLLM_FQDN=".${LLMDBENCH_VLLM_COMMON_NAMESPACE}${LLMDBENCH_VLLM_COMMON_FQDN}"
+  export LLMDBENCH_HARNESS_STACK_ENDPOINT_PROTOCOL=http
 
   if [[ $LLMDBENCH_CONTROL_ENVIRONMENT_TYPE_STANDALONE_ACTIVE -eq 1 ]]; then
     export LLMDBENCH_CONTROL_ENV_VAR_LIST_TO_POD="LLMDBENCH_RUN_EXPERIMENT|LLMDBENCH_BASE64_CONTEXT_CONTENTS|^LLMDBENCH_VLLM_COMMON|^LLMDBENCH_VLLM_STANDALONE|^LLMDBENCH_DEPLOY|^LLMDBENCH_HARNESS|^LLMDBENCH_RUN"
@@ -291,6 +292,11 @@ for method in ${LLMDBENCH_DEPLOY_METHODS//,/ }; do
       export LLMDBENCH_HARNESS_STACK_ENDPOINT_NAME=${LLMDBENCH_HARNESS_STACK_ENDPOINT_NAME}${LLMDBENCH_VLLM_FQDN}
     fi
     export LLMDBENCH_HARNESS_STACK_ENDPOINT_PORT=80
+    _listener_name=$(echo $LLMDBENCH_HARNESS_STACK_ENDPOINT_INFO | jq -r '.items[0].spec.listeners[0].name')
+    if [[ ${_listener_name} == "https" ]]; then
+      export LLMDBENCH_HARNESS_STACK_ENDPOINT_PORT=443
+      export LLMDBENCH_HARNESS_STACK_ENDPOINT_PROTOCOL=https
+    fi
     export LLMDBENCH_HARNESS_STACK_ENDPOINT_LAUNCHER_PORT=81
     export LLMDBENCH_HARNESS_STACK_ENDPOINT_LAUNCHER_VLLM_PORT=82
   fi
@@ -304,7 +310,7 @@ for method in ${LLMDBENCH_DEPLOY_METHODS//,/ }; do
     export LLMDBENCH_HARNESS_STACK_TYPE=vllm-prod
     export LLMDBENCH_HARNESS_STACK_ENDPOINT_NAME=$(${LLMDBENCH_CONTROL_KCMD} --namespace "$LLMDBENCH_VLLM_COMMON_NAMESPACE" get service --no-headers | awk '{print $1}' | grep ${LLMDBENCH_DEPLOY_METHODS} || true)
     if [[ ! -z $LLMDBENCH_HARNESS_STACK_ENDPOINT_NAME ]]; then
-      for i in default http; do
+      for i in default http https; do
        export LLMDBENCH_HARNESS_STACK_ENDPOINT_PORT=$(${LLMDBENCH_CONTROL_KCMD} --namespace "$LLMDBENCH_VLLM_COMMON_NAMESPACE" get service/$LLMDBENCH_HARNESS_STACK_ENDPOINT_NAME --no-headers -o json | jq -r ".spec.ports[] | select(.name == \"$i\") | .port")
        if [[ ! -z $LLMDBENCH_HARNESS_STACK_ENDPOINT_PORT ]]; then
          break
@@ -368,7 +374,7 @@ for method in ${LLMDBENCH_DEPLOY_METHODS//,/ }; do
     exit 1
   fi
 
-  export LLMDBENCH_HARNESS_STACK_ENDPOINT_URL="http://${LLMDBENCH_HARNESS_STACK_ENDPOINT_NAME}:${LLMDBENCH_HARNESS_STACK_ENDPOINT_PORT}"
+  export LLMDBENCH_HARNESS_STACK_ENDPOINT_URL="${LLMDBENCH_HARNESS_STACK_ENDPOINT_PROTOCOL}://${LLMDBENCH_HARNESS_STACK_ENDPOINT_NAME}:${LLMDBENCH_HARNESS_STACK_ENDPOINT_PORT}"
 
   export LLMDBENCH_HARNESS_STACK_ENDPOINT_LAUNCHER_URL="http://${LLMDBENCH_HARNESS_STACK_ENDPOINT_NAME}:${LLMDBENCH_HARNESS_STACK_ENDPOINT_LAUNCHER_PORT}"
   export LLMDBENCH_HARNESS_STACK_ENDPOINT_LAUNCHER_VLLM_URL="http://${LLMDBENCH_HARNESS_STACK_ENDPOINT_NAME}:${LLMDBENCH_HARNESS_STACK_ENDPOINT_LAUNCHER_VLLM_PORT}"
```
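run.sh now inspects the gateway's first listener: when it is named `https`, the endpoint port becomes 443 and the URL protocol switches to https. A hedged sketch of the same check performed directly against a Gateway resource (gateway name and namespace are placeholders):

```shell
# GATEWAY_NAME and NAMESPACE are placeholders.
listener=$(kubectl get gateway "${GATEWAY_NAME}" -n "${NAMESPACE}" -o json \
  | jq -r '.spec.listeners[0].name')

endpoint_port=80
endpoint_protocol=http
if [[ "${listener}" == "https" ]]; then
  endpoint_port=443
  endpoint_protocol=https
fi
echo "endpoint will be probed as ${endpoint_protocol}://<gateway-address>:${endpoint_port}"
```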

setup/steps/07_deploy_setup.py

Lines changed: 43 additions & 0 deletions
```diff
@@ -46,6 +46,49 @@ def gateway_values(provider : str, host: str, service: str) -> str:
     enabled: true
 """
 
+    elif provider == "data-science-gateway-class" :
+        return f"""gateway:
+  gatewayClassName: data-science-gateway-class
+  labels:
+    istio.io/rev: openshift-gateway
+    platform.opendatahub.io/part-of: gatewayconfig
+
+  listeners:
+  - name: https
+    port: 443
+    protocol: HTTPS
+    allowedRoutes:
+      namespaces:
+        from: All
+    tls:
+      mode: Terminate
+      certificateRefs:
+      - group: ""
+        kind: Secret
+        name: data-science-gateway-service-tls
+        namespace: openshift-ingress
+
+  destinationRule:
+    enabled: true
+    trafficPolicy:
+      connectionPool:
+        http:
+          http1MaxPendingRequests: 256000
+          maxRequestsPerConnection: 256000
+          http2MaxRequests: 256000
+          idleTimeout: "900s"
+        tcp:
+          maxConnections: 256000
+          maxConnectionDuration: "1800s"
+          connectTimeout: "900s"
+
+tls:
+  referenceGrant:
+    enabled: true
+    secretNamespace: openshift-ingress
+    secretName: data-science-gateway-service-tls
+"""
+
 
     elif provider == "gke":
         return f"""gateway:
  gatewayClassName: gke-l7-regional-external-managed
```
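The HTTPS listener terminates TLS with a certificate stored in `openshift-ingress`, which the ReferenceGrant values make reachable from the gateway's namespace. A quick sanity check that the referenced secret exists (assumes kubectl access to the cluster):

```shell
# Names come from the values block above.
kubectl get secret data-science-gateway-service-tls -n openshift-ingress
```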

setup/steps/08_deploy_gaie.py

Lines changed: 2 additions & 2 deletions
```diff
@@ -23,7 +23,7 @@
 )
 
 def provider(provider: str) -> str:
-    if provider == "gke" or provider == "openshift-default" or provider == "istio":
+    if provider == "gke" or provider == "istio":
         return provider
     return "none"
 
@@ -165,7 +165,7 @@ def main():
 inferencePool:
   targetPortNumber: {ev['vllm_common_inference_port']}
   modelServerType: vllm
-  apiVersion: "inference.networking.k8s.io/v1"
+  apiVersion: "{ev['vllm_modelservice_inferencepool_api']}"
   modelServers:
     matchLabels:
       llm-d.ai/inferenceServing: "true"
```
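The InferencePool apiVersion is now taken from `LLMDBENCH_VLLM_MODELSERVICE_INFERENCEPOOL_API`, so it can be matched to whichever CRD version the cluster actually serves. One way to check, assuming kubectl access:

```shell
# List InferencePool resources for both API groups; set the variable to the one that exists.
kubectl api-resources --api-group=inference.networking.k8s.io
kubectl api-resources --api-group=inference.networking.x-k8s.io
```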

setup/steps/09_deploy_via_modelservice.py

Lines changed: 2 additions & 2 deletions
```diff
@@ -277,7 +277,7 @@ def define_httproute(
       name: infra-{release}-inference-gateway
   rules:
   - backendRefs:
-    - group: inference.networking.k8s.io
+    - group: {ev['vllm_modelservice_inferencepool_api'].split('/')[0]}
      kind: InferencePool
      name: {model_id_label}-gaie
      port: {service_port}
@@ -300,7 +300,7 @@ def define_httproute(
    if single_model:
        manifest = f"""{manifest}
  - backendRefs:
-    - group: inference.networking.k8s.io
+    - group: {ev['vllm_modelservice_inferencepool_api'].split('/')[0]}
      kind: InferencePool
      name: {model_id_label}-gaie
      port: {service_port}
```
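The HTTPRoute backendRef `group` is just the API string up to the `/`, matching what `split('/')[0]` produces in the diff. The same split as a shell one-liner:

```shell
api="inference.networking.x-k8s.io/v1alpha2"
echo "${api%%/*}"   # -> inference.networking.x-k8s.io
```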

setup/steps/10_smoketest.py

Lines changed: 16 additions & 4 deletions
```diff
@@ -8,6 +8,7 @@
 import pykube
 import ipaddress
 
+
 # Add project root to path for imports
 current_file = Path(__file__).resolve()
 project_root = current_file.parents[1]
@@ -44,6 +45,8 @@ def check_deployment(api: pykube.HTTPClient, client: any, ev: dict):
     service_hostname = "N/A"
     service_name = "N/A"
 
+    gateway_port = "80"
+
     if is_standalone_deployment(ev):
         pod_string = "standalone"
         try:
@@ -70,6 +73,15 @@ def check_deployment(api: pykube.HTTPClient, client: any, ev: dict):
                 plural="gateways"
             )
             for service in gateways['items']:
+
+                for mf in service["metadata"]["managedFields"] :
+                    if 'fieldsV1' in mf :
+                        if 'f:status' in mf['fieldsV1'] :
+                            if 'f:listeners' in mf['fieldsV1']['f:status'] :
+                                for k in mf['fieldsV1']['f:status']['f:listeners'].keys() :
+                                    if k.count('https') :
+                                        gateway_port = "443"
+
                 if service['metadata']['name'] == f"infra-{ev.get('vllm_modelservice_release', '')}-inference-gateway":
                     service_name = service['metadata']['name']
                     if "addresses" in service["status"] :
@@ -151,12 +163,12 @@ def check_deployment(api: pykube.HTTPClient, client: any, ev: dict):
         return 1
 
     announce(f"✅ All pods respond successfully")
-    announce(f"🚀 Testing service/gateway \"{service_ip}\" (port 80)...")
+    announce(f"🚀 Testing service/gateway \"{service_ip}\" (port {gateway_port})...")
 
     if dry_run:
         announce(f"✅ [DRY RUN] Service responds successfully ({current_model})")
     else:
-        received_model_name, curl_command_used = get_model_name_from_pod(api, client, ev, service_ip, "80")
+        received_model_name, curl_command_used = get_model_name_from_pod(api, client, ev, service_ip, gateway_port)
         if received_model_name == current_model:
             announce(f"✅ Service responds successfully ({received_model_name})")
         else:
@@ -187,9 +199,9 @@ def check_deployment(api: pykube.HTTPClient, client: any, ev: dict):
     if ev['control_deploy_is_openshift'] == "1" and route_url:
         announce(f"🚀 Testing external route \"{route_url}\"...")
         if is_standalone_deployment(ev):
-            received_model_name, curl_command_used = get_model_name_from_pod(api, client, ev, route_url, '80')
+            received_model_name, curl_command_used = get_model_name_from_pod(api, client, ev, route_url, '443')
         else:
-            received_model_name, curl_command_used = get_model_name_from_pod(api, client, ev, route_url, '80')
+            received_model_name, curl_command_used = get_model_name_from_pod(api, client, ev, route_url, '443')
         if received_model_name == current_model:
             announce(f"✅ External route responds successfully ({received_model_name})")
         else:
```
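The smoke test walks the Gateway's `managedFields` to see whether an `https` listener was applied and, if so, probes the gateway on port 443; the external OpenShift route is now probed on 443 as well. A sketch of an equivalent check from the CLI that reads `spec.listeners` instead of `managedFields` (release name and namespace are placeholders):

```shell
# RELEASE and NAMESPACE are placeholders.
kubectl get gateway "infra-${RELEASE}-inference-gateway" -n "${NAMESPACE}" -o json \
  | jq -r '.spec.listeners[] | select(.protocol == "HTTPS") | .port'
```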
