
Commit 7595c7d

[Standup] Add a "standalone" test for pre-merge CI/CD (Kind) (#525)
* [Standup] Add a "standalone" test for pre-merge CI/CD (Kind)

  Fixed an issue when running in "dry-run" mode
  Fixed a syntax error in functions.py

  Signed-off-by: maugustosilva <maugusto.silva@gmail.com>

* Additional fixes for better standup on a kind cluster

  Signed-off-by: maugustosilva <maugusto.silva@gmail.com>

---------

Signed-off-by: maugustosilva <maugusto.silva@gmail.com>

1 parent f06d0ac · commit 7595c7d

8 files changed (+60, -23 lines)


.github/workflows/ci-pr-benchmark.yaml

Lines changed: 18 additions & 3 deletions
@@ -48,9 +48,24 @@ jobs:
         run: pip install ./config_explorer
         shell: bash
 
-      - name: Standup a modelservice using llm-d-inference-sim
+      - name: Standup (standalone) using llm-d-inference-sim
         run: |
-          ./setup/standup.sh -c kind_sim_fb -t modelservice -s 0,1,2,4,7,8
+          ./setup/standup.sh -c kind_sim_fb -t standalone -s 0,1,2,4,5,6,10
+        shell: bash
+
+      - name: Run harness (standalone)
+        run: |
+          ./setup/run.sh -c kind_sim_fb --dry-run
+        shell: bash
+
+      - name: Teardown (standalone)
+        run: |
+          ./setup/teardown.sh -c kind_sim_fb -t standalone
+        shell: bash
+
+      - name: Standup (modelservice) using llm-d-inference-sim
+        run: |
+          ./setup/standup.sh -c kind_sim_fb -t modelservice -s 0,1,2,4,5,7,8
         shell: bash
 
       - name: Run harness (mock)
@@ -60,7 +75,7 @@ jobs:
           ./setup/run.sh -c kind_sim_fb --dry-run
         shell: bash
 
-      - name: Teardown
+      - name: Teardown (modelservice)
         run: |
           ./setup/teardown.sh -c kind_sim_fb
         shell: bash
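Taken together, the new steps exercise a full standalone lifecycle (standup, dry-run harness, teardown) ahead of the pre-existing modelservice pass. A minimal sketch of reproducing the same sequence locally, assuming a Kind cluster is already up and the commands are run from the repository root:

#!/usr/bin/env bash
set -euo pipefail

# Standalone pass, mirroring the CI steps above (flags copied from the workflow).
./setup/standup.sh  -c kind_sim_fb -t standalone -s 0,1,2,4,5,6,10
./setup/run.sh      -c kind_sim_fb --dry-run
./setup/teardown.sh -c kind_sim_fb -t standalone

# Modelservice pass, with its own step list.
./setup/standup.sh  -c kind_sim_fb -t modelservice -s 0,1,2,4,5,7,8
./setup/run.sh      -c kind_sim_fb --dry-run
./setup/teardown.sh -c kind_sim_fb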

build/Dockerfile

Lines changed: 1 addition & 1 deletion
@@ -51,7 +51,7 @@ RUN cd vllm; \
 
 ARG GUIDELLM_REPO=https://github.com/vllm-project/guidellm.git
 ARG GUIDELLM_BRANCH=main
-ARG GUIDELLM_COMMIT=ba51acf5b0ba377c5edc35109a78cd3ebb402922
+ARG GUIDELLM_COMMIT=f6175cdd8a88f0931bd46822ed7a71787dcd7cee
 RUN git clone --branch ${GUIDELLM_BRANCH} ${GUIDELLM_REPO}
 RUN cd guidellm; \
     pip install torch --index-url https://download.pytorch.org/whl/cpu; \
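The RUN line that consumes ${GUIDELLM_COMMIT} sits outside this hunk; the usual pattern for honoring such a pin is sketched below (an assumption for illustration, not the Dockerfile's exact code):

# Hypothetical sketch: clone the branch, then pin to the exact commit.
git clone --branch "${GUIDELLM_BRANCH}" "${GUIDELLM_REPO}"
cd guidellm
git checkout "${GUIDELLM_COMMIT}"   # f6175cdd8a88f0931bd46822ed7a71787dcd7cee after this change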

scenarios/cicd/kind_sim_fb.sh

Lines changed: 13 additions & 0 deletions
@@ -1,13 +1,22 @@
 # A scenario to capture running inference-sim on a Kind cluster without requiring GPUs
 export LLMDBENCH_DEPLOY_METHODS=modelservice
 export LLMDBENCH_VLLM_COMMON_REPLICAS=1
+export LLMDBENCH_VLLM_COMMON_ACCELERATOR_NR=0
+export LLMDBENCH_VLLM_COMMON_CPU_NR=0
+export LLMDBENCH_VLLM_COMMON_CPU_MEM=100Mi
+export LLMDBENCH_VLLM_COMMON_SHM_MEM=500Mi
 export LLMDBENCH_VLLM_MODELSERVICE_PREFILL_TENSOR_PARALLELISM=0
 export LLMDBENCH_VLLM_MODELSERVICE_DECODE_TENSOR_PARALLELISM=0
 export LLMDBENCH_VLLM_COMMON_MAX_MODEL_LEN=1024
 export LLMDBENCH_VLLM_COMMON_AFFINITY=kubernetes.io/os:linux
 export LLMDBENCH_CONTROL_WAIT_TIMEOUT=90
 export LLMDBENCH_LLMD_IMAGE_NAME="llm-d-inference-sim"
 export LLMDBENCH_LLMD_ROUTINGSIDECAR_IMAGE_TAG="v0.2.0@sha256:a623a0752af0a71b7b05ebf95517848b5dbc3d8d235c1897035905632d5b7d80"
+export LLMDBENCH_VLLM_STANDALONE_IMAGE_REGISTRY=ghcr.io
+export LLMDBENCH_VLLM_STANDALONE_IMAGE_REPO=llm-d
+export LLMDBENCH_VLLM_STANDALONE_IMAGE_NAME=llm-d-inference-sim
+export LLMDBENCH_VLLM_STANDALONE_IMAGE_TAG=auto
+export LLMDBENCH_VLLM_STANDALONE_ARGS="/app/llm-d-inference-sim____--model____/model-cache/models/REPLACE_ENV_LLMDBENCH_DEPLOY_CURRENT_MODEL____--port____REPLACE_ENV_LLMDBENCH_VLLM_COMMON_INFERENCE_PORT____--served-model-name____REPLACE_ENV_LLMDBENCH_DEPLOY_CURRENT_MODEL"
 export LLMDBENCH_VLLM_MODELSERVICE_DECODE_MODEL_COMMAND=imageDefault
 export LLMDBENCH_VLLM_MODELSERVICE_PREFILL_MODEL_COMMAND=imageDefault
 export LLMDBENCH_VLLM_MODELSERVICE_DECODE_EXTRA_ARGS="[]"
@@ -16,7 +25,11 @@ export LLMDBENCH_VLLM_MODELSERVICE_DECODE_CPU_NR=0
 export LLMDBENCH_VLLM_MODELSERVICE_PREFILL_CPU_NR=0
 export LLMDBENCH_VLLM_MODELSERVICE_DECODE_CPU_MEM=100Mi
 export LLMDBENCH_VLLM_MODELSERVICE_PREFILL_CPU_MEM=100Mi
+export LLMDBENCH_VLLM_MODELSERVICE_DECODE_SHM_MEM=500Mi
+export LLMDBENCH_VLLM_MODELSERVICE_PREFILL_SHM_MEM=500Mi
 export LLMDBENCH_VLLM_MODELSERVICE_URI_PROTOCOL="hf"
+export LLMDBENCH_VLLM_COMMON_PVC_ACCESS_MODE="ReadWriteOnce"
 export LLMDBENCH_DEPLOY_MODEL_LIST="facebook/opt-125m"
+export LLMDBENCH_VLLM_COMMON_PVC_MODEL_CACHE_SIZE=2Gi
 export LLMDBENCH_HARNESS_PVC_SIZE=3Gi
 export LLMDBENCH_VLLM_COMMON_ACCELERATOR_MEMORY=24 # To pass capacity planner sanity checking
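LLMDBENCH_VLLM_STANDALONE_ARGS encodes the container command as a single string, with ____ standing in for argument separators and REPLACE_ENV_<VAR> placeholders resolved at deploy time. A minimal sketch of how such an encoding can be decoded (a hypothetical helper for illustration; the framework's actual substitution logic may differ):

# decode_standalone_args: hypothetical decoder for the "____" convention.
decode_standalone_args() {
  local decoded="${1//____/ }"                      # "____" -> spaces
  while [[ "$decoded" =~ REPLACE_ENV_([A-Za-z0-9_]+) ]]; do
    local var="${BASH_REMATCH[1]}"                  # e.g. LLMDBENCH_DEPLOY_CURRENT_MODEL
    decoded="${decoded//REPLACE_ENV_${var}/${!var}}"
  done
  printf '%s\n' "$decoded"
}

# Example (values assumed for illustration):
export LLMDBENCH_DEPLOY_CURRENT_MODEL=facebook/opt-125m
export LLMDBENCH_VLLM_COMMON_INFERENCE_PORT=8000
decode_standalone_args "$LLMDBENCH_VLLM_STANDALONE_ARGS"
# -> /app/llm-d-inference-sim --model /model-cache/models/facebook/opt-125m --port 8000 --served-model-name facebook/opt-125m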

setup/env.sh

Lines changed: 1 addition & 0 deletions
@@ -124,6 +124,7 @@ export LLMDBENCH_VLLM_COMMON_EXTRA_PVC_NAME=${LLMDBENCH_VLLM_COMMON_EXTRA_PVC_NA
 export LLMDBENCH_VLLM_COMMON_EXTRA_PVC_SIZE="${LLMDBENCH_VLLM_COMMON_EXTRA_PVC_SIZE:-10Gi}"
 export LLMDBENCH_VLLM_COMMON_PVC_STORAGE_CLASS="${LLMDBENCH_VLLM_COMMON_PVC_STORAGE_CLASS:-default}"
 export LLMDBENCH_VLLM_COMMON_PVC_DOWNLOAD_TIMEOUT=${LLMDBENCH_VLLM_COMMON_PVC_DOWNLOAD_TIMEOUT:-"2400"}
+export LLMDBENCH_VLLM_COMMON_PVC_ACCESS_MODE=${LLMDBENCH_VLLM_COMMON_PVC_ACCESS_MODE:-"ReadWriteMany"}
 export LLMDBENCH_VLLM_COMMON_HF_TOKEN_KEY="${LLMDBENCH_VLLM_COMMON_HF_TOKEN_KEY:-"HF_TOKEN"}"
 export LLMDBENCH_VLLM_COMMON_HF_TOKEN_NAME=${LLMDBENCH_VLLM_COMMON_HF_TOKEN_NAME:-"llm-d-hf-token"}
 export LLMDBENCH_VLLM_COMMON_FQDN=${LLMDBENCH_VLLM_COMMON_FQDN:-".svc.cluster.local"}
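Because the default is applied with the :- expansion, existing deployments keep ReadWriteMany while a scenario (or the shell) can override it before standup; kind_sim_fb.sh above does exactly this for single-node Kind clusters whose storage class only supports ReadWriteOnce. For example:

# Override the PVC access mode for a single-node Kind run, then stand up.
export LLMDBENCH_VLLM_COMMON_PVC_ACCESS_MODE="ReadWriteOnce"
./setup/standup.sh -c kind_sim_fb -t standalone -s 0,1,2,4,5,6,10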

setup/functions.py

Lines changed: 6 additions & 2 deletions
@@ -290,7 +290,7 @@ def llmdbench_execute_cmd(
         announce("(stderr not captured)")
 
     if fatal and ecode != 0:
-        announce(f"\ERROR: Exiting with code {ecode}.")
+        announce(f"ERROR: Exiting with code {ecode}.")
         sys.exit(ecode)
 
     return ecode
@@ -393,6 +393,7 @@ def validate_and_create_pvc(
     pvc_name: str,
     pvc_size: str,
     pvc_class: str,
+    pvc_access_mode: str,
     dry_run: bool = False,
 ):
     announce("Provisioning model storage…")
@@ -453,7 +454,7 @@ def validate_and_create_pvc(
             "namespace": namespace,
         },
         "spec": {
-            "accessModes": ["ReadWriteMany"],
+            "accessModes": [f"{pvc_access_mode}"],
             "resources": {"requests": {"storage": pvc_size}},
             "storageClassName": pvc_class,
             "volumeMode": "Filesystem",
@@ -583,6 +584,7 @@ async def wait_for_job(job_name, namespace, timeout=7200, dry_run: bool = False)
     announce(f"Waiting for job {job_name} to complete...")
 
     if dry_run:
+        announce(f"[DRY RUN] Evaluation job {job_name} completed successfully.")
         return True
 
     # use async config loading
@@ -818,6 +820,8 @@ def get_image(
         announce(f'ERROR: Unable to find latest tag for image "{image_full_name}"')
         sys.exit(1)
 
+    announce(f"INFO: resolved image \"{image_full_name}:{image_tag}\" into \"{image_full_name}:{is_latest_tag}\"")
+
     if tag_only == "1":
         return is_latest_tag
     else:
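With accessModes parametrized, the claim that validate_and_create_pvc renders is equivalent to applying a manifest like the one below (a hedged sketch: name and namespace values are hypothetical, while the spec fields mirror the dict in the diff):

kubectl apply -f - <<'EOF'
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: model-cache          # pvc_name (hypothetical value)
  namespace: llmdbench       # namespace (hypothetical value)
spec:
  accessModes: ["ReadWriteOnce"]   # now supplied via pvc_access_mode
  resources:
    requests:
      storage: 2Gi                 # pvc_size
  storageClassName: default        # pvc_class
  volumeMode: Filesystem
EOF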

setup/steps/04_ensure_model_namespace_prepared.py

Lines changed: 3 additions & 1 deletion
@@ -99,7 +99,8 @@ def main():
         pvc_name=ev["vllm_common_pvc_name"],
         pvc_size=ev["vllm_common_pvc_model_cache_size"],
         pvc_class=ev["vllm_common_pvc_storage_class"],
-        dry_run=ev["control_dry_run"],
+        pvc_access_mode=ev['vllm_common_pvc_access_mode'],
+        dry_run=ev["control_dry_run"]
     )
 
     validate_and_create_pvc(
@@ -110,6 +111,7 @@ def main():
         pvc_name=ev["vllm_common_extra_pvc_name"],
         pvc_size=ev["vllm_common_extra_pvc_size"],
         pvc_class=ev["vllm_common_pvc_storage_class"],
+        pvc_access_mode=ev['vllm_common_pvc_access_mode'],
         dry_run=ev["control_dry_run"],
     )
setup/steps/05_ensure_harness_namespace_prepared.py

Lines changed: 2 additions & 1 deletion
@@ -93,7 +93,8 @@ def main():
         pvc_name=volume,
         pvc_size=ev["harness_pvc_size"],
         pvc_class=ev["vllm_common_pvc_storage_class"],
-        dry_run=ev["control_dry_run"],
+        pvc_access_mode=ev['vllm_common_pvc_access_mode'],
+        dry_run=ev["control_dry_run"]
     )
 
     pod_yaml = f"""apiVersion: v1

setup/steps/10_smoketest.py

Lines changed: 16 additions & 15 deletions
@@ -109,20 +109,21 @@ def check_deployment(api: pykube.HTTPClient, client: any, ev: dict):
     current_model_ID_label = model_attribute(model, "modelid_label")
 
     if dry_run:
-        pod_ip_list = "127.0.0.4"
-    try:
-        pod_ip_list = []
-        if is_standalone_deployment(ev):
-            pods = client.CoreV1Api().list_namespaced_pod(namespace=ev["vllm_common_namespace"])
-            for pod in pods.items:
-                if pod_string in pod.metadata.name:
+        pod_ip_list = ["127.0.0.4"]
+    else :
+        try:
+            pod_ip_list = []
+            if is_standalone_deployment(ev):
+                pods = client.CoreV1Api().list_namespaced_pod(namespace=ev["vllm_common_namespace"])
+                for pod in pods.items:
+                    if pod_string in pod.metadata.name:
+                        pod_ip_list.append(pod.status.pod_ip)
+            else:
+                pods = client.CoreV1Api().list_namespaced_pod(namespace=ev["vllm_common_namespace"], label_selector=f"llm-d.ai/model={current_model_ID_label},llm-d.ai/role={pod_string}")
+                for pod in pods.items:
                     pod_ip_list.append(pod.status.pod_ip)
-        else:
-            pods = client.CoreV1Api().list_namespaced_pod(namespace=ev["vllm_common_namespace"], label_selector=f"llm-d.ai/model={current_model_ID_label},llm-d.ai/role={pod_string}")
-            for pod in pods.items:
-                pod_ip_list.append(pod.status.pod_ip)
-    except client.ApiException as e:
-        announce(f"ERROR: Unable to find pods in namespace {ev['vllm_common_namespace']}: {e}")
+        except client.ApiException as e:
+            announce(f"ERROR: Unable to find pods in namespace {ev['vllm_common_namespace']}: {e}")
 
     if not pod_ip_list:
         announce(f"ERROR: Unable to find IPs for pods \"{pod_string}\"!")
@@ -131,7 +132,7 @@ def check_deployment(api: pykube.HTTPClient, client: any, ev: dict):
     for pod_ip in pod_ip_list:
         announce(f" 🚀 Testing pod ip \"{pod_ip}\" ...")
         if dry_run:
-            announce(f" ✅ Pod ip \"{pod_ip}\" responded successfully ({current_model})")
+            announce(f" ✅ [DRY RUN] Pod ip \"{pod_ip}\" responded successfully ({current_model})")
         else:
             image_url = get_image(ev['llmd_image_registry'], ev['llmd_image_repo'], ev['llmd_image_name'], ev['llmd_image_tag'])
             received_model_name, curl_command_used = get_model_name_from_pod(ev['vllm_common_namespace'], image_url, pod_ip, ev['vllm_common_inference_port'])
@@ -144,7 +145,7 @@ def check_deployment(api: pykube.HTTPClient, client: any, ev: dict):
     announce(f"🚀 Testing service/gateway \"{service_ip}\" (port 80)...")
 
     if dry_run:
-        announce(f"✅ Service responds successfully ({current_model})")
+        announce(f"✅ [DRY RUN] Service responds successfully ({current_model})")
     else:
         image_url = get_image(ev['llmd_image_registry'], ev['llmd_image_repo'], ev['llmd_image_name'], ev['llmd_image_tag'])
         received_model_name, curl_command_used = get_model_name_from_pod(ev['vllm_common_namespace'], image_url, service_ip, "80")
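Outside dry-run mode, get_model_name_from_pod probes each IP over the inference port. A rough equivalent of that probe, assuming the OpenAI-compatible /v1/models endpoint that llm-d-inference-sim serves (the exact curl the helper issues may differ):

# Query the models endpoint on one pod IP; POD_IP and PORT are placeholders.
curl -s "http://${POD_IP}:${PORT}/v1/models" | grep -o '"id": *"[^"]*"'
# A healthy simulator should report the served model name, e.g. facebook/opt-125m.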
