
Commit b025013

Run benchmark test on PRs (#229)
* Run benchmark test on PRs
* Add hf token placeholder
* Fix env var
* Trigger workflow
* Rm teardown
* Use kind
* Fix os
* Fix
* Fix node label
* Fix steps
* Fix step list
* Fix env var
* Fix env var
* Fix env var again
* Fix file call
* Fix file call for e2e
* Fix gaie call for e2e
* Partially working
* Fix standup step
* Add debugging stmt
* Specify pod rs
* Get pod logs
* sleep for longer
* Fix ns debug
* Check pvc status
* Ensure pvc is created
* Name error
* Add ns to pvc
* Rm need for pv and pvc
* Run step fix
* Fix harness call
* Skip harness pod creation

---------

Signed-off-by: Jing Chen <jing.chen2@ibm.com>
1 parent bf9414f commit b025013

File tree

6 files changed: +149 −51 lines

Lines changed: 66 additions & 0 deletions

@@ -0,0 +1,66 @@
name: CI - PR Benchmark Run

on:
  pull_request:

jobs:
  run-benchmark:
    name: Inference Sim Benchmark Test
    runs-on: ubuntu-latest
    timeout-minutes: 240

    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Display OS used
        run: |
          cat /etc/*os-*
        shell: bash

      - name: Create k8s Kind Cluster
        uses: helm/kind-action@v1

      - name: Label node with affinity from inference-sim scenario
        run: |
          NODE=$(kubectl get nodes -o jsonpath='{.items[0].metadata.name}')
          echo "Labeling node: $NODE"
          kubectl label node "$NODE" kubernetes.io/os=linux --overwrite

      - name: Run install_deps
        run: |
          sudo apt-get update
          ./setup/install_deps.sh
        shell: bash

      - name: Populate python deps
        run: |
          echo -e "pandas\ngrip>=4.6.0\nmatplotlib>=3.7.0\nnumpy>=1.22.0\nseaborn>=0.12.0\nkubernetes>=28.0.0" > requirements.txt

      - name: Install python deps
        uses: actions/setup-python@v5
        with:
          python-version: '3.13'
          cache: 'pip'
      - run: pip install -r requirements.txt

      - name: Standup a modelservice using llm-d-inference-sim
        env:
          LLMDBENCH_HF_TOKEN: hf-token-placeholder
        run: |
          ./setup/standup.sh -c kind_modelservice_inference-sim -t modelservice -s 0,1,2,7,8,9

      - name: Run harness (mock)
        env:
          LLMDBENCH_HF_TOKEN: hf-token-placeholder
          LLMD_CONTROL_DRY_RUN: 1 # TODO: harness doesn't work now for kind bc no harness endpoint
        run: |
          ./setup/run.sh -c kind_modelservice_inference-sim --dry-run

      - name: Teardown
        env:
          LLMDBENCH_HF_TOKEN: hf-token-placeholder
        run: |
          ./setup/teardown.sh -c kind_modelservice_inference-sim
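
The job above can be approximated locally for debugging. The sketch below is a rough, assumption-laden equivalent and not part of the commit: it presumes kind and kubectl are installed, that it is run from the repository root, and it reuses the placeholder token exactly as the workflow does.

#!/usr/bin/env bash
# Rough local equivalent of the CI job above (sketch; assumes kind and kubectl
# are installed and the current directory is the repository root).
set -euo pipefail

kind create cluster   # plays the role of helm/kind-action@v1

# Label the single kind node the way the workflow does.
NODE=$(kubectl get nodes -o jsonpath='{.items[0].metadata.name}')
kubectl label node "$NODE" kubernetes.io/os=linux --overwrite

sudo apt-get update
./setup/install_deps.sh

export LLMDBENCH_HF_TOKEN=hf-token-placeholder   # placeholder, as in the workflow

./setup/standup.sh -c kind_modelservice_inference-sim -t modelservice -s 0,1,2,7,8,9
./setup/run.sh -c kind_modelservice_inference-sim --dry-run
./setup/teardown.sh -c kind_modelservice_inference-sim
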
Lines changed: 26 additions & 0 deletions

@@ -0,0 +1,26 @@
# A scenario to capture running inference-sim on a Kind cluster without requiring GPUs
export LLMDBENCH_DEPLOY_METHODS=modelservice
export LLMDBENCH_VLLM_COMMON_REPLICAS=1
export LLMDBENCH_VLLM_MODELSERVICE_PREFILL_ACCELERATOR_NR=0
export LLMDBENCH_VLLM_MODELSERVICE_DECODE_ACCELERATOR_NR=0
export LLMDBENCH_VLLM_COMMON_AFFINITY=kubernetes.io/os:linux
export LLMDBENCH_LLMD_IMAGE_NAME="llm-d-inference-sim"
export LLMDBENCH_LLMD_IMAGE_TAG="v0.3.0"
export LLMDBENCH_LLMD_ROUTINGSIDECAR_IMAGE_TAG="v0.2.0@sha256:a623a0752af0a71b7b05ebf95517848b5dbc3d8d235c1897035905632d5b7d80"
export LLMDBENCH_VLLM_MODELSERVICE_DECODE_MODEL_COMMAND=imageDefault
export LLMDBENCH_VLLM_MODELSERVICE_PREFILL_MODEL_COMMAND=imageDefault
export LLMDBENCH_VLLM_MODELSERVICE_DECODE_EXTRA_ARGS="[]"
export LLMDBENCH_VLLM_MODELSERVICE_PREFILL_EXTRA_ARGS="[]"
export LLMDBENCH_VLLM_MODELSERVICE_DECODE_CPU_NR=0
export LLMDBENCH_VLLM_MODELSERVICE_PREFILL_CPU_NR=0
export LLMDBENCH_VLLM_MODELSERVICE_DECODE_CPU_MEM=100Mi
export LLMDBENCH_VLLM_MODELSERVICE_PREFILL_CPU_MEM=100Mi
export LLMDBENCH_VLLM_MODELSERVICE_URI="hf://facebook/opt-125m"
export LLMDBENCH_DEPLOY_MODEL_LIST="facebook/opt-125m:facebook/opt-125m"
export LLMDBENCH_VLLM_COMMON_PVC_MODEL_CACHE_SIZE=3Gi
export LLMDBENCH_HARNESS_PVC_SIZE=3Gi
export LLMDBENCH_VLLM_COMMON_PVC_NAME=model-pvc
export LLMDBENCH_CONTROL_DEPLOY_IS_OPENSHIFT=0
export LLMDBENCH_CONTROL_RESOURCE_LIST=deployment,httproute,service,gateway,gatewayparameters,inferencepool,inferencemodel,cm,ing,pod,job
export LLMDBENCH_VLLM_MODELSERVICE_INFERENCE_MODEL=true
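
The new scenario is nothing more than a set of LLMDBENCH_* exports keyed by the name passed to -c in the workflow above. As a hedged illustration of how such a file is likely consumed (the actual resolution logic lives in the setup scripts and is not part of this diff):

# Illustrative only: a scenario name maps to a file under scenarios/ whose
# exports configure the deployment (hypothetical lookup, not the real code).
SCENARIO=kind_modelservice_inference-sim
source "scenarios/${SCENARIO}.sh"
echo "deploy method: ${LLMDBENCH_DEPLOY_METHODS}"          # modelservice
echo "model URI:     ${LLMDBENCH_VLLM_MODELSERVICE_URI}"   # hf://facebook/opt-125m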

scenarios/ocp_modelservice_inference-sim.sh

Lines changed: 1 addition & 1 deletion

@@ -14,4 +14,4 @@ export LLMDBENCH_VLLM_MODELSERVICE_PREFILL_EXTRA_ARGS="[]"
 #export LLMDBENCH_DEPLOY_MODEL_LIST="random/model"
 #export LLMDBENCH_HF_TOKEN="llm-d-hf-token" # <---- TODO: remove this dependency
 #export LLMDBENCH_VLLM_MODELSERVICE_URI="hf://random/model"
-#export LLMDBENCH_STEP_LIST=0,1,2,7,8,9
+#export LLMDBENCH_STEP_LIST=0,1,2,7,8,9
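
The commented-out LLMDBENCH_STEP_LIST mirrors the -s 0,1,2,7,8,9 flag the new workflow passes to standup.sh. Purely as a hedged sketch of how a comma-separated step list could select the numbered scripts under setup/steps/ (the real dispatch lives in the setup scripts and may differ):

# Hypothetical dispatcher: run only the selected, zero-padded step scripts.
STEP_LIST="${LLMDBENCH_STEP_LIST:-0,1,2,7,8,9}"
for step in ${STEP_LIST//,/ }; do
  script=$(ls setup/steps/$(printf '%02d' "$step")_*.sh 2>/dev/null | head -n1)
  [[ -n "$script" ]] && bash "$script"
done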

setup/functions.sh

Lines changed: 1 addition & 0 deletions

@@ -33,6 +33,7 @@ function model_attribute {
     "llama-8b") local model=meta-llama/Llama-3.1-8B-Instruct:llama-8b ;;
     "llama-70b") local model=meta-llama/Llama-3.1-70B-Instruct:llama-70b ;;
     "llama-17b") local model=meta-llama/Llama-4-Scout-17B-16E-Instruct:llama-17b ;;
+    "facebook/opt-125m") local model=facebook/opt-1.0-125m-hf:opt-125m ;;
     *)
       true ;;
   esac
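
The new case arm follows the existing pattern in model_attribute: each alias maps to a "<Hugging Face path>:<short label>" pair from which individual attributes are extracted. A self-contained sketch of that shape (simplified; the real function in setup/functions.sh derives more attributes than shown here):

# Simplified stand-in for the alias table above (illustrative only).
model_pair() {
  case "$1" in
    "llama-8b")          echo meta-llama/Llama-3.1-8B-Instruct:llama-8b ;;
    "facebook/opt-125m") echo facebook/opt-1.0-125m-hf:opt-125m ;;  # entry added by this commit
    *)                   echo "$1" ;;
  esac
}

pair=$(model_pair facebook/opt-125m)
echo "model=${pair%%:*} label=${pair##*:}"   # model=facebook/opt-1.0-125m-hf label=opt-125m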

setup/run.sh

Lines changed: 53 additions & 49 deletions

@@ -323,56 +323,60 @@ for method in ${LLMDBENCH_DEPLOY_METHODS//,/ }; do
      continue
    fi

-    create_harness_pod
-
-    announce "🚀 Starting pod \"${LLMDBENCH_RUN_HARNESS_LAUNCHER_NAME}\" for model \"$model\" ($LLMDBENCH_DEPLOY_CURRENT_MODEL)..."
-    llmdbench_execute_cmd "${LLMDBENCH_CONTROL_KCMD} apply -f $LLMDBENCH_CONTROL_WORK_DIR/setup/yamls/pod_benchmark-launcher.yaml" ${LLMDBENCH_CONTROL_DRY_RUN} ${LLMDBENCH_CONTROL_VERBOSE}
-    announce "✅ Pod \"${LLMDBENCH_RUN_HARNESS_LAUNCHER_NAME}\" for model \"$model\" started"
-
-    announce "⏳ Waiting for pod \"${LLMDBENCH_RUN_HARNESS_LAUNCHER_NAME}\" for model \"$model\" to be Ready (timeout=${LLMDBENCH_CONTROL_WAIT_TIMEOUT}s)..."
-    llmdbench_execute_cmd "${LLMDBENCH_CONTROL_KCMD} --namespace ${LLMDBENCH_HARNESS_NAMESPACE} wait --timeout=${LLMDBENCH_CONTROL_WAIT_TIMEOUT}s --for=jsonpath='{.status.phase}'=Running pod -l app=${LLMDBENCH_RUN_HARNESS_LAUNCHER_NAME}" ${LLMDBENCH_CONTROL_DRY_RUN} ${LLMDBENCH_CONTROL_VERBOSE}
-    announce "✅ Benchmark execution for model \"$model\" effectivelly started"
-
-    announce "ℹ️ You can follow the execution's output with \"${LLMDBENCH_CONTROL_KCMD} --namespace ${LLMDBENCH_HARNESS_NAMESPACE} logs -l app=${LLMDBENCH_RUN_HARNESS_LAUNCHER_NAME} -f\"..."
-
-    LLMDBENCH_HARNESS_ACCESS_RESULTS_POD_NAME=$(${LLMDBENCH_CONTROL_KCMD} --namespace ${LLMDBENCH_HARNESS_NAMESPACE} get pod -l app=llm-d-benchmark-harness --no-headers -o name | $LLMDBENCH_CONTROL_SCMD 's|^pod/||g')
-    llmdbench_execute_cmd "mkdir -p ${local_results_dir}/ && mkdir -p ${local_analysis_dir}/" ${LLMDBENCH_CONTROL_DRY_RUN} ${LLMDBENCH_CONTROL_VERBOSE}
-
-    copy_results_cmd="${LLMDBENCH_CONTROL_KCMD} --namespace ${LLMDBENCH_HARNESS_NAMESPACE} cp --retries=5 $LLMDBENCH_HARNESS_ACCESS_RESULTS_POD_NAME:${LLMDBENCH_RUN_EXPERIMENT_RESULTS_DIR} ${local_results_dir}"
-    copy_analysis_cmd="rsync -az --inplace --delete ${local_results_dir}/analysis/ ${local_analysis_dir}/ && rm -rf ${local_results_dir}/analysis"
-
-    if [[ $LLMDBENCH_HARNESS_DEBUG -eq 0 && ${LLMDBENCH_HARNESS_WAIT_TIMEOUT} -ne 0 ]]; then
-      announce "⏳ Waiting for pod \"${LLMDBENCH_RUN_HARNESS_LAUNCHER_NAME}\" for model \"$model\" to be in \"Completed\" state (timeout=${LLMDBENCH_HARNESS_WAIT_TIMEOUT}s)..."
-      llmdbench_execute_cmd "${LLMDBENCH_CONTROL_KCMD} --namespace ${LLMDBENCH_HARNESS_NAMESPACE} wait --timeout=${LLMDBENCH_HARNESS_WAIT_TIMEOUT}s --for=condition=ready=False pod ${LLMDBENCH_RUN_HARNESS_LAUNCHER_NAME}" ${LLMDBENCH_CONTROL_DRY_RUN} ${LLMDBENCH_CONTROL_VERBOSE}
-      announce "✅ Benchmark execution for model \"$model\" completed"
-
-      is_pod_in_error=$(${LLMDBENCH_CONTROL_KCMD} --namespace ${LLMDBENCH_HARNESS_NAMESPACE} get pod/${LLMDBENCH_RUN_HARNESS_LAUNCHER_NAME} --no-headers | grep " Error " || true)
-      if [ ! -z $is_pod_in_error ]; then
-        announce "❌ Final status of pod \"$LLMDBENCH_RUN_HARNESS_LAUNCHER_NAME\" is \"Error\""
-        exit 1
-      fi
-
-      announce "🗑️ Deleting pod \"${LLMDBENCH_RUN_HARNESS_LAUNCHER_NAME}\" for model \"$model\" ..."
-      llmdbench_execute_cmd "${LLMDBENCH_CONTROL_KCMD} --namespace ${LLMDBENCH_HARNESS_NAMESPACE} delete pod ${LLMDBENCH_RUN_HARNESS_LAUNCHER_NAME}" ${LLMDBENCH_CONTROL_DRY_RUN} ${LLMDBENCH_CONTROL_VERBOSE}
-      announce "✅ Pod \"${LLMDBENCH_RUN_HARNESS_LAUNCHER_NAME}\" for model \"$model\" deleted"
-
-      announce "🏗️ Collecting results for model \"$model\" ($LLMDBENCH_DEPLOY_CURRENT_MODEL) to \"${local_results_dir}\"..."
-      llmdbench_execute_cmd "${copy_results_cmd}" ${LLMDBENCH_CONTROL_DRY_RUN} ${LLMDBENCH_CONTROL_VERBOSE}
-
-      if [[ -d ${local_results_dir}/analysis && $LLMDBENCH_HARNESS_DEBUG -eq 0 && ${LLMDBENCH_HARNESS_WAIT_TIMEOUT} -ne 0 ]]; then
-        llmdbench_execute_cmd "$copy_analysis_cmd" ${LLMDBENCH_CONTROL_DRY_RUN} ${LLMDBENCH_CONTROL_VERBOSE}
-      fi
-
-      announce "✅ Results for model \"$model\" collected successfully"
-    elif [[ $LLMDBENCH_HARNESS_WAIT_TIMEOUT -eq 0 ]]; then
-      announce "ℹ️ Harness was started with LLMDBENCH_HARNESS_WAIT_TIMEOUT=0. Will NOT wait for pod \"${LLMDBENCH_RUN_HARNESS_LAUNCHER_NAME}\" for model \"$model\" to be in \"Completed\" state. The pod can be accessed through \"${LLMDBENCH_CONTROL_KCMD} --namespace ${LLMDBENCH_HARNESS_NAMESPACE} exec -it pod/${LLMDBENCH_RUN_HARNESS_LAUNCHER_NAME} -- bash\""
-      announce "ℹ️ To collect results after an execution, \"$copy_results_cmd && $copy_analysis_cmd"
-      break
+    if [[ $LLMDBENCH_CONTROL_DRY_RUN -eq 1 ]]; then
+      announce "ℹ️ Skipping \"${LLMDBENCH_RUN_HARNESS_LAUNCHER_NAME}\ creation"
    else
-      announce "ℹ️ Harness was started in \"debug mode\". The pod can be accessed through \"${LLMDBENCH_CONTROL_KCMD} --namespace ${LLMDBENCH_HARNESS_NAMESPACE} exec -it pod/${LLMDBENCH_RUN_HARNESS_LAUNCHER_NAME} -- bash\""
-      announce "ℹ️ In order to execute a given workload profile, run \"llm-d-benchmark.sh <[$(get_harness_list)]> [WORKLOAD FILE NAME]\" (all inside the pod \"${LLMDBENCH_RUN_HARNESS_LAUNCHER_NAME}\")"
-      announce "ℹ️ To collect results after an execution, \"$copy_results_cmd && $copy_analysis_cmd"
-      break
+      create_harness_pod
+
+      announce "🚀 Starting pod \"${LLMDBENCH_RUN_HARNESS_LAUNCHER_NAME}\" for model \"$model\" ($LLMDBENCH_DEPLOY_CURRENT_MODEL)..."
+      llmdbench_execute_cmd "${LLMDBENCH_CONTROL_KCMD} apply -f $LLMDBENCH_CONTROL_WORK_DIR/setup/yamls/pod_benchmark-launcher.yaml" ${LLMDBENCH_CONTROL_DRY_RUN} ${LLMDBENCH_CONTROL_VERBOSE}
+      announce "✅ Pod \"${LLMDBENCH_RUN_HARNESS_LAUNCHER_NAME}\" for model \"$model\" started"
+
+      announce "⏳ Waiting for pod \"${LLMDBENCH_RUN_HARNESS_LAUNCHER_NAME}\" for model \"$model\" to be Ready (timeout=${LLMDBENCH_CONTROL_WAIT_TIMEOUT}s)..."
+      llmdbench_execute_cmd "${LLMDBENCH_CONTROL_KCMD} --namespace ${LLMDBENCH_HARNESS_NAMESPACE} wait --timeout=${LLMDBENCH_CONTROL_WAIT_TIMEOUT}s --for=jsonpath='{.status.phase}'=Running pod -l app=${LLMDBENCH_RUN_HARNESS_LAUNCHER_NAME}" ${LLMDBENCH_CONTROL_DRY_RUN} ${LLMDBENCH_CONTROL_VERBOSE}
+      announce "✅ Benchmark execution for model \"$model\" effectivelly started"
+
+      announce "ℹ️ You can follow the execution's output with \"${LLMDBENCH_CONTROL_KCMD} --namespace ${LLMDBENCH_HARNESS_NAMESPACE} logs -l app=${LLMDBENCH_RUN_HARNESS_LAUNCHER_NAME} -f\"..."
+
+      LLMDBENCH_HARNESS_ACCESS_RESULTS_POD_NAME=$(${LLMDBENCH_CONTROL_KCMD} --namespace ${LLMDBENCH_HARNESS_NAMESPACE} get pod -l app=llm-d-benchmark-harness --no-headers -o name | $LLMDBENCH_CONTROL_SCMD 's|^pod/||g')
+      llmdbench_execute_cmd "mkdir -p ${local_results_dir}/ && mkdir -p ${local_analysis_dir}/" ${LLMDBENCH_CONTROL_DRY_RUN} ${LLMDBENCH_CONTROL_VERBOSE}
+
+      copy_results_cmd="${LLMDBENCH_CONTROL_KCMD} --namespace ${LLMDBENCH_HARNESS_NAMESPACE} cp --retries=5 $LLMDBENCH_HARNESS_ACCESS_RESULTS_POD_NAME:${LLMDBENCH_RUN_EXPERIMENT_RESULTS_DIR} ${local_results_dir}"
+      copy_analysis_cmd="rsync -az --inplace --delete ${local_results_dir}/analysis/ ${local_analysis_dir}/ && rm -rf ${local_results_dir}/analysis"
+
+      if [[ $LLMDBENCH_HARNESS_DEBUG -eq 0 && ${LLMDBENCH_HARNESS_WAIT_TIMEOUT} -ne 0 ]]; then
+        announce "⏳ Waiting for pod \"${LLMDBENCH_RUN_HARNESS_LAUNCHER_NAME}\" for model \"$model\" to be in \"Completed\" state (timeout=${LLMDBENCH_HARNESS_WAIT_TIMEOUT}s)..."
+        llmdbench_execute_cmd "${LLMDBENCH_CONTROL_KCMD} --namespace ${LLMDBENCH_HARNESS_NAMESPACE} wait --timeout=${LLMDBENCH_HARNESS_WAIT_TIMEOUT}s --for=condition=ready=False pod ${LLMDBENCH_RUN_HARNESS_LAUNCHER_NAME}" ${LLMDBENCH_CONTROL_DRY_RUN} ${LLMDBENCH_CONTROL_VERBOSE}
+        announce "✅ Benchmark execution for model \"$model\" completed"
+
+        is_pod_in_error=$(${LLMDBENCH_CONTROL_KCMD} --namespace ${LLMDBENCH_HARNESS_NAMESPACE} get pod/${LLMDBENCH_RUN_HARNESS_LAUNCHER_NAME} --no-headers | grep " Error " || true)
+        if [ ! -z $is_pod_in_error ]; then
+          announce "❌ Final status of pod \"$LLMDBENCH_RUN_HARNESS_LAUNCHER_NAME\" is \"Error\""
+          exit 1
+        fi
+
+        announce "🗑️ Deleting pod \"${LLMDBENCH_RUN_HARNESS_LAUNCHER_NAME}\" for model \"$model\" ..."
+        llmdbench_execute_cmd "${LLMDBENCH_CONTROL_KCMD} --namespace ${LLMDBENCH_HARNESS_NAMESPACE} delete pod ${LLMDBENCH_RUN_HARNESS_LAUNCHER_NAME}" ${LLMDBENCH_CONTROL_DRY_RUN} ${LLMDBENCH_CONTROL_VERBOSE}
+        announce "✅ Pod \"${LLMDBENCH_RUN_HARNESS_LAUNCHER_NAME}\" for model \"$model\" deleted"
+
+        announce "🏗️ Collecting results for model \"$model\" ($LLMDBENCH_DEPLOY_CURRENT_MODEL) to \"${local_results_dir}\"..."
+        llmdbench_execute_cmd "${copy_results_cmd}" ${LLMDBENCH_CONTROL_DRY_RUN} ${LLMDBENCH_CONTROL_VERBOSE}
+
+        if [[ -d ${local_results_dir}/analysis && $LLMDBENCH_HARNESS_DEBUG -eq 0 && ${LLMDBENCH_HARNESS_WAIT_TIMEOUT} -ne 0 ]]; then
+          llmdbench_execute_cmd "$copy_analysis_cmd" ${LLMDBENCH_CONTROL_DRY_RUN} ${LLMDBENCH_CONTROL_VERBOSE}
+        fi
+
+        announce "✅ Results for model \"$model\" collected successfully"
+      elif [[ $LLMDBENCH_HARNESS_WAIT_TIMEOUT -eq 0 ]]; then
+        announce "ℹ️ Harness was started with LLMDBENCH_HARNESS_WAIT_TIMEOUT=0. Will NOT wait for pod \"${LLMDBENCH_RUN_HARNESS_LAUNCHER_NAME}\" for model \"$model\" to be in \"Completed\" state. The pod can be accessed through \"${LLMDBENCH_CONTROL_KCMD} --namespace ${LLMDBENCH_HARNESS_NAMESPACE} exec -it pod/${LLMDBENCH_RUN_HARNESS_LAUNCHER_NAME} -- bash\""
+        announce "ℹ️ To collect results after an execution, \"$copy_results_cmd && $copy_analysis_cmd"
+        break
+      else
+        announce "ℹ️ Harness was started in \"debug mode\". The pod can be accessed through \"${LLMDBENCH_CONTROL_KCMD} --namespace ${LLMDBENCH_HARNESS_NAMESPACE} exec -it pod/${LLMDBENCH_RUN_HARNESS_LAUNCHER_NAME} -- bash\""
+        announce "ℹ️ In order to execute a given workload profile, run \"llm-d-benchmark.sh <[$(get_harness_list)]> [WORKLOAD FILE NAME]\" (all inside the pod \"${LLMDBENCH_RUN_HARNESS_LAUNCHER_NAME}\")"
+        announce "ℹ️ To collect results after an execution, \"$copy_results_cmd && $copy_analysis_cmd"
+        break
+      fi
    fi
  done
fi
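
The net effect of this hunk is a dry-run guard: when LLMDBENCH_CONTROL_DRY_RUN is 1, harness pod creation and everything that depends on it are skipped, which is what lets the new kind-based CI job exercise run.sh without a harness endpoint. The guard in isolation, as a minimal sketch with the surrounding details elided:

# Minimal sketch of the dry-run guard introduced above (variable names reused
# for illustration; the real block also waits on, deletes, and copies from the pod).
LLMDBENCH_CONTROL_DRY_RUN="${LLMDBENCH_CONTROL_DRY_RUN:-0}"
if [[ $LLMDBENCH_CONTROL_DRY_RUN -eq 1 ]]; then
  echo "Skipping harness launcher pod creation (dry run)"
else
  echo "Creating harness launcher pod and collecting results..."
  # create_harness_pod; kubectl apply/wait/delete; copy results
fi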

setup/steps/09_deploy_via_modelservice.sh

Lines changed: 2 additions & 1 deletion

@@ -27,7 +27,7 @@ if [[ $LLMDBENCH_CONTROL_ENVIRONMENT_TYPE_MODELSERVICE_ACTIVE -eq 1 ]]; then
    export LLMDBENCH_DEPLOY_CURRENT_MODEL_ID_LABEL=$(model_attribute $model modelid_label)

    # If LLMDBENCH_VLLM_MODELSERVICE_URI is not defined, set it to pvc://
-    if [[ -n "$LLMDBENCH_VLLM_MODELSERVICE_URI" ]]; then
+    if [[ -z "$LLMDBENCH_VLLM_MODELSERVICE_URI" ]]; then
      export LLMDBENCH_VLLM_MODELSERVICE_URI="pvc://${LLMDBENCH_VLLM_COMMON_PVC_NAME}/models/$(model_attribute $model model)"
    fi

@@ -64,6 +64,7 @@ routing:
      kind: Gateway
      name: infra-${LLMDBENCH_VLLM_MODELSERVICE_RELEASE}-inference-gateway
    proxy:
+      image: "$(get_image ${LLMDBENCH_LLMD_ROUTINGSIDECAR_IMAGE_REGISTRY} ${LLMDBENCH_LLMD_ROUTINGSIDECAR_IMAGE_REPO} ${LLMDBENCH_LLMD_ROUTINGSIDECAR_IMAGE_NAME} ${LLMDBENCH_LLMD_ROUTINGSIDECAR_IMAGE_TAG} 0)"
      secure: false
  inferenceModel:
    create: ${LLMDBENCH_VLLM_MODELSERVICE_INFERENCE_MODEL}
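
The one-character change in the first hunk matters: the comment says the pvc:// default should only apply when LLMDBENCH_VLLM_MODELSERVICE_URI is unset, which is the -z (empty) test; the previous -n test instead overwrote any URI that was explicitly provided, such as the hf://facebook/opt-125m URI set by the new kind scenario. A small sketch of the corrected behavior (values borrowed from the scenario file; not the full step script):

# Sketch of the corrected default logic (illustrative values from the scenario).
maybe_default_uri() {
  if [[ -z "$LLMDBENCH_VLLM_MODELSERVICE_URI" ]]; then
    LLMDBENCH_VLLM_MODELSERVICE_URI="pvc://model-pvc/models/facebook/opt-125m"
  fi
  echo "$LLMDBENCH_VLLM_MODELSERVICE_URI"
}

LLMDBENCH_VLLM_MODELSERVICE_URI="hf://facebook/opt-125m"
maybe_default_uri   # -> hf://facebook/opt-125m (explicit URI is preserved)

LLMDBENCH_VLLM_MODELSERVICE_URI=""
maybe_default_uri   # -> pvc://model-pvc/models/facebook/opt-125m (default applied)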
