diff --git a/build/Dockerfile b/build/Dockerfile index 5fc2fc45..18345f74 100644 --- a/build/Dockerfile +++ b/build/Dockerfile @@ -44,7 +44,7 @@ RUN cd inference-perf; \ ARG VLLM_BENCHMARK_REPO=https://github.com/vllm-project/vllm.git ARG VLLM_BENCHMARK_BRANCH=main -ARG VLLM_BENCHMARK_COMMIT=e675dda67be278d21c4ec177b2baa8b7a0550920 +ARG VLLM_BENCHMARK_COMMIT=f176443446f659dbab5315e056e605d8984fd976 RUN git clone --branch ${VLLM_BENCHMARK_BRANCH} ${VLLM_BENCHMARK_REPO} RUN cd vllm; git checkout ${VLLM_BENCHMARK_COMMIT} # Patch the pyproject.toml to allow "pip install -e ." @@ -71,7 +71,7 @@ RUN cd vllm; VLLM_TARGET_DEVICE=empty pip install -e . --no-build-isolation # GuideLLM also requires torch, installed above ARG GUIDELLM_REPO=https://github.com/vllm-project/guidellm.git ARG GUIDELLM_BRANCH=main -ARG GUIDELLM_COMMIT=adfa108ab1df6f2a1452d1037a71817a493303a8 +ARG GUIDELLM_COMMIT=f9f1e3181274b7fecb615158f7bde48b9d20001d RUN git clone --branch ${GUIDELLM_BRANCH} ${GUIDELLM_REPO} RUN cd guidellm; \ git checkout ${GUIDELLM_COMMIT}; \ diff --git a/existing_stack/run_only.sh b/existing_stack/run_only.sh index e94ab216..92549957 100755 --- a/existing_stack/run_only.sh +++ b/existing_stack/run_only.sh @@ -15,7 +15,7 @@ # limitations under the License. 
if uname -s | grep -qi darwin; then - alias sed=gsed + alias sed=gsed fi # Constants @@ -71,7 +71,7 @@ function announce { ;; *) echo -e "==> $(date) - ${0} - $message" >> ${logfile} - ;; + ;; esac } @@ -129,7 +129,7 @@ function results_dir_name { local workload_name="${4:+_$4}" sanitize_dir_name "${RESULTS_DIR_PREFIX}/${harness_name}_${experiment_id}${workload_name}_${stack_name}" -} +} # Retrieve list of available harnesses function get_harness_list { @@ -292,12 +292,15 @@ else if [[ "${_output_destination}" == *"://"* ]]; then _storage_type="cloud" _scheme=$(echo "${_output_destination}" | cut -d: -f1) + _bucket=$(echo "${_output_destination}" | cut -d ':' -f 2 | sed -e 's^//^^g' -e 's:/*$::') case "${_scheme}" in gs) announce "ℹ️ Verifying GCS output destination..." if ! command -v gcloud &> /dev/null; then announce "❌ 'gcloud' command not found, but is required for 'gs://' output." exit 1 + else + is_bucket=$(gcloud storage buckets list | grep ${_bucket} || true) fi ;; s3) @@ -305,6 +308,8 @@ else if ! command -v aws &> /dev/null; then announce "❌ 'aws' command not found, but is required for 's3://' output." exit 1 + else + is_bucket=$(aws s3 ls | grep ${_bucket} || true) fi ;; *) @@ -312,6 +317,14 @@ else exit 1 ;; esac + + if [[ -z $is_bucket ]]; then + announce "❌ ERROR: Bucket \"${_bucket}\" ('${_output_destination}') not found." + exit 1 + else + announce "✅ Output destination checked" + fi + else _storage_type="local" announce "ℹ️ Verifying local output destination '${_output_destination}'" @@ -327,9 +340,9 @@ fi if [[ "$harness_parallelism" != "1" ]]; then announce "❌ ERROR: harness_parallelism is set to '$harness_parallelism'. Only parallelism=1 is supported." exit 1 -fi +fi #@TODO harness_parallelism=1 only is supported for now!!! -#@TODO: The 'upload_results' function currently handles only one pod. +#@TODO: The 'upload_results' function currently handles only one pod. # To support parallelism, it must collect results from all harness pods. 
_harness_pod_name=$(sanitize_pod_name "${HARNESS_POD_LABEL}") @@ -345,9 +358,9 @@ _control_dir=$(realpath $(pwd)/) # Verify HF token secret exists # ======================================================== announce "🔧 Verifying HF token secret ${endpoint_hf_token_secret} in namespace ${endpoint_namespace}" -if $control_kubectl --namespace "$endpoint_namespace" get secret "$endpoint_hf_token_secret" 2>&1 > /dev/null; then +if $control_kubectl --namespace "$endpoint_namespace" get secret "$endpoint_hf_token_secret" 2>&1 > /dev/null; then announce "ℹ️ Using HF token secret $endpoint_hf_token_secret" -else +else announce "❌ ERROR: could not fetch HF token secret $endpoint_hf_token_secret" exit 1 fi @@ -388,7 +401,7 @@ announce "ℹ️ ConfigMap '${harness_name}-profiles' created" # Create harness pod -# ======================================================== +# ======================================================== _pod_name="${_harness_pod_name}" # place holder for parallelism support announce "ℹ️ Creating harness pod ${_pod_name}" @@ -403,7 +416,7 @@ announce "ℹ️ Running benchmark with Experiment ID ${_uid}. Results will be stored in PVC ${harness_results_pvc}. - Note: + Note: Benchmark will continue to run even on time-out or connection failure. Can follow progress by checking the logs (${control_kubectl} logs -f ${_pod_name} -n ${harness_namespace}). " @@ -452,8 +465,8 @@ case "${_storage_type}" in ;; esac -announce "✅ +announce "✅ Experiment ID is ${_uid}. - All workloads completed. + All workloads completed. 
Results should be available in ${final_msg} -" \ No newline at end of file +" diff --git a/setup/env.sh b/setup/env.sh index c99b8ba6..5e4e118a 100644 --- a/setup/env.sh +++ b/setup/env.sh @@ -219,6 +219,7 @@ export LLMDBENCH_HARNESS_CPU_MEM=${LLMDBENCH_HARNESS_CPU_MEM:-32Gi} export LLMDBENCH_HARNESS_NAMESPACE=${LLMDBENCH_HARNESS_NAMESPACE:-llmdbench} export LLMDBENCH_HARNESS_PVC_NAME="${LLMDBENCH_HARNESS_PVC_NAME:-"workload-pvc"}" export LLMDBENCH_HARNESS_PVC_SIZE="${LLMDBENCH_HARNESS_PVC_SIZE:-20Gi}" +export LLMDBENCH_HARNESS_OUTPUT=${LLMDBENCH_HARNESS_OUTPUT:-"local"} export LLMDBENCH_HARNESS_SKIP_RUN=${LLMDBENCH_HARNESS_SKIP_RUN:-} export LLMDBENCH_HARNESS_ENVVARS_TO_YAML=${LLMDBENCH_HARNESS_ENVVARS_TO_YAML:-LLMDBENCH_RUN_EXPERIMENT} export LLMDBENCH_HARNESS_LOAD_PARALLELISM=${LLMDBENCH_HARNESS_LOAD_PARALLELISM:-1} diff --git a/setup/functions.sh b/setup/functions.sh index c3a1e481..6e0ed139 100755 --- a/setup/functions.sh +++ b/setup/functions.sh @@ -243,65 +243,6 @@ function render_string { } export -f render_string -function render_template { - local template_file_path=$1 - local output_file_path=${2:-"none"} - local additional_replace_commands=${3:-"none"} - local cmdline_mode=${4:-0} - local env_var_mode=${5:-0} - - rm -f $LLMDBENCH_CONTROL_WORK_DIR/setup/sed-commands - touch $LLMDBENCH_CONTROL_WORK_DIR/setup/sed-commands - - if [[ $additional_replace_commands != "none" ]]; then - cat $additional_replace_commands >> $LLMDBENCH_CONTROL_WORK_DIR/setup/sed-commands - fi - - for entry in $(cat ${template_file_path} | grep -v ^# | $LLMDBENCH_CONTROL_SCMD -e 's^-^\n^g' -e 's^:^\n^g' -e 's^ ^\n^g' -e 's^ ^^g' -e 's^\.^\n^g' -e 's^\/^\n^g' | grep -E "REPLACE_ENV" | uniq); do - render_string $entry &>/dev/null - done - - echo "s^#.*^^g" >> $LLMDBENCH_CONTROL_WORK_DIR/setup/sed-commands - if [[ $cmdline_mode -eq 1 ]]; then - if [[ $LLMDBENCH_CURRENT_STEP == "06" ]]; then - echo " - |" - local spacec=$(printf '%*s' 12 '') - fi - - if [[ $LLMDBENCH_CURRENT_STEP == 
"09" ]]; then - echo "- |" - local spacec=$(printf '%*s' 8 '') - fi - echo "s^REPLACE_SPACESC^$spacec^g" >> $LLMDBENCH_CONTROL_WORK_DIR/setup/sed-commands - echo "s^ --^\\n$spacec--^g" >> $LLMDBENCH_CONTROL_WORK_DIR/setup/sed-commands - echo "s^\\n^ \\\\\n^g" >> $LLMDBENCH_CONTROL_WORK_DIR/setup/sed-commands - echo "s^REPLACE_COMMA^,^g" >> $LLMDBENCH_CONTROL_WORK_DIR/setup/sed-commands - fi - - if [[ $env_var_mode -eq 1 ]]; then - if [[ $LLMDBENCH_CURRENT_STEP == "06" ]]; then - local spacec=$(printf '%*s' 8 '') - fi - if [[ $LLMDBENCH_CURRENT_STEP == "09" ]]; then - local spacec=$(printf '%*s' 6 '') - fi - echo "s^REPLACE_SPACESC^$spacec^g" >> $LLMDBENCH_CONTROL_WORK_DIR/setup/sed-commands - fi - - if [[ $output_file_path != "none" ]]; then - cat ${template_file_path} | $LLMDBENCH_CONTROL_SCMD -f $LLMDBENCH_CONTROL_WORK_DIR/setup/sed-commands > $output_file_path - fi - - if [[ $cmdline_mode -eq 1 ]]; then - echo "REPLACE_SPACESC$(cat ${template_file_path})" | $LLMDBENCH_CONTROL_SCMD -f $LLMDBENCH_CONTROL_WORK_DIR/setup/sed-commands - fi - - if [[ $env_var_mode -eq 1 ]]; then - echo "$(cat ${template_file_path} | $LLMDBENCH_CONTROL_SCMD -e 's^\^^REPLACE_SPACESC^g')" | $LLMDBENCH_CONTROL_SCMD -e '1s^REPLACE_SPACESC^^' | $LLMDBENCH_CONTROL_SCMD -f $LLMDBENCH_CONTROL_WORK_DIR/setup/sed-commands - fi -} -export -f render_template - function not_valid_ip { local ip=$1 @@ -483,7 +424,7 @@ function deploy_harness_config { fi announce "✅ All benchmark pods completed" - announce "🏗️ Collecting results for pods with label \"app=${LLMDBENCH_HARNESS_POD_LABEL}\"..." + announce "🏗️ Collecting results for pods with label \"app=${LLMDBENCH_HARNESS_POD_LABEL}\"..." 
for i in $(seq 1 "$LLMDBENCH_HARNESS_LOAD_PARALLELISM"); do # Per-pod directories pod_results_dir="${local_results_dir}_${i}" @@ -504,6 +445,8 @@ function deploy_harness_config { if [[ -d ${pod_results_dir}/analysis && $LLMDBENCH_HARNESS_DEBUG -eq 0 && ${LLMDBENCH_HARNESS_WAIT_TIMEOUT} -ne 0 ]]; then llmdbench_execute_cmd "$copy_analysis_cmd" ${LLMDBENCH_CONTROL_DRY_RUN} ${LLMDBENCH_CONTROL_VERBOSE} fi + + upload_results ${pod_results_dir} done announce "✅ Collected results for pods with label \"app=${LLMDBENCH_HARNESS_POD_LABEL}\" at: \"${LLMDBENCH_CONTROL_WORK_DIR}/results/\"" announce "✅ Collected analysis for pods with label \"app=${LLMDBENCH_HARNESS_POD_LABEL}\" at: \"${LLMDBENCH_CONTROL_WORK_DIR}/analysis/\"" @@ -537,13 +480,14 @@ function capture_pod_logs { pod_results_dir="${local_results_dir}_${i}" pod_analysis_dir="${local_analysis_dir}_${i}" - announce "ℹ️ Capturing the current status of all pods in namespace \"$LLMDBENCH_VLLM_COMMON_NAMESPACE\" to ${pod_results_dir}/pod_status.txt ..." + announce "🏗️ Capturing the current status of all pods in namespace \"$LLMDBENCH_VLLM_COMMON_NAMESPACE\" to ${pod_results_dir}/pod_status.txt ..." llmdbench_execute_cmd "${LLMDBENCH_CONTROL_KCMD} --namespace $LLMDBENCH_VLLM_COMMON_NAMESPACE get pods -o wide > ${pod_results_dir}/pod_status.txt" \ ${LLMDBENCH_CONTROL_DRY_RUN} \ ${LLMDBENCH_CONTROL_VERBOSE} - announce "✅ Pod status captured." + announce "✅ Pod status captured to \"${pod_results_dir}/pod_status.txt\"" + + announce "🏗️ Capturing logs for all pods in namespace \"$LLMDBENCH_VLLM_COMMON_NAMESPACE\" to ${pod_results_dir}/logs/ ..." - announce "ℹ️ Capturing logs for all pods in namespace \"$LLMDBENCH_VLLM_COMMON_NAMESPACE\" to ${pod_results_dir}/logs/ ..." 
mkdir -p ${pod_results_dir}/logs/ llmdbench_execute_cmd "${LLMDBENCH_CONTROL_KCMD} --namespace $LLMDBENCH_VLLM_COMMON_NAMESPACE logs --tail=-1 --prefix=true -l llm-d.ai/model=\"$modelid_label\" > ${pod_results_dir}/logs/modelserving_pods.log" \ ${LLMDBENCH_CONTROL_DRY_RUN} \ @@ -556,6 +500,7 @@ function capture_pod_logs { llmdbench_execute_cmd "${LLMDBENCH_CONTROL_KCMD} --namespace $LLMDBENCH_VLLM_COMMON_NAMESPACE logs --tail=-1 --prefix=true -l \"app.kubernetes.io/component=inference-gateway\" > ${pod_results_dir}/logs/igw_pods.log" \ ${LLMDBENCH_CONTROL_DRY_RUN} \ ${LLMDBENCH_CONTROL_VERBOSE} + announce "✅ Pod logs captured to \"${pod_results_dir}/logs/\"" done } export -f capture_pod_logs @@ -956,3 +901,136 @@ function user_has_hf_model_access { case "$http_code" in 200) return 0 ;; 401|403) return 1 ;; *) return 2 ;; esac } export -f user_has_hf_model_access + +function verify_output_destination { + local _output_destination=$1 + if [[ ${_output_destination} == "local" ]]; then + export LLMDBENCH_HARNESS_OUTPUT_STORAGE_TYPE=local + return 0 + else + if [[ "${_output_destination}" == *"://"* ]]; then + _storage_type="cloud" + _scheme=$(echo "${_output_destination}" | cut -d: -f1) + _bucket=$(echo "${_output_destination}" | cut -d ':' -f 2 | $LLMDBENCH_CONTROL_SCMD -e 's^//^^g' -e 's:/*$::') + case "${_scheme}" in + gs) + export LLMDBENCH_HARNESS_OUTPUT_STORAGE_TYPE=gs + announce "ℹ️ Verifying GCS output destination..." + if ! command -v gcloud &> /dev/null; then + announce "❌ 'gcloud' command not found, but is required for 'gs://' output." + exit 1 + else + is_bucket=$(gcloud storage buckets list | grep ${_bucket} || true) + fi + ;; + s3) + export LLMDBENCH_HARNESS_OUTPUT_STORAGE_TYPE=s3 + announce "ℹ️ Verifying S3 output destination..." + if ! command -v aws &> /dev/null; then + announce "❌ 'aws' command not found, but is required for 's3://' output." 
+ exit 1 + else + is_bucket=$(aws s3 ls | grep ${_bucket} || true) + fi + ;; + *) + announce "❌ ERROR: Unsupported cloud provider scheme '${_scheme}' for destination '${_output_destination}'." + exit 1 + ;; + esac + + if [[ -z $is_bucket ]]; then + announce "❌ ERROR: Bucket \"${_bucket}\" ('${_output_destination}') not found." + exit 1 + else + announce "✅ Output destination verified" + fi + fi + fi +} +export -f verify_output_destination + +function upload_results { + local local_results_dir=$1 + local remote_results_dir=$(echo $local_results_dir | $LLMDBENCH_CONTROL_SCMD -e "s^$LLMDBENCH_CONTROL_WORK_DIR/results/^^g") + + if [[ "${LLMDBENCH_HARNESS_OUTPUT_STORAGE_TYPE}" == "local" ]]; then + return 0 + fi + case ${LLMDBENCH_HARNESS_OUTPUT_STORAGE_TYPE} in + gs) + announce "☁️ Uploading results to GCS bucket ${LLMDBENCH_HARNESS_OUTPUT}" + gcloud storage cp --recursive "${local_results_dir}/" "${LLMDBENCH_HARNESS_OUTPUT}/${remote_results_dir}/" + ;; + s3) + announce "☁️ Uploading results to S3 bucket ${LLMDBENCH_HARNESS_OUTPUT}" + aws s3 cp --recursive "${local_results_dir}/" "${LLMDBENCH_HARNESS_OUTPUT}/${remote_results_dir}/" + ;; + local) + announce "ℹ️ Results saved to local folder." + ;; + *) + announce "❌ ERROR: unknown or unsupported storage provider \"${LLMDBENCH_HARNESS_OUTPUT_STORAGE_TYPE}\"." 
+ exit 1 + ;; + esac +} + +function render_template { + local template_file_path=$1 + local output_file_path=${2:-"none"} + local additional_replace_commands=${3:-"none"} + local cmdline_mode=${4:-0} + local env_var_mode=${5:-0} + + rm -f $LLMDBENCH_CONTROL_WORK_DIR/setup/sed-commands + touch $LLMDBENCH_CONTROL_WORK_DIR/setup/sed-commands + + if [[ $additional_replace_commands != "none" ]]; then + cat $additional_replace_commands >> $LLMDBENCH_CONTROL_WORK_DIR/setup/sed-commands + fi + + for entry in $(cat ${template_file_path} | grep -v ^# | $LLMDBENCH_CONTROL_SCMD -e 's^-^\n^g' -e 's^:^\n^g' -e 's^ ^\n^g' -e 's^ ^^g' -e 's^\.^\n^g' -e 's^\/^\n^g' | grep -E "REPLACE_ENV" | uniq); do + render_string $entry &>/dev/null + done + + echo "s^#.*^^g" >> $LLMDBENCH_CONTROL_WORK_DIR/setup/sed-commands + if [[ $cmdline_mode -eq 1 ]]; then + if [[ $LLMDBENCH_CURRENT_STEP == "06" ]]; then + echo " - |" + local spacec=$(printf '%*s' 12 '') + fi + + if [[ $LLMDBENCH_CURRENT_STEP == "09" ]]; then + echo "- |" + local spacec=$(printf '%*s' 8 '') + fi + echo "s^REPLACE_SPACESC^$spacec^g" >> $LLMDBENCH_CONTROL_WORK_DIR/setup/sed-commands + echo "s^ --^\\n$spacec--^g" >> $LLMDBENCH_CONTROL_WORK_DIR/setup/sed-commands + echo "s^\\n^ \\\\\n^g" >> $LLMDBENCH_CONTROL_WORK_DIR/setup/sed-commands + echo "s^REPLACE_COMMA^,^g" >> $LLMDBENCH_CONTROL_WORK_DIR/setup/sed-commands + fi + + if [[ $env_var_mode -eq 1 ]]; then + if [[ $LLMDBENCH_CURRENT_STEP == "06" ]]; then + local spacec=$(printf '%*s' 8 '') + fi + if [[ $LLMDBENCH_CURRENT_STEP == "09" ]]; then + local spacec=$(printf '%*s' 6 '') + fi + echo "s^REPLACE_SPACESC^$spacec^g" >> $LLMDBENCH_CONTROL_WORK_DIR/setup/sed-commands + fi + + if [[ $output_file_path != "none" ]]; then + cat ${template_file_path} | $LLMDBENCH_CONTROL_SCMD -f $LLMDBENCH_CONTROL_WORK_DIR/setup/sed-commands > $output_file_path + fi + + if [[ $cmdline_mode -eq 1 ]]; then + echo "REPLACE_SPACESC$(cat ${template_file_path})" | $LLMDBENCH_CONTROL_SCMD -f 
$LLMDBENCH_CONTROL_WORK_DIR/setup/sed-commands + fi + + if [[ $env_var_mode -eq 1 ]]; then + echo "$(cat ${template_file_path} | $LLMDBENCH_CONTROL_SCMD -e 's^\^^REPLACE_SPACESC^g')" | $LLMDBENCH_CONTROL_SCMD -e '1s^REPLACE_SPACESC^^' | $LLMDBENCH_CONTROL_SCMD -f $LLMDBENCH_CONTROL_WORK_DIR/setup/sed-commands + fi +} +export -f render_template diff --git a/setup/run.sh b/setup/run.sh index d1ba573a..b6739a6d 100755 --- a/setup/run.sh +++ b/setup/run.sh @@ -52,12 +52,13 @@ function show_usage { -k/--pvc [name of the PVC used to store the results (default=$LLMDBENCH_HARNESS_PVC_NAME)] \n \ -e/--experiments [path of yaml file containing a list of factors and levels for an experiment, useful for parameter sweeping (default=$LLMDBENCH_HARNESS_EXPERIMENT_TREATMENTS)] \n \ -o/--overrides [comma-separated list of workload profile parameters to be overriden (default=$LLMDBENCH_HARNESS_EXPERIMENT_PROFILE_OVERRIDES)] \n \ + -r/--output [destination for the results, e.g. gs://my-bucket or s3://my-bucket (default=$LLMDBENCH_HARNESS_OUTPUT)] \n \ -z/--skip [skip the execution of the experiment, and only collect data (default=$LLMDBENCH_HARNESS_SKIP_RUN)] \n \ -v/--verbose [print the command being executed, and result (default=$LLMDBENCH_CONTROL_VERBOSE)] \n \ -x/--dataset [url for dataset to be replayed (default=$LLMDBENCH_RUN_DATASET_URL)] \n \ -u/--wva [deploy model with Workload Variant Autoscaler (default=$LLMDBENCH_WVA_ENABLED)] \n \ -j/--parallelism [number of harness pods to be created (default=$LLMDBENCH_HARNESS_LOAD_PARALLELISM)] \n \ - -s/--wait [time to wait until the benchmark run is complete (default=$LLMDBENCH_HARNESS_WAIT_TIMEOUT, value \"0\" means "do not wait\""] \n \ + -s/--wait [time to wait until the benchmark run is complete (default=$LLMDBENCH_HARNESS_WAIT_TIMEOUT, value \"0\" means \"do not wait\")] \n \ -g/--envvarspod [list all environment variables which should be propagated to the harness pods (default=$LLMDBENCH_HARNESS_ENVVARS_TO_YAML)] \n \ -d/--debug 
[execute harness in \"debug-mode\" (default=$LLMDBENCH_HARNESS_DEBUG)] \n \ -h/--help (show this help)" @@ -172,6 +173,13 @@ while [[ $# -gt 0 ]]; do export LLMDBENCH_CLIOVERRIDE_RUN_DATASET_URL="$2" shift ;; + -r=*|--output=*) + export LLMDBENCH_CLIOVERRIDE_HARNESS_OUTPUT=$(echo $key | cut -d '=' -f 2) + ;; + -r|--output) + export LLMDBENCH_CLIOVERRIDE_HARNESS_OUTPUT="$2" + shift + ;; -u|--wva) export LLMDBENCH_WVA_ENABLED=1 ;; @@ -233,6 +241,8 @@ set -euo pipefail export LLMDBENCH_CURRENT_STEP=99 +verify_output_destination $LLMDBENCH_HARNESS_OUTPUT + for method in ${LLMDBENCH_DEPLOY_METHODS//,/ }; do for model in ${LLMDBENCH_DEPLOY_MODEL_LIST//,/ }; do