Skip to content
Open
Show file tree
Hide file tree
Changes from 18 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
583 changes: 583 additions & 0 deletions huggingface/pytorch/hf-vllm/docker/0.10.2/THIRD-PARTY-LICENSES

Large diffs are not rendered by default.

557 changes: 557 additions & 0 deletions huggingface/pytorch/hf-vllm/docker/0.10.2/gpu/Dockerfile

Large diffs are not rendered by default.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you also remove the file if it is empty. Want to keep codebase clean.

Empty file.
42 changes: 42 additions & 0 deletions huggingface/pytorch/hf-vllm/docker/0.11.0/gpu/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Release image for HF vLLM 0.11.0 on GPU: builds on the SageMaker vLLM DLC
# base image and layers Hugging Face Hub tooling plus a SageMaker final stage.
ARG FINAL_BASE_IMAGE=763104351884.dkr.ecr.us-west-2.amazonaws.com/vllm:0.11.0-gpu-py312-cu128-ubuntu22.04-sagemaker-v1.7
FROM ${FINAL_BASE_IMAGE} AS vllm-base

LABEL maintainer="Amazon AI"
LABEL dlc_major_version="1"

# Pinned versions so rebuilds are reproducible.
ARG HUGGINGFACE_HUB_VERSION=0.36.0
ARG HF_XET_VERSION=1.2.0

# curl/unzip are required by the OSS-compliance step in the sagemaker stage;
# apt lists are removed to keep the layer small.
RUN apt-get update -y \
&& apt-get install -y --no-install-recommends curl unzip \
&& rm -rf /var/lib/apt/lists/*


# Install the pinned Hub client stack (hf-xet backs Xet-based downloads).
RUN pip install --upgrade pip && \
pip install --no-cache-dir \
huggingface-hub==${HUGGINGFACE_HUB_VERSION} \
hf-xet==${HF_XET_VERSION} \
grpcio


# Final stage: SageMaker-specific environment plus compliance artifacts.
FROM vllm-base AS sagemaker
# NOTE(review): HF_HUB_ENABLE_HF_TRANSFER=1 presumably relies on the
# hf_transfer package being present in the base image — confirm.
ENV HF_HUB_ENABLE_HF_TRANSFER="1" \
HF_HUB_USER_AGENT_ORIGIN="aws:sagemaker:gpu-cuda:inference:hf-vllm"

# Run the OSS-compliance generator, then strip the tooling from the image.
RUN set -eux; \
HOME_DIR=/root; \
uv pip install --system --upgrade pip requests PTable; \
curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip; \
unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/; \
cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance; \
chmod +x /usr/local/bin/testOSSCompliance; \
chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh; \
${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} python3; \
rm -rf ${HOME_DIR}/oss_compliance*

# A leading '/' in a COPY source is still resolved relative to the build context.
COPY /huggingface/pytorch/hf-vllm/docker/0.11.0/THIRD-PARTY-LICENSES /root/THIRD-PARTY-LICENSES

# NOTE(review): sagemaker_entrypoint.sh is expected to ship in the base image.
ENTRYPOINT ["/usr/local/bin/sagemaker_entrypoint.sh"]



51 changes: 51 additions & 0 deletions huggingface/pytorch/hf-vllm/docker/buildspec.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@

version: 0.2

env:
  shell: bash
  variables:
    FRAMEWORK_FOLDER: "huggingface/pytorch/hf-vllm/docker"
    PYTHONPATH: "/codebuild/output/src*/src/github.com/awslabs/llm-hosting-container"

phases:
  install:
    runtime-versions:
      python: 3.12
    commands:
      - echo "Installing Python version 3.12 ..."
      - pyenv global $PYTHON_312_VERSION

  pre_build:
    commands:
      - echo Pre-build started on `date`
      - export PYTHONPATH=$(pwd):$PYTHONPATH

      # Continue with regular pre-build steps if BUILD_REQUIRED=true
      - |
        echo Setting up Docker buildx.
        docker buildx version
        docker buildx create --name builder --driver docker-container --buildkitd-flags '--allow-insecure-entitlement security.insecure --allow-insecure-entitlement network.host' --use
        docker buildx inspect --bootstrap --builder builder
        docker buildx install
        echo Preparing system dependencies for execution.
        docker --version
        # Use --password-stdin so the registry password never appears in
        # process listings or CodeBuild logs (docker warns about -p for this).
        echo "$DOCKER_PASSWORD" | docker login -u "$DOCKER_USERNAME" --password-stdin
        # Fetch the Miniconda installer over HTTPS from the current official
        # host; the old plain-http continuum.io URL let a MITM substitute a
        # script that we immediately execute with bash.
        curl -LO https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
        bash Miniconda3-latest-Linux-x86_64.sh -bfp /miniconda3
        export PATH=/miniconda3/bin:${PATH}
        conda install python=3.12
        conda update -y conda
        echo Prepare HF_VLLM dependencies for execution.
        mkdir hf-vllm-artifacts
        python -m pip install -r $FRAMEWORK_FOLDER/hf-vllm-requirements.txt

  build:
    commands:
      - |
        echo "Current PYTHONPATH: $PYTHONPATH"
        python $FRAMEWORK_FOLDER/hf-vllm.py

  post_build:
    commands:
      - |
        echo Build completed on `date`
10 changes: 10 additions & 0 deletions huggingface/pytorch/hf-vllm/docker/hf-vllm-requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Runtime dependencies for the hf-vllm release tooling.
# NOTE: the PyPI "dataclasses" backport was removed — dataclasses has been in
# the standard library since Python 3.6, and the backport targets 3.6 only;
# installing it on the Python 3.12 runtime is at best a no-op and can shadow
# or conflict with the stdlib module.
boto3
docker
gitpython
sagemaker

# Test-only dependencies (pytest suite launched by hf-vllm.py).
parameterized
pytest
pytest-mock
pytest-xdist
131 changes: 131 additions & 0 deletions huggingface/pytorch/hf-vllm/docker/hf-vllm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
import git
import logging
import os
import shutil
import subprocess
import time

from huggingface.pytorch.release_utils import (
GIT_REPO_DOCKERFILES_ROOT_DIRECTORY,
GIT_REPO_PYTEST_PATH,
LOG,
Aws,
DockerClient,
EnvironmentVariable,
Mode,
ReleaseConfigs
)

# Local directory the upstream vLLM repo is cloned into before building.
GIT_REPO_HF_VLLM_LOCAL_FOLDER_NAME = "hf-vllm"
# Upstream release tags look like "v0.11.0"; formatted with config.version.
GIT_REPO_HF_VLLM_TAG_PATTERN = "v{version}"
# Upstream vLLM repository that release tags are checked out from.
GIT_REPO_HF_VLLM_URL = "https://github.com/vllm-project/vllm.git"

def build(configs: ReleaseConfigs):
    """Build and push the staging Docker image for every release config.

    For each config: skip if the staging image already exists in ECR;
    otherwise clone the upstream vLLM repo at the config's version tag,
    copy the local dockerfiles tree into the clone so COPY instructions
    can reach it, build the image, and push it to the staging ECR repo.
    """
    aws = Aws()
    docker_client = DockerClient()
    for config in configs.releases:
        LOG.info(f"Going to build image for config: {config}.")
        image_uri = config.get_image_uri_for_staging()
        # Idempotency: re-runs of the pipeline must not rebuild images that
        # were already pushed to the staging registry.
        if aws.does_ecr_image_exist(image_uri):
            LOG.info(f"Skipping already built image '{image_uri}'. Config: {config}.")
            continue

        LOG.info(f"Setting up build prerequisites for release config with version: {config.version}")
        build_path = GIT_REPO_HF_VLLM_LOCAL_FOLDER_NAME
        # Start from a clean checkout; a previous iteration may have left one.
        shutil.rmtree(GIT_REPO_HF_VLLM_LOCAL_FOLDER_NAME, ignore_errors=True)
        # no_checkout + explicit tag checkout avoids first materializing the
        # default branch of a large repository.
        hf_vllm_repo = git.Repo.clone_from(GIT_REPO_HF_VLLM_URL, GIT_REPO_HF_VLLM_LOCAL_FOLDER_NAME, no_checkout=True)
        hf_vllm_repo_tag = GIT_REPO_HF_VLLM_TAG_PATTERN.format(version=config.version)
        hf_vllm_repo.git.checkout(hf_vllm_repo_tag)
        LOG.info(f"Checked out {hf_vllm_repo} with tag: {hf_vllm_repo_tag} to {GIT_REPO_HF_VLLM_LOCAL_FOLDER_NAME}.")
        # The Dockerfile COPYs from the dockerfiles tree, so that tree must
        # live inside the build context (the clone directory).
        shutil.copytree(GIT_REPO_DOCKERFILES_ROOT_DIRECTORY,
                        os.path.join(GIT_REPO_HF_VLLM_LOCAL_FOLDER_NAME, GIT_REPO_DOCKERFILES_ROOT_DIRECTORY))
        LOG.info(f"Copied '{GIT_REPO_DOCKERFILES_ROOT_DIRECTORY}' directory to HF_VLLM directory for 'COPY' command.")

        dockerfile_path = config.get_dockerfile_path()
        LOG.info(f"Building Dockerfile: '{dockerfile_path}'. This may take a while...")
        docker_client.build(image_uri=image_uri, dockerfile_path=dockerfile_path, build_path=build_path)

        # Push to staging ECR with freshly minted registry credentials.
        username, password = aws.get_ecr_credentials(image_uri)
        docker_client.login(username, password, image_uri)
        docker_client.push(image_uri)

def test(configs: ReleaseConfigs):
    """Run the SageMaker pytest suite against each config's staging image.

    Assumes the staging images were already pushed by build(). Temporary
    credentials for the test role are materialized into the subprocess
    environment so pytest (and the AWS SDK calls it makes) can use them.

    Raises:
        RuntimeError: if the TEST_ROLE_ARN environment variable is unset,
            or if the pytest run fails for any config.
    """
    aws = Aws()
    for config in configs.releases:
        LOG.info(f"Going to test built image for config: {config}.")
        test_role_arn = os.getenv(EnvironmentVariable.TEST_ROLE_ARN.name)
        if not test_role_arn:
            # Fail fast with a clear message instead of injecting None into
            # the subprocess environment (env values must be strings).
            raise RuntimeError(
                f"Environment variable '{EnvironmentVariable.TEST_ROLE_ARN.name}' must be set to run tests.")
        test_session = aws.get_session_for_role(test_role_arn)
        test_credentials = test_session.get_credentials()
        environ = os.environ.copy()
        environ.update({
            "DEVICE_TYPE": config.device.lower(),
            "AWS_ACCESS_KEY_ID": test_credentials.access_key,
            "AWS_SECRET_ACCESS_KEY": test_credentials.secret_key,
            "AWS_SESSION_TOKEN": test_credentials.token,
            "IMAGE_URI": config.get_image_uri_for_staging(),
            "TEST_ROLE_ARN": test_role_arn})

        # -m selects device-marked tests; -n auto parallelizes via pytest-xdist.
        command = ["pytest", "-m", config.device.lower(), "-n", "auto", "--log-cli-level", "info", GIT_REPO_PYTEST_PATH]
        LOG.info(f"Running test command: {command}.")
        process = subprocess.run(command, env=environ, encoding="utf-8", capture_output=True)
        LOG.info(process.stdout)
        # Explicit raise instead of `assert`: asserts are stripped under
        # `python -O`, which would silently ignore a failed test run.
        if process.returncode != 0:
            raise RuntimeError(f"Failed with config: {config}.\nError: {process.stderr}.")
        LOG.info(f"Finished testing image with config: {config}.")


def pr(configs: ReleaseConfigs):
    """Executes both build and test modes."""
    # A PR run is simply the build stage followed immediately by the test stage.
    for stage in (build, test):
        stage(configs)

def release(configs: ReleaseConfigs):
    """trigger SMFrameworks algo release pipeline

    For each config: skip if the released image already exists, otherwise
    pull the staged image, re-tag it as released, push it, and then mirror
    it to every JumpStart ECR URI for the config.
    """
    aws = Aws()
    docker_client = DockerClient()
    for config in configs.releases:
        LOG.info(f"Releasing image associated for config: {config}.")
        released_image_uri = config.get_image_uri_for_released()
        # Idempotency: re-runs must not re-release already-published images.
        if aws.does_ecr_image_exist(released_image_uri):
            LOG.info(f"Skipping already released image '{released_image_uri}'. Config: {config}.")
            continue

        staged_image_uri = config.get_image_uri_for_staging()
        username, password = aws.get_ecr_credentials(staged_image_uri)
        docker_client.login(username, password, staged_image_uri)
        # Free local disk before pulling the (large) staged image.
        docker_client.prune_all()
        docker_client.pull(staged_image_uri)

        # NOTE(review): this second login uses the same credentials as above —
        # presumably a guard against token expiry after a long pull; confirm
        # whether it can be dropped.
        docker_client.login(username, password, staged_image_uri)
        docker_client.tag(staged_image_uri, released_image_uri)
        docker_client.push(released_image_uri)

        # Mirror the same image to each JumpStart URI; one login against the
        # first URI's registry is assumed to cover all of them — TODO confirm
        # all JumpStart URIs share a registry.
        js_uris = config.get_image_uris_for_jumpstart()
        username, password = aws.get_ecr_credentials(js_uris[0])
        docker_client.login(username, password, js_uris[0])
        for js_uri in js_uris:
            docker_client.tag(staged_image_uri, js_uri)
            docker_client.push(js_uri)
        LOG.info(f"Release marked as complete for following config ({js_uris}): {config}")


if __name__ == "__main__":
    # Configure root logging once for the whole release run.
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(levelname)-8s %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S")
    configs = ReleaseConfigs()
    configs.validate()
    # MODE selects which pipeline stage(s) this invocation performs.
    mode = os.getenv(EnvironmentVariable.MODE.name)
    LOG.info(f"Mode has been set to: {mode}.")
    if mode == Mode.PR.name:
        pr(configs)
    elif mode == Mode.BUILD.name:
        build(configs)
    elif mode == Mode.TEST.name:
        test(configs)
    elif mode == Mode.RELEASE.name:
        release(configs)
    else:
        # (fixed: the message previously ended with a stray apostrophe)
        raise ValueError(f"The mode '{mode}' is not recognized. Please set it correctly.")
18 changes: 10 additions & 8 deletions huggingface/pytorch/release_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,13 @@
"TGI": ["GPU", "INF2"],
"TEI": ["GPU", "CPU"],
"TGILLAMACPP": ["CPU"],
"HF-VLLM": ["GPU", "ROCM"],
}
Framework = enum.Enum("Framework", ["TGI", "OPTIMUM", "TEI", "TGILLAMACPP"])
Device = enum.Enum("Device", ["GPU", "INF2", "CPU"])
Mode = enum.Enum("Mode", ["PR", "BUILD", "TEST", "RELEASE"])
PipelineStatus = enum.Enum(
"PipelineStatus", ["IN_PROGRESS", "SUCCESSFUL", "UNSUCCESSFUL"]
)

# Supported release frameworks; "HF-VLLM" matches the framework key used in
# releases.json (members whose names are not identifiers are accessed via
# lookup, e.g. Framework["HF-VLLM"]).
Framework = enum.Enum("Framework", ["TGI", "OPTIMUM", "TEI", "TGILLAMACPP", "HF-VLLM"])
# Hardware targets an image can be released for.
Device = enum.Enum("Device", ["GPU", "INF2", "CPU", "ROCM"])
# Pipeline stages selectable via the MODE environment variable.
Mode = enum.Enum("Mode", ["PR", "BUILD", "TEST", "RELEASE"])
PipelineStatus = enum.Enum("PipelineStatus", ["IN_PROGRESS", "SUCCESSFUL", "UNSUCCESSFUL"])
VulnerabilitySeverity = enum.Enum("VulnerabilitySeverity", ["CRITICAL", "HIGH"])
EnvironmentVariable = enum.Enum(
"EnvironmentVariable",
Expand All @@ -50,8 +50,10 @@
DEFAULT_CRED_REFRESH_INTERVAL_IN_SECONDS = 1800
DEFAULT_WAIT_INTERVAL_IN_SECONDS = 60
DLC_PIPELINE_NAME_BY_DEVICE = {
Device.GPU.name.lower(): "HFTgiReleasePipeline-huggingface-pytorch-tgi-inference-gpu",
Device.INF2.name.lower(): "HFTgiReleasePipeline-huggingface-pytorch-tgi-inference-neuronx",
Device.GPU.name.lower(): "HFReleasePipeline-huggingface-pytorch-inference-gpu",
Device.INF2.name.lower(): "HFReleasePipeline-huggingface-pytorch-inference-neuronx",
Device.CPU.name.lower(): "HFReleasePipeline-huggingface-pytorch-inference-cpu",
Device.ROCM.name.lower(): "HFReleasePipeline-huggingface-pytorch-inference-rocm",
}
ECR_RELEASED_SUFFIX_TAG = "-released"
ECR_TAG_DIGEST_PREFIX = "sha256"
Expand Down
37 changes: 23 additions & 14 deletions releases.json
Original file line number Diff line number Diff line change
Expand Up @@ -124,30 +124,39 @@
"python_version": "py310",
"pytorch_version": "2.0.1"
}

],
"HF-VLLM": [
{
"device": "gpu",
"min_version": "0.10.2",
"max_version": "0.11.0",
"os_version": "ubuntu22.04",
"cuda_version": "cu128",
"python_version": "py312",
"pytorch_version": "2.8.0"
}
]
},
"ignore_vulnerabilities": [
"CVE-2024-42154 - linux",
"CVE-2025-32434 - torch",
"CVE-2024-48063 - torch"
"CVE-2024-48063 - torch",
"CVE-2024-35366 - ffmpeg",
"CVE-2024-35367 - ffmpeg",
"CVE-2024-35368 - ffmpeg"

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remove the extra line for clean code.

],
"releases": [
{
"framework": "TGI",
"framework": "HF-VLLM",
"device": "gpu",
"version": "3.3.6",
"os_version": "ubuntu22.04",
"cuda_version": "cu124",
"python_version": "py311",
"pytorch_version": "2.7.0"
},
{
"framework": "TGI",
"device": "inf2",
"version": "3.3.6",
"version": "0.11.0",
"os_version": "ubuntu22.04",
"python_version": "py310",
"pytorch_version": "2.7.0"
"python_version": "py312",
"pytorch_version": "2.8.0",
"cuda_version": "cu128"
}

]
}
Loading