Skip to content
Open
Show file tree
Hide file tree
Changes from 18 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
583 changes: 583 additions & 0 deletions huggingface/pytorch/hf-vllm/docker/0.10.2/THIRD-PARTY-LICENSES

Large diffs are not rendered by default.

557 changes: 557 additions & 0 deletions huggingface/pytorch/hf-vllm/docker/0.10.2/gpu/Dockerfile

Large diffs are not rendered by default.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you also remove the file if it is empty. Want to keep codebase clean.

Empty file.
42 changes: 42 additions & 0 deletions huggingface/pytorch/hf-vllm/docker/0.11.0/gpu/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Release image for HF vLLM 0.11.0 on GPU: builds on the SageMaker vLLM DLC
# base image and layers Hugging Face Hub tooling plus a SageMaker final stage.
ARG FINAL_BASE_IMAGE=763104351884.dkr.ecr.us-west-2.amazonaws.com/vllm:0.11.0-gpu-py312-cu128-ubuntu22.04-sagemaker-v1.7
FROM ${FINAL_BASE_IMAGE} AS vllm-base

LABEL maintainer="Amazon AI"
LABEL dlc_major_version="1"

# Pinned versions so rebuilds are reproducible.
ARG HUGGINGFACE_HUB_VERSION=0.36.0
ARG HF_XET_VERSION=1.2.0

# curl/unzip are required by the OSS-compliance step in the sagemaker stage;
# apt lists are removed to keep the layer small.
RUN apt-get update -y \
&& apt-get install -y --no-install-recommends curl unzip \
&& rm -rf /var/lib/apt/lists/*


# Install the pinned Hub client stack (hf-xet backs Xet-based downloads).
RUN pip install --upgrade pip && \
pip install --no-cache-dir \
huggingface-hub==${HUGGINGFACE_HUB_VERSION} \
hf-xet==${HF_XET_VERSION} \
grpcio


# Final stage: SageMaker-specific environment plus compliance artifacts.
FROM vllm-base AS sagemaker
# NOTE(review): HF_HUB_ENABLE_HF_TRANSFER=1 presumably relies on the
# hf_transfer package being present in the base image — confirm.
ENV HF_HUB_ENABLE_HF_TRANSFER="1" \
HF_HUB_USER_AGENT_ORIGIN="aws:sagemaker:gpu-cuda:inference:hf-vllm"

# Run the OSS-compliance generator, then strip the tooling from the image.
RUN set -eux; \
HOME_DIR=/root; \
uv pip install --system --upgrade pip requests PTable; \
curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip; \
unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/; \
cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance; \
chmod +x /usr/local/bin/testOSSCompliance; \
chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh; \
${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} python3; \
rm -rf ${HOME_DIR}/oss_compliance*

# A leading '/' in a COPY source is still resolved relative to the build context.
COPY /huggingface/pytorch/hf-vllm/docker/0.11.0/THIRD-PARTY-LICENSES /root/THIRD-PARTY-LICENSES

# NOTE(review): sagemaker_entrypoint.sh is expected to ship in the base image.
ENTRYPOINT ["/usr/local/bin/sagemaker_entrypoint.sh"]



51 changes: 51 additions & 0 deletions huggingface/pytorch/hf-vllm/docker/buildspec.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@

version: 0.2

env:
  shell: bash
  variables:
    FRAMEWORK_FOLDER: "huggingface/pytorch/hf-vllm/docker"
    PYTHONPATH: "/codebuild/output/src*/src/github.com/awslabs/llm-hosting-container"

phases:
  install:
    runtime-versions:
      python: 3.12
    commands:
      - echo "Installing Python version 3.12 ..."
      - pyenv global $PYTHON_312_VERSION

  pre_build:
    commands:
      - echo Pre-build started on `date`
      - export PYTHONPATH=$(pwd):$PYTHONPATH

      # Continue with regular pre-build steps if BUILD_REQUIRED=true
      - |
        echo Setting up Docker buildx.
        docker buildx version
        docker buildx create --name builder --driver docker-container --buildkitd-flags '--allow-insecure-entitlement security.insecure --allow-insecure-entitlement network.host' --use
        docker buildx inspect --bootstrap --builder builder
        docker buildx install
        echo Preparing system dependencies for execution.
        docker --version
        # Use --password-stdin so the registry password never appears in
        # process listings or CodeBuild logs (docker warns about -p for this).
        echo "$DOCKER_PASSWORD" | docker login -u "$DOCKER_USERNAME" --password-stdin
        # Fetch the Miniconda installer over HTTPS from the current official
        # host; the old plain-http continuum.io URL let a MITM substitute a
        # script that we immediately execute with bash.
        curl -LO https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
        bash Miniconda3-latest-Linux-x86_64.sh -bfp /miniconda3
        export PATH=/miniconda3/bin:${PATH}
        conda install python=3.12
        conda update -y conda
        echo Prepare HF_VLLM dependencies for execution.
        mkdir hf-vllm-artifacts
        python -m pip install -r $FRAMEWORK_FOLDER/hf-vllm-requirements.txt

  build:
    commands:
      - |
        echo "Current PYTHONPATH: $PYTHONPATH"
        python $FRAMEWORK_FOLDER/hf-vllm.py

  post_build:
    commands:
      - |
        echo Build completed on `date`
10 changes: 10 additions & 0 deletions huggingface/pytorch/hf-vllm/docker/hf-vllm-requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Runtime dependencies for the hf-vllm release tooling.
# NOTE: the PyPI "dataclasses" backport was removed — dataclasses has been in
# the standard library since Python 3.6, and the backport targets 3.6 only;
# installing it on the Python 3.12 runtime is at best a no-op and can shadow
# or conflict with the stdlib module.
boto3
docker
gitpython
sagemaker

# Test-only dependencies (pytest suite launched by hf-vllm.py).
parameterized
pytest
pytest-mock
pytest-xdist
131 changes: 131 additions & 0 deletions huggingface/pytorch/hf-vllm/docker/hf-vllm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
import git
import logging
import os
import shutil
import subprocess
import time

from huggingface.pytorch.release_utils import (
GIT_REPO_DOCKERFILES_ROOT_DIRECTORY,
GIT_REPO_PYTEST_PATH,
LOG,
Aws,
DockerClient,
EnvironmentVariable,
Mode,
ReleaseConfigs
)

# Local directory the upstream vLLM repo is cloned into before building.
GIT_REPO_HF_VLLM_LOCAL_FOLDER_NAME = "hf-vllm"
# Upstream release tags look like "v0.11.0"; formatted with config.version.
GIT_REPO_HF_VLLM_TAG_PATTERN = "v{version}"
# Upstream vLLM repository that release tags are checked out from.
GIT_REPO_HF_VLLM_URL = "https://github.com/vllm-project/vllm.git"

def build(configs: ReleaseConfigs):
    """Build and push the staging Docker image for every release config.

    For each config: skip if the staging image already exists in ECR;
    otherwise clone the upstream vLLM repo at the config's version tag,
    copy the local dockerfiles tree into the clone so COPY instructions
    can reach it, build the image, and push it to the staging ECR repo.
    """
    aws = Aws()
    docker_client = DockerClient()
    for config in configs.releases:
        LOG.info(f"Going to build image for config: {config}.")
        image_uri = config.get_image_uri_for_staging()
        # Idempotency: re-runs of the pipeline must not rebuild images that
        # were already pushed to the staging registry.
        if aws.does_ecr_image_exist(image_uri):
            LOG.info(f"Skipping already built image '{image_uri}'. Config: {config}.")
            continue

        LOG.info(f"Setting up build prerequisites for release config with version: {config.version}")
        build_path = GIT_REPO_HF_VLLM_LOCAL_FOLDER_NAME
        # Start from a clean checkout; a previous iteration may have left one.
        shutil.rmtree(GIT_REPO_HF_VLLM_LOCAL_FOLDER_NAME, ignore_errors=True)
        # no_checkout + explicit tag checkout avoids first materializing the
        # default branch of a large repository.
        hf_vllm_repo = git.Repo.clone_from(GIT_REPO_HF_VLLM_URL, GIT_REPO_HF_VLLM_LOCAL_FOLDER_NAME, no_checkout=True)
        hf_vllm_repo_tag = GIT_REPO_HF_VLLM_TAG_PATTERN.format(version=config.version)
        hf_vllm_repo.git.checkout(hf_vllm_repo_tag)
        LOG.info(f"Checked out {hf_vllm_repo} with tag: {hf_vllm_repo_tag} to {GIT_REPO_HF_VLLM_LOCAL_FOLDER_NAME}.")
        # The Dockerfile COPYs from the dockerfiles tree, so that tree must
        # live inside the build context (the clone directory).
        shutil.copytree(GIT_REPO_DOCKERFILES_ROOT_DIRECTORY,
                        os.path.join(GIT_REPO_HF_VLLM_LOCAL_FOLDER_NAME, GIT_REPO_DOCKERFILES_ROOT_DIRECTORY))
        LOG.info(f"Copied '{GIT_REPO_DOCKERFILES_ROOT_DIRECTORY}' directory to HF_VLLM directory for 'COPY' command.")

        dockerfile_path = config.get_dockerfile_path()
        LOG.info(f"Building Dockerfile: '{dockerfile_path}'. This may take a while...")
        docker_client.build(image_uri=image_uri, dockerfile_path=dockerfile_path, build_path=build_path)

        # Push to staging ECR with freshly minted registry credentials.
        username, password = aws.get_ecr_credentials(image_uri)
        docker_client.login(username, password, image_uri)
        docker_client.push(image_uri)

def test(configs: ReleaseConfigs):
    """Run the SageMaker pytest suite against each config's staging image.

    Assumes the staging images were already pushed by build(). Temporary
    credentials for the test role are materialized into the subprocess
    environment so pytest (and the AWS SDK calls it makes) can use them.

    Raises:
        RuntimeError: if the TEST_ROLE_ARN environment variable is unset,
            or if the pytest run fails for any config.
    """
    aws = Aws()
    for config in configs.releases:
        LOG.info(f"Going to test built image for config: {config}.")
        test_role_arn = os.getenv(EnvironmentVariable.TEST_ROLE_ARN.name)
        if not test_role_arn:
            # Fail fast with a clear message instead of injecting None into
            # the subprocess environment (env values must be strings).
            raise RuntimeError(
                f"Environment variable '{EnvironmentVariable.TEST_ROLE_ARN.name}' must be set to run tests.")
        test_session = aws.get_session_for_role(test_role_arn)
        test_credentials = test_session.get_credentials()
        environ = os.environ.copy()
        environ.update({
            "DEVICE_TYPE": config.device.lower(),
            "AWS_ACCESS_KEY_ID": test_credentials.access_key,
            "AWS_SECRET_ACCESS_KEY": test_credentials.secret_key,
            "AWS_SESSION_TOKEN": test_credentials.token,
            "IMAGE_URI": config.get_image_uri_for_staging(),
            "TEST_ROLE_ARN": test_role_arn})

        # -m selects device-marked tests; -n auto parallelizes via pytest-xdist.
        command = ["pytest", "-m", config.device.lower(), "-n", "auto", "--log-cli-level", "info", GIT_REPO_PYTEST_PATH]
        LOG.info(f"Running test command: {command}.")
        process = subprocess.run(command, env=environ, encoding="utf-8", capture_output=True)
        LOG.info(process.stdout)
        # Explicit raise instead of `assert`: asserts are stripped under
        # `python -O`, which would silently ignore a failed test run.
        if process.returncode != 0:
            raise RuntimeError(f"Failed with config: {config}.\nError: {process.stderr}.")
        LOG.info(f"Finished testing image with config: {config}.")


def pr(configs: ReleaseConfigs):
    """Executes both build and test modes."""
    # A PR run is simply the build stage followed immediately by the test stage.
    for stage in (build, test):
        stage(configs)

def release(configs: ReleaseConfigs):
    """trigger SMFrameworks algo release pipeline

    For each config: skip if the released image already exists, otherwise
    pull the staged image, re-tag it as released, push it, and then mirror
    it to every JumpStart ECR URI for the config.
    """
    aws = Aws()
    docker_client = DockerClient()
    for config in configs.releases:
        LOG.info(f"Releasing image associated for config: {config}.")
        released_image_uri = config.get_image_uri_for_released()
        # Idempotency: re-runs must not re-release already-published images.
        if aws.does_ecr_image_exist(released_image_uri):
            LOG.info(f"Skipping already released image '{released_image_uri}'. Config: {config}.")
            continue

        staged_image_uri = config.get_image_uri_for_staging()
        username, password = aws.get_ecr_credentials(staged_image_uri)
        docker_client.login(username, password, staged_image_uri)
        # Free local disk before pulling the (large) staged image.
        docker_client.prune_all()
        docker_client.pull(staged_image_uri)

        # NOTE(review): this second login uses the same credentials as above —
        # presumably a guard against token expiry after a long pull; confirm
        # whether it can be dropped.
        docker_client.login(username, password, staged_image_uri)
        docker_client.tag(staged_image_uri, released_image_uri)
        docker_client.push(released_image_uri)

        # Mirror the same image to each JumpStart URI; one login against the
        # first URI's registry is assumed to cover all of them — TODO confirm
        # all JumpStart URIs share a registry.
        js_uris = config.get_image_uris_for_jumpstart()
        username, password = aws.get_ecr_credentials(js_uris[0])
        docker_client.login(username, password, js_uris[0])
        for js_uri in js_uris:
            docker_client.tag(staged_image_uri, js_uri)
            docker_client.push(js_uri)
        LOG.info(f"Release marked as complete for following config ({js_uris}): {config}")


if __name__ == "__main__":
    # Configure root logging once for the whole release run.
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(levelname)-8s %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S")
    configs = ReleaseConfigs()
    configs.validate()
    # MODE selects which pipeline stage(s) this invocation performs.
    mode = os.getenv(EnvironmentVariable.MODE.name)
    LOG.info(f"Mode has been set to: {mode}.")
    if mode == Mode.PR.name:
        pr(configs)
    elif mode == Mode.BUILD.name:
        build(configs)
    elif mode == Mode.TEST.name:
        test(configs)
    elif mode == Mode.RELEASE.name:
        release(configs)
    else:
        # (fixed: the message previously ended with a stray apostrophe)
        raise ValueError(f"The mode '{mode}' is not recognized. Please set it correctly.")
18 changes: 10 additions & 8 deletions huggingface/pytorch/release_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,13 @@
"TGI": ["GPU", "INF2"],
"TEI": ["GPU", "CPU"],
"TGILLAMACPP": ["CPU"],
"HF-VLLM": ["GPU", "ROCM"],
}
Framework = enum.Enum("Framework", ["TGI", "OPTIMUM", "TEI", "TGILLAMACPP"])
Device = enum.Enum("Device", ["GPU", "INF2", "CPU"])
Mode = enum.Enum("Mode", ["PR", "BUILD", "TEST", "RELEASE"])
PipelineStatus = enum.Enum(
"PipelineStatus", ["IN_PROGRESS", "SUCCESSFUL", "UNSUCCESSFUL"]
)

# Supported release frameworks; "HF-VLLM" matches the framework key used in
# releases.json (members whose names are not identifiers are accessed via
# lookup, e.g. Framework["HF-VLLM"]).
Framework = enum.Enum("Framework", ["TGI", "OPTIMUM", "TEI", "TGILLAMACPP", "HF-VLLM"])
# Hardware targets an image can be released for.
Device = enum.Enum("Device", ["GPU", "INF2", "CPU", "ROCM"])
# Pipeline stages selectable via the MODE environment variable.
Mode = enum.Enum("Mode", ["PR", "BUILD", "TEST", "RELEASE"])
PipelineStatus = enum.Enum("PipelineStatus", ["IN_PROGRESS", "SUCCESSFUL", "UNSUCCESSFUL"])
VulnerabilitySeverity = enum.Enum("VulnerabilitySeverity", ["CRITICAL", "HIGH"])
EnvironmentVariable = enum.Enum(
"EnvironmentVariable",
Expand All @@ -50,8 +50,10 @@
DEFAULT_CRED_REFRESH_INTERVAL_IN_SECONDS = 1800
DEFAULT_WAIT_INTERVAL_IN_SECONDS = 60
DLC_PIPELINE_NAME_BY_DEVICE = {
Device.GPU.name.lower(): "HFTgiReleasePipeline-huggingface-pytorch-tgi-inference-gpu",
Device.INF2.name.lower(): "HFTgiReleasePipeline-huggingface-pytorch-tgi-inference-neuronx",
Device.GPU.name.lower(): "HFReleasePipeline-huggingface-pytorch-inference-gpu",
Device.INF2.name.lower(): "HFReleasePipeline-huggingface-pytorch-inference-neuronx",
Device.CPU.name.lower(): "HFReleasePipeline-huggingface-pytorch-inference-cpu",
Device.ROCM.name.lower(): "HFReleasePipeline-huggingface-pytorch-inference-rocm",
}
ECR_RELEASED_SUFFIX_TAG = "-released"
ECR_TAG_DIGEST_PREFIX = "sha256"
Expand Down
37 changes: 23 additions & 14 deletions releases.json
Original file line number Diff line number Diff line change
Expand Up @@ -124,30 +124,39 @@
"python_version": "py310",
"pytorch_version": "2.0.1"
}

],
"HF-VLLM": [
{
"device": "gpu",
"min_version": "0.10.2",
"max_version": "0.11.0",
"os_version": "ubuntu22.04",
"cuda_version": "cu128",
"python_version": "py312",
"pytorch_version": "2.8.0"
}
]
},
"ignore_vulnerabilities": [
"CVE-2024-42154 - linux",
"CVE-2025-32434 - torch",
"CVE-2024-48063 - torch"
"CVE-2024-48063 - torch",
"CVE-2024-35366 - ffmpeg",
"CVE-2024-35367 - ffmpeg",
"CVE-2024-35368 - ffmpeg"

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remove the extra line for clean code.

],
"releases": [
{
"framework": "TGI",
"framework": "HF-VLLM",
"device": "gpu",
"version": "3.3.6",
"os_version": "ubuntu22.04",
"cuda_version": "cu124",
"python_version": "py311",
"pytorch_version": "2.7.0"
},
{
"framework": "TGI",
"device": "inf2",
"version": "3.3.6",
"version": "0.11.0",
"os_version": "ubuntu22.04",
"python_version": "py310",
"pytorch_version": "2.7.0"
"python_version": "py312",
"pytorch_version": "2.8.0",
"cuda_version": "cu128"
}

]
}
Loading