Skip to content

environments foundation model inference

github-actions[bot] edited this page Mar 24, 2026 · 107 revisions

foundation-model-inference

Overview

Environment used for deploying models that serve inference with DeepSpeed-MII (DS-MII) or vLLM.

Version: 89

Tags

Preview DS-MII VLLM : 0.10.2

View in Studio: https://ml.azure.com/registries/azureml/environments/foundation-model-inference/version/89

Docker image: mcr.microsoft.com/azureml/curated/foundation-model-inference:89

Docker build context

Dockerfile

FROM nvidia/cuda:13.1.1-devel-ubuntu22.04

# Nsight Compute ships with the CUDA devel base but is not needed for
# inference and is a frequent CVE-scanner hit. Note: deleting it in a derived
# layer hides the files from the final filesystem but does not shrink the
# shared base layers.
RUN rm -rf /opt/nvidia/nsight-compute

# DEBIAN_FRONTEND is a build-time concern only: declaring it as an ARG keeps
# every apt step in this stage non-interactive without baking the variable
# into the runtime environment of the final image.
ARG DEBIAN_FRONTEND=noninteractive

# Runtime Python/locale defaults:
#  - PYTHONUNBUFFERED: stream logs immediately (no stdout buffering)
#  - PYTHONDONTWRITEBYTECODE: no .pyc files in the image
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    TZ=Etc/UTC

# OS bootstrap in a single layer: use apt-get (stable CLI, DL3027), combine
# update+upgrade+install so the package index is never stale (DL3009), skip
# recommended packages, and drop the apt lists so they never persist in the
# layer. The deadsnakes PPA is registered here; a later apt-get update will
# refresh its index if anything is ever installed from it.
RUN apt-get update && \
    apt-get upgrade -y && \
    apt-get install -y --no-install-recommends \
        git \
        software-properties-common && \
    add-apt-repository -y ppa:deadsnakes/ppa && \
    rm -rf /var/lib/apt/lists/*

# Pinned Miniconda installer release. key=value ENV form — the legacy
# space-separated `ENV key value` form is deprecated (LegacyKeyValueFormat).
ENV MINICONDA_VERSION=py310_23.10.0-1
ENV PATH=/opt/miniconda/bin:$PATH

# wget fetches the Miniconda installer; runit supervises the API server at
# runtime (see CMD). Lists are removed in the same layer so the apt cache
# never lands in the image.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        runit \
        wget && \
    rm -rf /var/lib/apt/lists/*

# Install Miniconda, refresh its base packages, then scrub installer, package
# cache, and bytecode in the SAME layer so none of it persists in the image.
RUN wget -qO /tmp/miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-${MINICONDA_VERSION}-Linux-x86_64.sh && \
    bash /tmp/miniconda.sh -bf -p /opt/miniconda && \
    conda update --all -c conda-forge -y && \
    conda clean -ay && \
    rm -rf /opt/miniconda/pkgs && \
    rm /tmp/miniconda.sh && \
    find / -type d -name __pycache__ | xargs rm -rf

# Dedicated conda environment that holds the whole inference stack.
ENV AZUREML_CONDA_ENVIRONMENT_PATH=/opt/conda/envs/azureml-env

# Materialize the environment with the pinned interpreter.
RUN conda create -y -p $AZUREML_CONDA_ENVIRONMENT_PATH python=3.10

# Putting the env's bin dir first on PATH "activates" it for every later RUN
# step and for the running container; the CONDA_* variables mirror what
# `conda activate` would export. Grouped into one ENV instruction.
ENV PATH=$AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH \
    CONDA_DEFAULT_ENV=$AZUREML_CONDA_ENVIRONMENT_PATH \
    CONDA_PREFIX=$AZUREML_CONDA_ENVIRONMENT_PATH

WORKDIR /

# torch installation
# --- Install PyTorch 2.8.0 FIRST with CUDA 12.8 support (compatible with vLLM 0.13.0) ---
# Installed before the inference package so the torch layer is cached
# independently of later, more frequently changing pip layers.
# NOTE(review): this comment says vLLM 0.13.0 but the environment tag above
# lists "DS-MII VLLM : 0.10.2" — confirm which vLLM release this torch pin
# actually targets.
RUN pip install --no-cache-dir --index-url https://download.pytorch.org/whl/cu128 \
    torch==2.8.0 torchvision==0.23.0 torchaudio==2.8.0
    
# Inference server package. When this Dockerfile is copied to the assets
# repo, switch the install source to public PyPI.
RUN pip install --no-cache-dir llm-optimized-inference==0.2.52

# Force-reinstall pins xgrammar at this exact version even if a dependency
# above already pulled in a different one.
RUN pip install --force-reinstall --no-cache-dir xgrammar==0.1.29

# Patch vulnerabilities bundled inside the installed Ray wheel: drop the Java
# jar and Ray's vendored aiohttp, then force a patched aiohttp on top. The
# Ray install path is resolved once instead of once per rm. The pip cache is
# cleaned in the SAME layer — a `rm` in a later layer cannot shrink an
# already-committed layer.
RUN RAY_PATH=$(python -c "import ray;print(ray.__path__[0])") && \
    rm -f "$RAY_PATH/jars/ray_dist.jar" && \
    rm -rf "$RAY_PATH"/_private/runtime_env/agent/thirdparty_files/*aiohttp* && \
    pip install --no-cache-dir --force-reinstall aiohttp==3.13.3 && \
    rm -rf ~/.cache/pip

# COPY, not ADD, for plain local files (ADD is reserved for tar extraction /
# checksummed URLs). Normalize CRLF endings and mark the run script
# executable in a single layer.
COPY runit_folder/api_server /var/runit/api_server
RUN sed -i 's/\r$//g' /var/runit/api_server/run && \
    chmod +x /var/runit/api_server/run

# runsv/runsvdir look for service directories under SVDIR.
ENV SVDIR=/var/runit
ENV WORKER_TIMEOUT=3600
# Documentation only — the port still has to be published at run time.
EXPOSE 5001
# NOTE(review): no USER directive, so the container runs as root; confirm
# whether the runit-supervised services actually require root before adding
# a non-root user.
# Exec-form CMD: runsvdir is PID 1 and supervises everything under /var/runit.
CMD [ "runsvdir", "/var/runit" ]

Clone this wiki locally