Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions .github/workflows/release-serve.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,16 @@ on:
description: "the release version (e.g : 0.1.0)"
required: true
type: string
ray_version:
description: "the ray version branch/tag to build from"
required: false
type: string
default: "ray-2.53.0-neutree"
accelerators:
description: "accelerator types to build (e.g : gpu, amd-gpu)"
required: false
type: string
default: "gpu"

jobs:
build-amd64-image:
Expand All @@ -28,7 +38,7 @@ jobs:
username: ${{ secrets.SERVE_IMAGE_PUSH_USERNAME }}
password: ${{ secrets.SERVE_IMAGE_PUSH_TOKEN }}
- name: build amd64 image
run: cd cluster-image-builder; export ARCH=amd64 VERSION=${{ github.event.inputs.version }}; make docker-build && make docker-push
run: cd cluster-image-builder; export ARCH=amd64 VERSION=${{ github.event.inputs.version }} RAY_VERSION=${{ github.event.inputs.ray_version }} ACCELERATORS="${{ github.event.inputs.accelerators }}"; make docker-build && make docker-push
env:
IMAGE_PROJECT: ${{ secrets.RELEASE_SERVE_IMAGE_PROJECT }}
IMAGE_REPO: ${{ secrets.SERVE_IMAGE_REPO }}
Expand All @@ -51,7 +61,7 @@ jobs:
- name: push manifests
run: |
cd cluster-image-builder
export VERSION=${{ github.event.inputs.version }} ALL_ARCH=amd64; make docker-push-manifest
export VERSION=${{ github.event.inputs.version }} RAY_VERSION=${{ github.event.inputs.ray_version }} ALL_ARCH=amd64 ACCELERATORS="${{ github.event.inputs.accelerators }}"; make docker-push-manifest

env:
IMAGE_PROJECT: ${{ secrets.RELEASE_SERVE_IMAGE_PROJECT }}
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ scripts/dashboard/output
cluster-image-builder/downloader
scripts/builder/dist

claude.md

# local agents
.agents/
.claude/
15 changes: 9 additions & 6 deletions cluster-image-builder/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,22 +1,22 @@
ARG RAY_BASE_IMAGE
ARG RAY_BRANCH="ray-2.43.0-neutree"
ARG RAY_COMMIT
ARG COMMON_WORKDIR=/app
ARG RAY_REPO="https://github.com/neutree-ai/ray.git"
ARG RAY_REPO
ARG RAY_BUILD_BASE_IMAGE="quay.io/pypa/manylinux2014_x86_64:2024-07-02-9ac04ee"

FROM alpine/git:v2.47.2 as ray_fetch
ARG RAY_REPO
ARG RAY_BRANCH
ARG RAY_COMMIT
ARG COMMON_WORKDIR
WORKDIR ${COMMON_WORKDIR}
RUN git clone ${RAY_REPO} \
&& cd ray \
&& git checkout ${RAY_BRANCH}
&& git checkout ${RAY_COMMIT}

FROM ${RAY_BUILD_BASE_IMAGE} AS build_ray
ARG COMMON_WORKDIR
ARG RAY_BRANCH
ENV BUILDKITE_COMMIT=${RAY_BRANCH}
ARG RAY_COMMIT
ENV BUILDKITE_COMMIT=${RAY_COMMIT}
ENV TRAVIS_COMMIT=${BUILDKITE_COMMIT}
ENV BUILD_ONE_PYTHON_ONLY=py311
ENV RAY_DISABLE_EXTRA_CPP=1
Expand All @@ -26,6 +26,9 @@ RUN /ray/python/build-wheel-manylinux2014.sh

FROM ${RAY_BASE_IMAGE}

# Disable OTEL metrics backend due to metrics loss issue, fall back to OpenCensus
ENV RAY_enable_open_telemetry=false

# Install NFS dependencies
USER root
RUN apt-get update && apt-get install util-linux nfs-common -y
Expand Down
12 changes: 6 additions & 6 deletions cluster-image-builder/Dockerfile.rocm
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
# default base image
ARG VLLM_BRANCH="v0.8.5-neutree"
ARG VLLM_REPO="https://github.com/neutree-ai/vllm.git"
ARG RAY_BRANCH="ray-2.43.0-neutree"
ARG RAY_REPO="https://github.com/neutree-ai/ray.git"
ARG RAY_COMMIT
ARG RAY_REPO
ARG USE_CYTHON="0"
ARG BUILD_RPD="1"
ARG COMMON_WORKDIR=/app
Expand Down Expand Up @@ -54,16 +54,16 @@ COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/.buildkite /.buildkite
# Ray build stages
FROM base AS ray_fetch
ARG RAY_REPO
ARG RAY_BRANCH
ARG RAY_COMMIT
RUN git clone ${RAY_REPO} \
&& cd ray \
&& git checkout ${RAY_BRANCH}
&& git checkout ${RAY_COMMIT}


FROM ${RAY_BUILD_BASE_IMAGE} AS build_ray
ARG COMMON_WORKDIR
ARG RAY_BRANCH
ENV BUILDKITE_COMMIT=${RAY_BRANCH}
ARG RAY_COMMIT
ENV BUILDKITE_COMMIT=${RAY_COMMIT}
ENV TRAVIS_COMMIT=${BUILDKITE_COMMIT}
ENV BUILD_ONE_PYTHON_ONLY=py311
ENV RAY_DISABLE_EXTRA_CPP=1
Expand Down
10 changes: 7 additions & 3 deletions cluster-image-builder/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,15 @@ ARCH ?= amd64
ACCELERATORS ?= gpu amd-gpu
ALL_ARCH ?= amd64 arm64

RAY_BASE_IMAGE ?= rayproject/ray:2.43.0-py311-cu121
RAY_BASE_IMAGE ?= rayproject/ray:2.53.0-py311-cu121
ifeq ($(ARCH), arm64)
RAY_BASE_IMAGE := $(RAY_BASE_IMAGE)-aarch64
endif

# Git repository the Ray sources are cloned from; forwarded to `docker build`
# as the RAY_REPO build argument.
RAY_REPO ?= https://github.com/neutree-ai/ray.git
# Branch or tag of RAY_REPO to build from.
RAY_VERSION ?= ray-2.53.0-neutree
# Abbreviated (7-character) commit hash that RAY_VERSION points to, looked up
# with `git ls-remote` when the variable is expanded. If the lookup yields
# nothing (unknown ref, network failure) the build is aborted instead of
# continuing with an empty value: an empty RAY_COMMIT would make the
# Dockerfiles run `git checkout` without an argument and silently build the
# repository's default branch. Override RAY_COMMIT to skip the lookup.
RAY_COMMIT ?= $(or $(shell git ls-remote $(RAY_REPO) $(RAY_VERSION) | cut -f1 | head -c 7),$(error Could not resolve '$(RAY_VERSION)' in $(RAY_REPO); set RAY_COMMIT explicitly))

ROCM_BASE_IMAGE ?= $(NEUTREE_SERVE_IMAGE):rocm-base

.PHONY: docker-build
Expand All @@ -23,11 +27,11 @@ docker-build: ## Run docker-build-* targets for all the images

.PHONY: docker-build-gpu
docker-build-gpu: prepare ## Build the GPU image
docker build --build-arg RAY_BASE_IMAGE=$(RAY_BASE_IMAGE) -f Dockerfile -t $(NEUTREE_SERVE_IMAGE)-$(ARCH):$(IMAGE_TAG) .
docker build --build-arg RAY_BASE_IMAGE=$(RAY_BASE_IMAGE) --build-arg RAY_COMMIT=$(RAY_COMMIT) --build-arg RAY_REPO=$(RAY_REPO) -f Dockerfile -t $(NEUTREE_SERVE_IMAGE)-$(ARCH):$(IMAGE_TAG) .

.PHONY: docker-build-amd-gpu
docker-build-amd-gpu: prepare ## Build the AMD GPU image
docker build --build-arg BASE_IMAGE=$(ROCM_BASE_IMAGE) -f Dockerfile.rocm -t $(NEUTREE_SERVE_IMAGE)-$(ARCH):$(IMAGE_TAG)-rocm .
docker build --build-arg BASE_IMAGE=$(ROCM_BASE_IMAGE) --build-arg RAY_COMMIT=$(RAY_COMMIT) --build-arg RAY_REPO=$(RAY_REPO) -f Dockerfile.rocm -t $(NEUTREE_SERVE_IMAGE)-$(ARCH):$(IMAGE_TAG)-rocm .

.PHONY: docker-push
docker-push: ## Run docker-push-* targets for all the images
Expand Down
4 changes: 2 additions & 2 deletions cluster-image-builder/requirements/cuda.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
llama_cpp_python==0.3.7
vllm==0.8.5
ray[serve]==2.43.0
vllm==0.11.2
ray[serve]==2.53.0
numpy==1.26.4
opencv-python-headless==4.11.0.86
2 changes: 1 addition & 1 deletion cluster-image-builder/requirements/cuda_arm.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
llama_cpp_python==0.3.7
ray[serve]==2.43.0
ray[serve]==2.53.0
numpy==1.26.4
opencv-python-headless==4.11.0.86
95 changes: 95 additions & 0 deletions cluster-image-builder/serve/_metrics/ray_stat_logger.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
import logging

from ray import serve
from vllm.v1.metrics.ray_wrappers import (
RayPrometheusStatLogger,
RaySpecDecodingProm,
RayKVConnectorPrometheus,
RayGaugeWrapper,
RayCounterWrapper,
RayHistogramWrapper,
)

logger = logging.getLogger("ray.serve")


def _make_extended_metric_cls(base_cls, extra_labels):
"""Create a metric wrapper that transparently extends labelnames."""

class Extended(base_cls):
def __init__(self, name, documentation=None, labelnames=None, **kwargs):
extended_names = list(labelnames or []) + list(extra_labels.keys())
super().__init__(name=name, documentation=documentation,
labelnames=extended_names, **kwargs)

def labels(self, *args, **kwargs):
if args:
args = args + tuple(extra_labels.values())
if kwargs:
kwargs.update(extra_labels)
return super().labels(*args, **kwargs)

return Extended


def _make_extended_spec_decoding_cls(base_cls, extra_labels):
    """Return a ``base_cls`` subclass whose ``_counter_cls`` adds ``extra_labels``.

    The counter class is the Ray counter wrapper run through
    ``_make_extended_metric_cls``. Only the ``_counter_cls`` class attribute
    is overridden; everything else is inherited from ``base_cls``.
    """
    labelled_counter_cls = _make_extended_metric_cls(RayCounterWrapper, extra_labels)

    class Extended(base_cls):
        _counter_cls = labelled_counter_cls

    return Extended


def _make_extended_kv_connector_cls(base_cls, extra_labels):
    """Return a ``base_cls`` subclass whose metric classes add ``extra_labels``.

    The ``_gauge_cls``, ``_counter_cls`` and ``_histogram_cls`` class
    attributes are replaced with the Ray gauge/counter/histogram wrappers run
    through ``_make_extended_metric_cls``; everything else is inherited from
    ``base_cls``.
    """
    labelled_gauge_cls = _make_extended_metric_cls(RayGaugeWrapper, extra_labels)
    labelled_counter_cls = _make_extended_metric_cls(RayCounterWrapper, extra_labels)
    labelled_histogram_cls = _make_extended_metric_cls(RayHistogramWrapper, extra_labels)

    class Extended(base_cls):
        _gauge_cls = labelled_gauge_cls
        _counter_cls = labelled_counter_cls
        _histogram_cls = labelled_histogram_cls

    return Extended


class NeutreeRayStatLogger(RayPrometheusStatLogger):
    """RayPrometheusStatLogger with Ray Serve context labels injected.

    Transparently extends all vLLM metrics with deployment, replica,
    and application labels from the Ray Serve replica context. When no
    replica context is available the logger behaves like the base class
    and adds no labels.
    """

    def __init__(self, vllm_config, engine_indexes=None):
        """Resolve the Serve labels, install labelled metric classes, then
        run the base-class initialisation.

        Args:
            vllm_config: Forwarded unchanged to ``RayPrometheusStatLogger``.
            engine_indexes: Forwarded unchanged to ``RayPrometheusStatLogger``.
        """
        # Imported locally so the module's top-level imports stay untouched.
        from ray.serve.exceptions import RayServeException

        extra_labels = {}
        try:
            ctx = serve.get_replica_context()
            extra_labels = {
                "deployment": ctx.deployment,
                "replica": ctx.replica_tag,
            }
            if hasattr(ctx, "app_name"):
                extra_labels["application"] = ctx.app_name
        except (RuntimeError, RayServeException):
            # Ray Serve signals "not inside a replica" with RayServeException,
            # which catching RuntimeError alone would let propagate and abort
            # construction. RuntimeError is still caught for compatibility.
            logger.warning(
                "NeutreeRayStatLogger: not running in Ray Serve context, "
                "skipping custom labels"
            )

        if extra_labels:
            # Set before super().__init__ runs; presumably the base
            # constructor reads these attributes when it creates its
            # metrics -- confirm against the vLLM version in use.
            self._gauge_cls = _make_extended_metric_cls(
                RayGaugeWrapper, extra_labels)
            self._counter_cls = _make_extended_metric_cls(
                RayCounterWrapper, extra_labels)
            self._histogram_cls = _make_extended_metric_cls(
                RayHistogramWrapper, extra_labels)
            self._spec_decoding_cls = _make_extended_spec_decoding_cls(
                RaySpecDecodingProm, extra_labels)
            self._kv_connector_cls = _make_extended_kv_connector_cls(
                RayKVConnectorPrometheus, extra_labels)

        super().__init__(vllm_config, engine_indexes)

        logger.info(
            "NeutreeRayStatLogger initialized with extra labels: %s",
            list(extra_labels.keys()),
        )
Loading