neutree-ai · Levi080513 · Mar 3, 2026 · Feb 13, 2026 · Feb 14, 2026 · Feb 14, 2026
diff --git a/.github/workflows/release-serve.yaml b/.github/workflows/release-serve.yaml
@@ -11,6 +11,16 @@ on:
         description: "the release version (e.g : 0.1.0)"
         required: true
         type: string
+      ray_version:  # exported as RAY_VERSION before the cluster-image-builder make targets run
+        description: "the ray version branch/tag to build from"
+        required: false
+        type: string
+        default: "ray-2.53.0-neutree"
+      accelerators:  # exported verbatim as ACCELERATORS before the make targets run
+        description: "space-separated accelerator types to build (e.g : gpu amd-gpu)"
+        required: false
+        type: string
+        default: "gpu"
 
 jobs:
   build-amd64-image:
@@ -28,7 +38,7 @@ jobs:
           username: ${{ secrets.SERVE_IMAGE_PUSH_USERNAME }}
           password: ${{ secrets.SERVE_IMAGE_PUSH_TOKEN }}
       - name: build amd64 image
-        run: cd cluster-image-builder; export ARCH=amd64 VERSION=${{ github.event.inputs.version }}; make docker-build && make docker-push
+        run: cd cluster-image-builder; export ARCH=amd64 VERSION=${{ github.event.inputs.version }} RAY_VERSION="${{ github.event.inputs.ray_version }}" ACCELERATORS="${{ github.event.inputs.accelerators }}"; make docker-build && make docker-push
         env:
           IMAGE_PROJECT: ${{ secrets.RELEASE_SERVE_IMAGE_PROJECT }}
           IMAGE_REPO: ${{ secrets.SERVE_IMAGE_REPO }}
@@ -51,7 +61,7 @@ jobs:
       - name: push manifests
         run: |
           cd cluster-image-builder
-          export VERSION=${{ github.event.inputs.version }} ALL_ARCH=amd64; make docker-push-manifest
+          export VERSION=${{ github.event.inputs.version }} RAY_VERSION="${{ github.event.inputs.ray_version }}" ALL_ARCH=amd64 ACCELERATORS="${{ github.event.inputs.accelerators }}"; make docker-push-manifest
 
         env:
           IMAGE_PROJECT: ${{ secrets.RELEASE_SERVE_IMAGE_PROJECT }}

diff --git a/.gitignore b/.gitignore
@@ -36,6 +36,8 @@ scripts/dashboard/output
 cluster-image-builder/downloader
 scripts/builder/dist
 
+claude.md
+
 # local agents
 .agents/
 .claude/
diff --git a/cluster-image-builder/Dockerfile b/cluster-image-builder/Dockerfile
@@ -1,22 +1,22 @@
 ARG RAY_BASE_IMAGE
-ARG RAY_BRANCH="ray-2.43.0-neutree"
+ARG RAY_COMMIT
 ARG COMMON_WORKDIR=/app
-ARG RAY_REPO="https://github.com/neutree-ai/ray.git"
+ARG RAY_REPO
 ARG RAY_BUILD_BASE_IMAGE="quay.io/pypa/manylinux2014_x86_64:2024-07-02-9ac04ee"
 
 FROM alpine/git:v2.47.2 as ray_fetch
 ARG RAY_REPO
-ARG RAY_BRANCH
+ARG RAY_COMMIT
 ARG COMMON_WORKDIR
 WORKDIR ${COMMON_WORKDIR}
 RUN git clone ${RAY_REPO} \
 	    && cd ray \
-	    && git checkout ${RAY_BRANCH}
+	    && git checkout "${RAY_COMMIT:?RAY_COMMIT build arg must be set}"
 
 FROM ${RAY_BUILD_BASE_IMAGE} AS build_ray
 ARG COMMON_WORKDIR
-ARG RAY_BRANCH
-ENV BUILDKITE_COMMIT=${RAY_BRANCH}
+ARG RAY_COMMIT
+ENV BUILDKITE_COMMIT=${RAY_COMMIT}
 ENV TRAVIS_COMMIT=${BUILDKITE_COMMIT}
 ENV BUILD_ONE_PYTHON_ONLY=py311
 ENV RAY_DISABLE_EXTRA_CPP=1
@@ -26,6 +26,9 @@ RUN /ray/python/build-wheel-manylinux2014.sh
 
 FROM ${RAY_BASE_IMAGE}
 
+# Disable OTEL metrics backend due to metrics loss issue, fall back to OpenCensus
+ENV RAY_enable_open_telemetry=false
+
 # Install NFS dependencies
 USER root
 RUN apt-get update && apt-get install util-linux nfs-common -y

diff --git a/cluster-image-builder/Dockerfile.rocm b/cluster-image-builder/Dockerfile.rocm
@@ -2,8 +2,8 @@
 # default base image
 ARG VLLM_BRANCH="v0.8.5-neutree"
 ARG VLLM_REPO="https://github.com/neutree-ai/vllm.git"
-ARG RAY_BRANCH="ray-2.43.0-neutree"
-ARG RAY_REPO="https://github.com/neutree-ai/ray.git"
+ARG RAY_COMMIT
+ARG RAY_REPO
 ARG USE_CYTHON="0"
 ARG BUILD_RPD="1"
 ARG COMMON_WORKDIR=/app
@@ -54,16 +54,16 @@ COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/.buildkite /.buildkite
 # Ray build stages
 FROM base AS ray_fetch
 ARG RAY_REPO
-ARG RAY_BRANCH
+ARG RAY_COMMIT
 RUN git clone ${RAY_REPO} \
 	    && cd ray \
-	    && git checkout ${RAY_BRANCH}
+	    && git checkout "${RAY_COMMIT:?RAY_COMMIT build arg must be set}"
 
 
 FROM ${RAY_BUILD_BASE_IMAGE} AS build_ray
 ARG COMMON_WORKDIR
-ARG RAY_BRANCH
-ENV BUILDKITE_COMMIT=${RAY_BRANCH}
+ARG RAY_COMMIT
+ENV BUILDKITE_COMMIT=${RAY_COMMIT}
 ENV TRAVIS_COMMIT=${BUILDKITE_COMMIT}
 ENV BUILD_ONE_PYTHON_ONLY=py311
 ENV RAY_DISABLE_EXTRA_CPP=1

diff --git a/cluster-image-builder/Makefile b/cluster-image-builder/Makefile
@@ -10,11 +10,15 @@ ARCH ?= amd64
 ACCELERATORS ?= gpu amd-gpu
 ALL_ARCH ?= amd64 arm64
 
-RAY_BASE_IMAGE ?= rayproject/ray:2.43.0-py311-cu121
+RAY_BASE_IMAGE ?= rayproject/ray:2.53.0-py311-cu121
 ifeq ($(ARCH), arm64)
 RAY_BASE_IMAGE := $(RAY_BASE_IMAGE)-aarch64
 endif
 
+RAY_REPO ?= https://github.com/neutree-ai/ray.git
+RAY_VERSION ?= ray-2.53.0-neutree
+RAY_COMMIT ?= $(shell git ls-remote $(RAY_REPO) $(RAY_VERSION) | cut -f1 | head -c 7)
+
 ROCM_BASE_IMAGE ?= $(NEUTREE_SERVE_IMAGE):rocm-base
 
 .PHONY: docker-build
@@ -23,11 +27,11 @@ docker-build: ## Run docker-build-* targets for all the images
 
 .PHONY: docker-build-gpu
 docker-build-gpu: prepare ## Build the GPU image
-	docker build --build-arg RAY_BASE_IMAGE=$(RAY_BASE_IMAGE) -f Dockerfile -t $(NEUTREE_SERVE_IMAGE)-$(ARCH):$(IMAGE_TAG) .
+	docker build --build-arg RAY_BASE_IMAGE=$(RAY_BASE_IMAGE) --build-arg RAY_COMMIT=$(RAY_COMMIT) --build-arg RAY_REPO=$(RAY_REPO) -f Dockerfile -t $(NEUTREE_SERVE_IMAGE)-$(ARCH):$(IMAGE_TAG) .
 
 .PHONY: docker-build-amd-gpu
 docker-build-amd-gpu: prepare ## Build the AMD GPU image
-	docker build --build-arg BASE_IMAGE=$(ROCM_BASE_IMAGE) -f Dockerfile.rocm -t $(NEUTREE_SERVE_IMAGE)-$(ARCH):$(IMAGE_TAG)-rocm .
+	docker build --build-arg BASE_IMAGE=$(ROCM_BASE_IMAGE) --build-arg RAY_COMMIT=$(RAY_COMMIT) --build-arg RAY_REPO=$(RAY_REPO) -f Dockerfile.rocm -t $(NEUTREE_SERVE_IMAGE)-$(ARCH):$(IMAGE_TAG)-rocm .
 
 .PHONY: docker-push
 docker-push: ## Run docker-push-* targets for all the images

diff --git a/cluster-image-builder/requirements/cuda.txt b/cluster-image-builder/requirements/cuda.txt
@@ -1,5 +1,5 @@
 llama_cpp_python==0.3.7
-vllm==0.8.5
-ray[serve]==2.43.0
+vllm==0.11.2
+ray[serve]==2.53.0
 numpy==1.26.4
 opencv-python-headless==4.11.0.86
diff --git a/cluster-image-builder/requirements/cuda_arm.txt b/cluster-image-builder/requirements/cuda_arm.txt
@@ -1,4 +1,4 @@
 llama_cpp_python==0.3.7
-ray[serve]==2.43.0
+ray[serve]==2.53.0
 numpy==1.26.4
 opencv-python-headless==4.11.0.86
diff --git a/cluster-image-builder/serve/_metrics/ray_stat_logger.py b/cluster-image-builder/serve/_metrics/ray_stat_logger.py
@@ -0,0 +1,147 @@
+"""vLLM stat logger that adds Ray Serve replica labels to every metric."""
+
+import logging
+
+from ray import serve
+from vllm.v1.metrics.ray_wrappers import (
+    RayPrometheusStatLogger,
+    RaySpecDecodingProm,
+    RayKVConnectorPrometheus,
+    RayGaugeWrapper,
+    RayCounterWrapper,
+    RayHistogramWrapper,
+)
+
+logger = logging.getLogger("ray.serve")
+
+
+def _make_extended_metric_cls(base_cls, extra_labels):
+    """Create a metric wrapper that transparently extends labelnames.
+
+    Args:
+        base_cls: Metric wrapper class to subclass. Its ``__init__`` is called
+            with ``name``, ``documentation`` and ``labelnames`` keywords and
+            its ``labels()`` with the extended label values.
+        extra_labels: Mapping of label name to the constant value attached
+            to every sample.
+
+    Returns:
+        A ``base_cls`` subclass that declares the extra label names and
+        supplies their values on every ``labels()`` call.
+    """
+    # Copy so that later mutation of the caller's mapping cannot make the
+    # declared label names and the supplied values disagree.
+    extra = dict(extra_labels)
+
+    class Extended(base_cls):
+        def __init__(self, name, documentation=None, labelnames=None, **kwargs):
+            # Caller labels first, injected labels last: positional values
+            # passed to labels() keep their original positions.
+            extended_names = list(labelnames or []) + list(extra.keys())
+            super().__init__(name=name, documentation=documentation,
+                             labelnames=extended_names, **kwargs)
+
+        def labels(self, *args, **kwargs):
+            if args:
+                args = args + tuple(extra.values())
+            # Also merge on a call without any label values; otherwise the
+            # extra labels declared in __init__ would be left without values.
+            if kwargs or not args:
+                kwargs.update(extra)
+            return super().labels(*args, **kwargs)
+
+    return Extended
+
+
+def _make_extended_spec_decoding_cls(base_cls, extra_labels):
+    """Extend SpecDecodingProm with custom labels via its _counter_cls.
+
+    Args:
+        base_cls: Class to subclass (``RaySpecDecodingProm`` in this module).
+        extra_labels: Mapping of label name to constant value.
+
+    Returns:
+        A ``base_cls`` subclass whose ``_counter_cls`` is the label-extended
+        ``RayCounterWrapper``.
+    """
+
+    class Extended(base_cls):
+        _counter_cls = _make_extended_metric_cls(RayCounterWrapper, extra_labels)
+
+    return Extended
+
+
+def _make_extended_kv_connector_cls(base_cls, extra_labels):
+    """Extend KVConnectorPrometheus with custom labels via its _cls vars.
+
+    Args:
+        base_cls: Class to subclass (``RayKVConnectorPrometheus`` in this
+            module).
+        extra_labels: Mapping of label name to constant value.
+
+    Returns:
+        A ``base_cls`` subclass whose gauge, counter and histogram classes
+        are the label-extended Ray wrappers.
+    """
+
+    class Extended(base_cls):
+        _gauge_cls = _make_extended_metric_cls(RayGaugeWrapper, extra_labels)
+        _counter_cls = _make_extended_metric_cls(RayCounterWrapper, extra_labels)
+        _histogram_cls = _make_extended_metric_cls(RayHistogramWrapper, extra_labels)
+
+    return Extended
+
+
+class NeutreeRayStatLogger(RayPrometheusStatLogger):
+    """RayPrometheusStatLogger with Ray Serve context labels injected.
+
+    Transparently extends all vLLM metrics with deployment, replica,
+    and application labels from the Ray Serve replica context.
+    """
+
+    def __init__(self, vllm_config, engine_indexes=None):
+        """Install label-extended metric classes, then run the base init.
+
+        Args:
+            vllm_config: Passed through to ``RayPrometheusStatLogger``.
+            engine_indexes: Passed through to ``RayPrometheusStatLogger``.
+        """
+        extra_labels = {}
+        try:
+            ctx = serve.get_replica_context()
+            extra_labels = {
+                "deployment": ctx.deployment,
+                "replica": ctx.replica_tag,
+            }
+            if hasattr(ctx, "app_name"):
+                extra_labels["application"] = ctx.app_name
+        except RuntimeError:
+            # Best effort: the logger is still initialised below, just
+            # without the extra labels.
+            logger.warning(
+                "NeutreeRayStatLogger: not running in Ray Serve context, "
+                "skipping custom labels"
+            )
+
+        if extra_labels:
+            # Installed before super().__init__() — presumably the base
+            # initializer builds its metrics from these attributes; confirm
+            # against the vLLM version in use.
+            self._gauge_cls = _make_extended_metric_cls(
+                RayGaugeWrapper, extra_labels)
+            self._counter_cls = _make_extended_metric_cls(
+                RayCounterWrapper, extra_labels)
+            self._histogram_cls = _make_extended_metric_cls(
+                RayHistogramWrapper, extra_labels)
+            self._spec_decoding_cls = _make_extended_spec_decoding_cls(
+                RaySpecDecodingProm, extra_labels)
+            self._kv_connector_cls = _make_extended_kv_connector_cls(
+                RayKVConnectorPrometheus, extra_labels)
+
+        super().__init__(vllm_config, engine_indexes)
+
+        # Lazy %-formatting: the message is only built if INFO is enabled.
+        logger.info(
+            "NeutreeRayStatLogger initialized with extra labels: %s",
+            list(extra_labels.keys()),
+        )