neutree-ai
diff --git a/.github/workflows/release-serve.yaml b/.github/workflows/release-serve.yaml
Lines changed: 12 additions & 2 deletions
diff --git a/.gitignore b/.gitignore
Lines changed: 3 additions & 0 deletions
diff --git a/cluster-image-builder/Dockerfile b/cluster-image-builder/Dockerfile
Lines changed: 6 additions & 6 deletions
diff --git a/cluster-image-builder/Dockerfile.rocm b/cluster-image-builder/Dockerfile.rocm
Lines changed: 6 additions & 6 deletions
diff --git a/cluster-image-builder/Makefile b/cluster-image-builder/Makefile
Lines changed: 7 additions & 3 deletions
diff --git a/cluster-image-builder/accelerator/amd_gpu.py b/cluster-image-builder/accelerator/amd_gpu.py
Lines changed: 3 additions & 3 deletions
diff --git a/cluster-image-builder/accelerator/gpu.py b/cluster-image-builder/accelerator/gpu.py
Lines changed: 2 additions & 2 deletions
diff --git a/cluster-image-builder/requirements/cuda.txt b/cluster-image-builder/requirements/cuda.txt
Lines changed: 2 additions & 2 deletions
diff --git a/cluster-image-builder/requirements/cuda_arm.txt b/cluster-image-builder/requirements/cuda_arm.txt
Lines changed: 1 addition & 1 deletion
diff --git a/cluster-image-builder/serve/_metrics/ray_stat_logger.py b/cluster-image-builder/serve/_metrics/ray_stat_logger.py
Lines changed: 95 additions & 0 deletions
@@ -11,6 +11,16 @@ on:
         description: "the release version (e.g : 0.1.0)"
         required: true
         type: string
+      ray_version:
+        description: "the ray version branch/tag to build from"
+        required: false
+        type: string
+        default: "ray-2.53.0-neutree"
+      accelerators:
+        description: "space-separated accelerator types to build (e.g : gpu amd-gpu)"
+        required: false
+        type: string
+        default: "gpu"
 
 jobs:
   build-amd64-image:
@@ -28,7 +38,7 @@ jobs:
           username: ${{ secrets.SERVE_IMAGE_PUSH_USERNAME }}
           password: ${{ secrets.SERVE_IMAGE_PUSH_TOKEN }}
       - name: build amd64 image
-        run: cd cluster-image-builder; export ARCH=amd64 VERSION=${{ github.event.inputs.version }}; make docker-build && make docker-push
+        run: cd cluster-image-builder; export ARCH=amd64 VERSION="${{ github.event.inputs.version }}" RAY_VERSION="${{ github.event.inputs.ray_version }}" ACCELERATORS="${{ github.event.inputs.accelerators }}"; make docker-build && make docker-push
         env:
           IMAGE_PROJECT: ${{ secrets.RELEASE_SERVE_IMAGE_PROJECT }}
           IMAGE_REPO: ${{ secrets.SERVE_IMAGE_REPO }}
@@ -51,7 +61,7 @@ jobs:
       - name: push manifests
         run: |
           cd cluster-image-builder
-          export VERSION=${{ github.event.inputs.version }} ALL_ARCH=amd64; make docker-push-manifest
+          export VERSION="${{ github.event.inputs.version }}" RAY_VERSION="${{ github.event.inputs.ray_version }}" ALL_ARCH=amd64 ACCELERATORS="${{ github.event.inputs.accelerators }}"; make docker-push-manifest
 
         env:
           IMAGE_PROJECT: ${{ secrets.RELEASE_SERVE_IMAGE_PROJECT }}
 
@@ -35,3 +35,6 @@ scripts/dashboard/output
 
 cluster-image-builder/downloader
 scripts/builder/dist
+
+claude.md
+.claude
@@ -1,22 +1,22 @@
 ARG RAY_BASE_IMAGE
-ARG RAY_BRANCH="ray-2.43.0-neutree"
+ARG RAY_COMMIT
 ARG COMMON_WORKDIR=/app
-ARG RAY_REPO="https://github.com/neutree-ai/ray.git"
+ARG RAY_REPO
 ARG RAY_BUILD_BASE_IMAGE="quay.io/pypa/manylinux2014_x86_64:2024-07-02-9ac04ee"
 
 FROM alpine/git:v2.47.2 as ray_fetch
 ARG RAY_REPO
-ARG RAY_BRANCH
+ARG RAY_COMMIT
 ARG COMMON_WORKDIR
 WORKDIR ${COMMON_WORKDIR}
 RUN git clone ${RAY_REPO} \
 	    && cd ray \
-	    && git checkout ${RAY_BRANCH}
+	    && git checkout ${RAY_COMMIT:?RAY_COMMIT build-arg is required}
 
 FROM ${RAY_BUILD_BASE_IMAGE} AS build_ray
 ARG COMMON_WORKDIR
-ARG RAY_BRANCH
-ENV BUILDKITE_COMMIT=${RAY_BRANCH}
+ARG RAY_COMMIT
+ENV BUILDKITE_COMMIT=${RAY_COMMIT}
 ENV TRAVIS_COMMIT=${BUILDKITE_COMMIT}
 ENV BUILD_ONE_PYTHON_ONLY=py311
 ENV RAY_DISABLE_EXTRA_CPP=1
 
@@ -2,8 +2,8 @@
 # default base image
 ARG VLLM_BRANCH="v0.8.5-neutree"
 ARG VLLM_REPO="https://github.com/neutree-ai/vllm.git"
-ARG RAY_BRANCH="ray-2.43.0-neutree"
-ARG RAY_REPO="https://github.com/neutree-ai/ray.git"
+ARG RAY_COMMIT
+ARG RAY_REPO
 ARG USE_CYTHON="0"
 ARG BUILD_RPD="1"
 ARG COMMON_WORKDIR=/app
@@ -54,16 +54,16 @@ COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/.buildkite /.buildkite
 # Ray build stages
 FROM base AS ray_fetch
 ARG RAY_REPO
-ARG RAY_BRANCH
+ARG RAY_COMMIT
 RUN git clone ${RAY_REPO} \
 	    && cd ray \
-	    && git checkout ${RAY_BRANCH}
+	    && git checkout ${RAY_COMMIT:?RAY_COMMIT build-arg is required}
 
 
 FROM ${RAY_BUILD_BASE_IMAGE} AS build_ray
 ARG COMMON_WORKDIR
-ARG RAY_BRANCH
-ENV BUILDKITE_COMMIT=${RAY_BRANCH}
+ARG RAY_COMMIT
+ENV BUILDKITE_COMMIT=${RAY_COMMIT}
 ENV TRAVIS_COMMIT=${BUILDKITE_COMMIT}
 ENV BUILD_ONE_PYTHON_ONLY=py311
 ENV RAY_DISABLE_EXTRA_CPP=1
 
@@ -10,11 +10,15 @@ ARCH ?= amd64
 ACCELERATORS ?= gpu amd-gpu
 ALL_ARCH ?= amd64 arm64
 
-RAY_BASE_IMAGE ?= rayproject/ray:2.43.0-py311-cu121
+RAY_BASE_IMAGE ?= rayproject/ray:2.53.0-py311-cu121
 ifeq ($(ARCH), arm64)
 RAY_BASE_IMAGE := $(RAY_BASE_IMAGE)-aarch64
 endif
 
+RAY_REPO ?= https://github.com/neutree-ai/ray.git
+RAY_VERSION ?= ray-2.53.0-neutree
+# Resolved on use (7-char SHA of RAY_VERSION in RAY_REPO); aborts instead of passing an empty commit to docker.
+RAY_COMMIT ?= $(or $(shell git ls-remote $(RAY_REPO) $(RAY_VERSION) | cut -f1 | head -c 7),$(error cannot resolve ref '$(RAY_VERSION)' in $(RAY_REPO); pass RAY_COMMIT=<sha> explicitly))
 ROCM_BASE_IMAGE ?= $(NEUTREE_SERVE_IMAGE):rocm-base
 
 .PHONY: docker-build
@@ -23,11 +27,11 @@ docker-build: ## Run docker-build-* targets for all the images
 
 .PHONY: docker-build-gpu
 docker-build-gpu: prepare ## Build the GPU image
-	docker build --build-arg RAY_BASE_IMAGE=$(RAY_BASE_IMAGE) -f Dockerfile -t $(NEUTREE_SERVE_IMAGE)-$(ARCH):$(IMAGE_TAG) .
+	docker build --build-arg RAY_BASE_IMAGE=$(RAY_BASE_IMAGE) --build-arg RAY_COMMIT=$(RAY_COMMIT) --build-arg RAY_REPO=$(RAY_REPO) -f Dockerfile -t $(NEUTREE_SERVE_IMAGE)-$(ARCH):$(IMAGE_TAG) .
 
 .PHONY: docker-build-amd-gpu
 docker-build-amd-gpu: prepare ## Build the AMD GPU image
-	docker build --build-arg BASE_IMAGE=$(ROCM_BASE_IMAGE) -f Dockerfile.rocm -t $(NEUTREE_SERVE_IMAGE)-$(ARCH):$(IMAGE_TAG)-rocm .
+	docker build --build-arg BASE_IMAGE=$(ROCM_BASE_IMAGE) --build-arg RAY_COMMIT=$(RAY_COMMIT) --build-arg RAY_REPO=$(RAY_REPO) -f Dockerfile.rocm -t $(NEUTREE_SERVE_IMAGE)-$(ARCH):$(IMAGE_TAG)-rocm .
 
 .PHONY: docker-push
 docker-push: ## Run docker-push-* targets for all the images
 
@@ -37,9 +37,9 @@ def get_accelerator_counts():
             market_name = device.get("market_name")
             if market_name is None:
                 continue
-            accelerator_type = market_name.replace(" ","_")
-            if not accelerator_type.startswith("AMD_"):
-                accelerator_type = "AMD_" + accelerator_type
+            accelerator_type = market_name.replace(" ","")
+            if not accelerator_type.startswith("AMD"):
+                accelerator_type = "AMD" + accelerator_type
             if accelerator_counts.get(accelerator_type) is None:
                 accelerator_counts[accelerator_type] = 1
             else:
 
@@ -31,9 +31,9 @@ def count_nvidia_accelerators(gpu_names):
     """
     accelerator_counts = {}
     for gpu in gpu_names:
-        accelerator_type = gpu.replace(" ","_")
-        if not accelerator_type.startswith("NVIDIA_"):
-            accelerator_type = "NVIDIA_" + accelerator_type
+        accelerator_type = gpu.replace(" ","")
+        if not accelerator_type.startswith("NVIDIA"):
+            accelerator_type = "NVIDIA" + accelerator_type
         if accelerator_counts.get(accelerator_type) is None:
             accelerator_counts[accelerator_type] = 1
         else:
 
@@ -1,5 +1,5 @@
 llama_cpp_python==0.3.7
-vllm==0.8.5
-ray[serve]==2.43.0
+vllm==0.11.2
+ray[serve]==2.53.0
 numpy==1.26.4
 opencv-python-headless==4.11.0.86
@@ -1,4 +1,4 @@
 llama_cpp_python==0.3.7
-ray[serve]==2.43.0
+ray[serve]==2.53.0
 numpy==1.26.4
 opencv-python-headless==4.11.0.86
@@ -0,0 +1,95 @@
+import logging
+
+from ray import serve
+from vllm.v1.metrics.ray_wrappers import (
+    RayPrometheusStatLogger,
+    RaySpecDecodingProm,
+    RayKVConnectorPrometheus,
+    RayGaugeWrapper,
+    RayCounterWrapper,
+    RayHistogramWrapper,
+)
+
+logger = logging.getLogger("ray.serve")
+
+
+def _make_extended_metric_cls(base_cls, extra_labels):
+    """Return a ``base_cls`` subclass whose labels always include ``extra_labels`` (name -> value)."""
+    class Extended(base_cls):
+        def __init__(self, name, documentation=None, labelnames=None, **kwargs):
+            extended_names = list(labelnames or []) + list(extra_labels.keys())
+            super().__init__(name=name, documentation=documentation,
+                             labelnames=extended_names, **kwargs)
+
+        def labels(self, *args, **kwargs):
+            # Positional values follow declaration order (extras last); otherwise merge by name, even for a bare labels().
+            if args:
+                args = args + tuple(extra_labels.values())
+            else:
+                kwargs = {**kwargs, **extra_labels}
+            return super().labels(*args, **kwargs)
+
+    return Extended
+
+
+def _make_extended_spec_decoding_cls(base_cls, extra_labels):
+    """Return a ``base_cls`` subclass whose ``_counter_cls`` is the label-extended RayCounterWrapper."""
+    labelled_counter = _make_extended_metric_cls(RayCounterWrapper, extra_labels)
+    class Extended(base_cls):
+        _counter_cls = labelled_counter
+
+    return Extended
+
+
+def _make_extended_kv_connector_cls(base_cls, extra_labels):
+    """Return a ``base_cls`` subclass whose gauge/counter/histogram wrapper classes are label-extended."""
+    gauge, counter, histogram = (
+        _make_extended_metric_cls(wrapper, extra_labels)
+        for wrapper in (RayGaugeWrapper, RayCounterWrapper, RayHistogramWrapper))
+
+    class Extended(base_cls):
+        _gauge_cls, _counter_cls, _histogram_cls = gauge, counter, histogram
+    return Extended
+
+
+class NeutreeRayStatLogger(RayPrometheusStatLogger):
+    """RayPrometheusStatLogger with Ray Serve context labels injected.
+
+    Extends the vLLM metric wrappers with ``deployment``, ``replica`` and, when
+    available, ``application`` labels read from the Ray Serve replica context.
+    """
+
+    def __init__(self, vllm_config, engine_indexes=None):
+        # Local import: outside a replica, serve.get_replica_context() raises
+        # RayServeException, which `except RuntimeError` alone does not catch.
+        from ray.serve.exceptions import RayServeException
+
+        extra_labels = {}
+        try:
+            ctx = serve.get_replica_context()
+            extra_labels = {
+                "deployment": ctx.deployment,
+                "replica": ctx.replica_tag,
+            }
+            if hasattr(ctx, "app_name"):
+                extra_labels["application"] = ctx.app_name
+        except (RuntimeError, RayServeException):
+            logger.warning("NeutreeRayStatLogger: not running in Ray Serve context, "
+                           "skipping custom labels")
+
+        if extra_labels:
+            # Set before super().__init__(); presumably the base initializer builds
+            # its metrics from these wrapper classes (confirm against vLLM).
+            self._gauge_cls = _make_extended_metric_cls(RayGaugeWrapper, extra_labels)
+            self._counter_cls = _make_extended_metric_cls(RayCounterWrapper, extra_labels)
+            self._histogram_cls = _make_extended_metric_cls(
+                RayHistogramWrapper, extra_labels)
+            self._spec_decoding_cls = _make_extended_spec_decoding_cls(
+                RaySpecDecodingProm, extra_labels)
+            self._kv_connector_cls = _make_extended_kv_connector_cls(
+                RayKVConnectorPrometheus, extra_labels)
+
+        super().__init__(vllm_config, engine_indexes)
+
+        logger.info("NeutreeRayStatLogger initialized with extra labels: %s",
+                    list(extra_labels))