diff --git a/src/vllm_router/app.py b/src/vllm_router/app.py
index 0135cf8ee..ba65ddcab 100644
--- a/src/vllm_router/app.py
+++ b/src/vllm_router/app.py
@@ -28,6 +28,7 @@
 )
 from vllm_router.experimental import get_feature_gates, initialize_feature_gates
 from vllm_router.parsers.parser import parse_args
+from vllm_router.routers.anthropic_router import anthropic_router
 from vllm_router.routers.batches_router import batches_router
 from vllm_router.routers.files_router import files_router
 from vllm_router.routers.main_router import main_router
@@ -321,6 +322,7 @@ def initialize_all(app: FastAPI, args):
 
 app = FastAPI(lifespan=lifespan)
 app.include_router(main_router)
+app.include_router(anthropic_router)
 app.include_router(files_router)
 app.include_router(batches_router)
 app.include_router(metrics_router)
diff --git a/src/vllm_router/routers/anthropic_router.py b/src/vllm_router/routers/anthropic_router.py
new file mode 100644
index 000000000..0c19c2cba
--- /dev/null
+++ b/src/vllm_router/routers/anthropic_router.py
@@ -0,0 +1,26 @@
+# Copyright 2024-2025 The vLLM Production Stack Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from fastapi import APIRouter, BackgroundTasks, Request
+
+from vllm_router.log import init_logger
+from vllm_router.services.request_service.request import route_general_request
+
+logger = init_logger(__name__)
+anthropic_router = APIRouter()
+
+
+@anthropic_router.post("/v1/messages")
+async def route_anthropic_messages(request: Request, background_tasks: BackgroundTasks):
+    """Route Anthropic-compatible messages requests to the backend."""
+    return await route_general_request(request, "/v1/messages", background_tasks)
diff --git a/src/vllm_router/utils.py b/src/vllm_router/utils.py
index 17b4464e0..ee2b0af15 100644
--- a/src/vllm_router/utils.py
+++ b/src/vllm_router/utils.py
@@ -73,6 +73,7 @@ class ModelType(enum.Enum):
     score = "score"
     transcription = "transcription"
     vision = "vision"
+    messages = "messages"
 
     @staticmethod
     def get_url(model_type: str):
@@ -89,6 +90,8 @@ def get_url(model_type: str):
                 return "/v1/score"
             case ModelType.transcription:
                 return "/v1/audio/transcriptions"
+            case ModelType.messages:
+                return "/v1/messages"
 
     @staticmethod
     def get_test_payload(model_type: str):
@@ -112,6 +115,17 @@ def get_test_payload(model_type: str):
                 return {"query": "Hello", "documents": ["Test"]}
             case ModelType.score:
                 return {"encoding_format": "float", "text_1": "Test", "test_2": "Test2"}
+            case ModelType.messages:
+                return {
+                    "messages": [
+                        {
+                            "role": "user",
+                            "content": "Hello",
+                        }
+                    ],
+                    "temperature": 0.0,
+                    "max_tokens": 3,
+                }
             case ModelType.transcription:
                 if _SILENT_WAV_BYTES is not None:
                     logger.debug("=====Silent WAV Bytes is being used=====")