Commit 72caf4d

support anthropic endpoint
Signed-off-by: liuli <[email protected]>
Parent: 5a3fa47

File tree

vllm/entrypoints/anthropic/api_server.py
vllm/entrypoints/anthropic/serving_messages.py

2 files changed: +11 -41 lines

vllm/entrypoints/anthropic/api_server.py

Lines changed: 8 additions & 29 deletions
@@ -1,41 +1,24 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+# Adapted from:
+# https://github.com/vllm/vllm/entrypoints/openai/api_server.py
 
 import asyncio
-import atexit
-import gc
-import importlib
-import inspect
-import json
-import multiprocessing
-import os
 import signal
-import socket
 import tempfile
-import uuid
 from argparse import Namespace
-from collections.abc import AsyncIterator, Awaitable
-from contextlib import asynccontextmanager
-from functools import partial
 from http import HTTPStatus
-from typing import Annotated, Any, Callable, Optional
+from typing import Optional
 
-import pydantic
 import uvloop
-from fastapi import APIRouter, Depends, FastAPI, Form, HTTPException, Request
-from fastapi.exceptions import RequestValidationError
+from fastapi import APIRouter, Depends, FastAPI, Request
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse, Response, StreamingResponse
-from starlette.concurrency import iterate_in_threadpool
-from starlette.datastructures import URL, Headers, MutableHeaders, State
-from starlette.types import ASGIApp, Message, Receive, Scope, Send
+from starlette.datastructures import State
 
 import vllm.envs as envs
 from vllm.config import VllmConfig
-from vllm.engine.arg_utils import AsyncEngineArgs
 from vllm.engine.async_llm_engine import AsyncLLMEngine  # type: ignore
-from vllm.engine.multiprocessing.client import MQLLMEngineClient
-from vllm.engine.multiprocessing.engine import run_mp_engine
 from vllm.engine.protocol import EngineClient
 from vllm.entrypoints.anthropic.protocol import AnthropicErrorResponse, AnthropicMessagesRequest, \
     AnthropicMessagesResponse
@@ -49,21 +32,17 @@
     lifespan, build_async_engine_client, validate_json_request
 from vllm.entrypoints.openai.cli_args import (make_arg_parser,
                                               validate_parsed_serve_args)
-from vllm.entrypoints.openai.protocol import ChatCompletionRequest, ErrorResponse
+from vllm.entrypoints.openai.protocol import ErrorResponse
 from vllm.entrypoints.openai.serving_models import OpenAIServingModels, BaseModelPath, LoRAModulePath
 #
 # yapf: enable
 from vllm.entrypoints.openai.tool_parsers import ToolParserManager
 from vllm.entrypoints.utils import (cli_env_setup, load_aware_call,
                                     with_cancellation)
 from vllm.logger import init_logger
-from vllm.reasoning import ReasoningParserManager
-from vllm.transformers_utils.config import (
-    maybe_register_config_serialize_by_value)
 from vllm.transformers_utils.tokenizer import MistralTokenizer
-from vllm.usage.usage_lib import UsageContext
-from vllm.utils import (Device, FlexibleArgumentParser,
-                        get_open_zmq_ipc_path, is_valid_ipv6_address,
+from vllm.utils import (FlexibleArgumentParser,
+                        is_valid_ipv6_address,
                         set_ulimit)
 from vllm.version import __version__ as VLLM_VERSION
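
A minimal sketch of exercising the server this entrypoint starts. It assumes a vLLM instance launched from this Anthropic entrypoint on localhost:8000 and an Anthropic-style /v1/messages route; neither the route nor the payload shape appears in this diff, so both are assumptions based on Anthropic's public Messages API.

# Hypothetical client call against the Anthropic-compatible server.
# The route, address, and model name are assumptions, not taken from this diff.
import requests

resp = requests.post(
    "http://localhost:8000/v1/messages",  # assumed route and address
    json={
        "model": "meta-llama/Llama-3.1-8B-Instruct",  # any model vLLM serves
        "max_tokens": 256,
        "messages": [{"role": "user", "content": "Hello!"}],
    },
)
resp.raise_for_status()
body = resp.json()
# Anthropic-style responses carry a list of content blocks rather than
# OpenAI-style "choices".
print(body["content"][0]["text"])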

vllm/entrypoints/anthropic/serving_messages.py

Lines changed: 3 additions & 12 deletions
@@ -4,18 +4,13 @@
 # https://github.com/vllm/vllm/entrypoints/openai/serving_chat.py
 
 """Anthropic Messages API serving handler"""
-import asyncio
-import copy
 import json
 import logging
 import time
-from typing import Any, AsyncGenerator, Dict, List, Optional, Union, Final, AsyncIterator
-import uuid
-import jinja2
+from typing import AsyncGenerator, List, Optional, Union
 
 from fastapi import Request
 
-from vllm import SamplingParams, RequestOutput
 from vllm.config import ModelConfig
 from vllm.engine.protocol import EngineClient
 from vllm.entrypoints.anthropic.protocol import (
@@ -26,17 +21,13 @@
     AnthropicStreamEvent,
     AnthropicUsage, AnthropicError,
 )
-from vllm.entrypoints.chat_utils import ChatTemplateContentFormatOption, ConversationMessage, ChatCompletionMessageParam
+from vllm.entrypoints.chat_utils import ChatTemplateContentFormatOption
 from vllm.entrypoints.logger import RequestLogger
-from vllm.entrypoints.openai.protocol import ErrorResponse, RequestResponseMetadata, ChatCompletionRequest, \
+from vllm.entrypoints.openai.protocol import ErrorResponse, ChatCompletionRequest, \
     ChatCompletionNamedToolChoiceParam, ChatCompletionToolsParam, ChatCompletionResponse, ChatCompletionStreamResponse, \
     StreamOptions
-
 from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
 from vllm.entrypoints.openai.serving_models import OpenAIServingModels
-from vllm.entrypoints.utils import get_max_tokens
-from vllm.sampling_params import BeamSearchParams
-from vllm.transformers_utils.tokenizer import AnyTokenizer
 
 logger = logging.getLogger(__name__)
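
The imports this file keeps (ChatCompletionRequest, OpenAIServingChat) suggest the handler serves Anthropic requests by translating them onto vLLM's existing OpenAI chat-completion machinery. A minimal, hypothetical sketch of that mapping follows; it is not the module's actual code, the helper name is invented, and the Anthropic-side field names follow the public Messages API.

from typing import Any, Dict


def anthropic_to_chat_completion_kwargs(payload: Dict[str, Any]) -> Dict[str, Any]:
    """Hypothetical mapping from an Anthropic Messages payload to
    ChatCompletionRequest keyword arguments."""
    messages = list(payload["messages"])
    # Anthropic carries the system prompt as a top-level "system" field,
    # while the OpenAI-style schema expects it as the first message.
    if payload.get("system"):
        messages.insert(0, {"role": "system", "content": payload["system"]})
    return {
        "model": payload["model"],
        "messages": messages,
        # "max_tokens" is required by the Anthropic Messages API.
        "max_tokens": payload.get("max_tokens"),
        "temperature": payload.get("temperature", 1.0),
        "stream": payload.get("stream", False),
    }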
