@@ -1,41 +1,24 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+# Adapted from:
+# https://github.com/vllm/vllm/entrypoints/openai/api_server.py

 import asyncio
-import atexit
-import gc
-import importlib
-import inspect
-import json
-import multiprocessing
-import os
 import signal
-import socket
 import tempfile
-import uuid
 from argparse import Namespace
-from collections.abc import AsyncIterator, Awaitable
-from contextlib import asynccontextmanager
-from functools import partial
 from http import HTTPStatus
-from typing import Annotated, Any, Callable, Optional
+from typing import Optional

-import pydantic
 import uvloop
-from fastapi import APIRouter, Depends, FastAPI, Form, HTTPException, Request
-from fastapi.exceptions import RequestValidationError
+from fastapi import APIRouter, Depends, FastAPI, Request
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse, Response, StreamingResponse
-from starlette.concurrency import iterate_in_threadpool
-from starlette.datastructures import URL, Headers, MutableHeaders, State
-from starlette.types import ASGIApp, Message, Receive, Scope, Send
+from starlette.datastructures import State

 import vllm.envs as envs
 from vllm.config import VllmConfig
-from vllm.engine.arg_utils import AsyncEngineArgs
 from vllm.engine.async_llm_engine import AsyncLLMEngine  # type: ignore
-from vllm.engine.multiprocessing.client import MQLLMEngineClient
-from vllm.engine.multiprocessing.engine import run_mp_engine
 from vllm.engine.protocol import EngineClient
 from vllm.entrypoints.anthropic.protocol import AnthropicErrorResponse, AnthropicMessagesRequest, \
     AnthropicMessagesResponse
@@ -49,21 +32,17 @@
     lifespan, build_async_engine_client, validate_json_request
 from vllm.entrypoints.openai.cli_args import (make_arg_parser,
                                               validate_parsed_serve_args)
-from vllm.entrypoints.openai.protocol import ChatCompletionRequest, ErrorResponse
+from vllm.entrypoints.openai.protocol import ErrorResponse
 from vllm.entrypoints.openai.serving_models import OpenAIServingModels, BaseModelPath, LoRAModulePath
 #
 # yapf: enable
 from vllm.entrypoints.openai.tool_parsers import ToolParserManager
 from vllm.entrypoints.utils import (cli_env_setup, load_aware_call,
                                     with_cancellation)
 from vllm.logger import init_logger
-from vllm.reasoning import ReasoningParserManager
-from vllm.transformers_utils.config import (
-    maybe_register_config_serialize_by_value)
 from vllm.transformers_utils.tokenizer import MistralTokenizer
-from vllm.usage.usage_lib import UsageContext
-from vllm.utils import (Device, FlexibleArgumentParser,
-                        get_open_zmq_ipc_path, is_valid_ipv6_address,
+from vllm.utils import (FlexibleArgumentParser,
+                        is_valid_ipv6_address,
                         set_ulimit)
 from vllm.version import __version__ as VLLM_VERSION

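For orientation, here is a minimal sketch (not part of the commit, and not necessarily the file's actual handler) of how the surviving imports are typically wired together: an APIRouter route for the Anthropic Messages API, using the same decorator stack vLLM applies to its OpenAI-compatible routes. The `serving_messages` attribute on `app.state` and the `create_messages` method are illustrative assumptions; only the imported names come from the diff above.

router = APIRouter()


@router.post("/v1/messages",
             dependencies=[Depends(validate_json_request)])
@with_cancellation
@load_aware_call
async def create_messages(request: AnthropicMessagesRequest,
                          raw_request: Request):
    # Hypothetical: a serving object stashed on app.state during startup,
    # mirroring the layout of vLLM's OpenAI entrypoint.
    handler = raw_request.app.state.serving_messages

    generator = await handler.create_messages(request, raw_request)
    # Assumption: the Anthropic protocol classes are pydantic models,
    # like their counterparts in vllm.entrypoints.openai.protocol.
    if isinstance(generator, AnthropicErrorResponse):
        return JSONResponse(content=generator.model_dump(),
                            status_code=HTTPStatus.BAD_REQUEST.value)
    if isinstance(generator, AnthropicMessagesResponse):
        return JSONResponse(content=generator.model_dump())
    # Otherwise assume an async generator of SSE-formatted chunks.
    return StreamingResponse(content=generator,
                             media_type="text/event-stream")

Engine startup and shutdown need no Anthropic-specific code at all, which is presumably why the commit imports lifespan and build_async_engine_client from the OpenAI api_server rather than re-implementing them; only the protocol layer differs between the two front ends.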