10 changes: 9 additions & 1 deletion lightllm/server/api_http.py
@@ -49,10 +49,12 @@
from lightllm.utils.envs_utils import get_unique_server_name
from dataclasses import dataclass

-from .api_openai import chat_completions_impl
+from .api_openai import chat_completions_impl, completions_impl
from .api_models import (
ChatCompletionRequest,
ChatCompletionResponse,
CompletionRequest,
CompletionResponse,
)
from .build_prompt import build_prompt, init_tokenizer

@@ -223,6 +225,12 @@ async def chat_completions(request: ChatCompletionRequest, raw_request: Request)
return resp


@app.post("/v1/completions", response_model=CompletionResponse)
async def completions(request: CompletionRequest, raw_request: Request) -> Response:
resp = await completions_impl(request, raw_request)
return resp


@app.get("/tokens")
@app.post("/tokens")
async def tokens(request: Request):
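The handler above registers an OpenAI-style `/v1/completions` route alongside the existing chat endpoint. A minimal sketch of calling it from a client once the server is running; the host, port, and model name below are placeholders, not values from this PR:

```python
# Hypothetical client call against the new /v1/completions route.
# Host, port, and model name are placeholders for a local deployment.
import requests

payload = {
    "model": "my-model",              # placeholder model id
    "prompt": "Hello, my name is",
    "max_tokens": 32,
    "temperature": 0.7,
}

resp = requests.post("http://localhost:8000/v1/completions", json=payload, timeout=60)
resp.raise_for_status()
print(resp.json()["choices"][0]["text"])
```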
72 changes: 72 additions & 0 deletions lightllm/server/api_models.py
@@ -48,6 +48,32 @@ class ToolChoice(BaseModel):
type: Literal["function"] = Field(default="function", examples=["function"])


class CompletionRequest(BaseModel):
model: str
# prompt: string or tokens
prompt: Union[str, List[str], List[int], List[List[int]]]
suffix: Optional[str] = None
max_tokens: Optional[int] = 16
temperature: Optional[float] = 1.0
top_p: Optional[float] = 1.0
n: Optional[int] = 1
stream: Optional[bool] = False
logprobs: Optional[int] = None
echo: Optional[bool] = False
stop: Optional[Union[str, List[str]]] = None
presence_penalty: Optional[float] = 0.0
frequency_penalty: Optional[float] = 0.0
best_of: Optional[int] = 1
logit_bias: Optional[Dict[str, float]] = None
user: Optional[str] = None

# Additional parameters supported by LightLLM
do_sample: Optional[bool] = False
top_k: Optional[int] = -1
repetition_penalty: Optional[float] = 1.0
ignore_eos: Optional[bool] = False


class ChatCompletionRequest(BaseModel):
model: str
messages: List[Message]
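`CompletionRequest` mirrors the OpenAI completions schema and adds a few LightLLM-specific sampling knobs (`do_sample`, `top_k`, `repetition_penalty`, `ignore_eos`). An illustrative payload validated against the model; all field values here are made up for the example:

```python
# Illustrative request exercising the LightLLM-specific extensions.
example = CompletionRequest(
    model="my-model",              # placeholder
    prompt="Once upon a time",
    max_tokens=64,
    stop=["\n\n"],
    do_sample=True,
    top_k=40,
    repetition_penalty=1.1,
)
# Pydantic v2 API (implied by the field_validator usage elsewhere in this file).
print(example.model_dump(exclude_none=True))
```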
@@ -148,3 +174,49 @@ class ChatCompletionStreamResponse(BaseModel):
@field_validator("id", mode="before")
def ensure_id_is_str(cls, v):
return str(v)


class CompletionLogprobs(BaseModel):
tokens: List[str] = []
token_logprobs: List[Optional[float]] = []
top_logprobs: List[Optional[Dict[str, float]]] = []
text_offset: List[int] = []


class CompletionChoice(BaseModel):
text: str
index: int
logprobs: Optional["CompletionLogprobs"] = None
finish_reason: Optional[Literal["stop", "length"]] = None


class CompletionResponse(BaseModel):
id: str = Field(default_factory=lambda: f"cmpl-{uuid.uuid4().hex}")
object: str = "text_completion"
created: int = Field(default_factory=lambda: int(time.time()))
model: str
choices: List[CompletionChoice]
usage: UsageInfo

@field_validator("id", mode="before")
def ensure_id_is_str(cls, v):
return str(v)


class CompletionStreamChoice(BaseModel):
text: str
index: int
logprobs: Optional[Dict] = None
finish_reason: Optional[Literal["stop", "length"]] = None


class CompletionStreamResponse(BaseModel):
id: str = Field(default_factory=lambda: f"cmpl-{uuid.uuid4().hex}")
object: str = "text_completion"
created: int = Field(default_factory=lambda: int(time.time()))
model: str
choices: List[CompletionStreamChoice]

@field_validator("id", mode="before")
def ensure_id_is_str(cls, v):
return str(v)
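Together these models let a handler assemble a non-streaming response. A sketch of that assembly; it assumes `UsageInfo` exposes the usual `prompt_tokens` / `completion_tokens` / `total_tokens` fields, which are not shown in this diff:

```python
# Assembling a CompletionResponse; the UsageInfo field names are assumed
# to follow the OpenAI convention and are not confirmed by this diff.
choice = CompletionChoice(text=" world!", index=0, finish_reason="stop")
usage = UsageInfo(prompt_tokens=5, completion_tokens=3, total_tokens=8)
response = CompletionResponse(model="my-model", choices=[choice], usage=usage)
print(response.model_dump_json())
```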