-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathembedding_server.py
More file actions
37 lines (31 loc) · 1.2 KB
/
embedding_server.py
File metadata and controls
37 lines (31 loc) · 1.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
from __future__ import annotations
from fastapi import FastAPI, Request
from pydantic import BaseModel, ConfigDict
from sentence_transformers import SentenceTransformer
from typing import Any, List, Union
# ASGI application exposing an OpenAI-compatible /v1/embeddings endpoint.
app = FastAPI()
# Embedding model, loaded once at import time (downloads weights on first run);
# all encoding happens in-process through this shared instance.
model = SentenceTransformer("BAAI/bge-large-en-v1.5")
class EmbeddingRequest(BaseModel):
    """Request body for the OpenAI-compatible embeddings endpoint.

    Mirrors OpenAI's schema: ``input`` is either one string or a list of
    strings. Token arrays (lists of ints), which some OpenAI clients send,
    are not supported by this server.
    """

    input: Union[str, List[str]]
    # Model name supplied by the client; optional, only echoed back in the response.
    model: Union[str, None] = None
    # Accept and ignore any extra OpenAI fields (encoding_format, user, dimensions, ...).
    model_config = ConfigDict(extra="allow")
@app.post("/v1/embeddings")
def embeddings(req: EmbeddingRequest, request: Request) -> dict[str, Any]:
    """Return embeddings for ``req.input`` in OpenAI's response format.

    Args:
        req: Parsed request body; ``input`` is a string or list of strings.
        request: Raw Starlette request. Currently unused, but kept in the
            signature so FastAPI keeps injecting it (removing it would be a
            behavior-neutral but gratuitous interface change).

    Returns:
        An OpenAI-style ``{"object": "list", "data": [...], ...}`` payload
        with one embedding entry per input text, in input order.
    """
    # Normalize the OpenAI-style polymorphic input to a list of strings.
    texts: List[str] = [req.input] if isinstance(req.input, str) else req.input
    # NOTE: model.encode is blocking and CPU/GPU-bound. The handler is a plain
    # `def` (not `async def`) on purpose: FastAPI runs sync handlers in its
    # threadpool, so a long encode no longer stalls the asyncio event loop
    # for every other concurrent request.
    embs = model.encode(texts, normalize_embeddings=True)
    return {
        "object": "list",
        "data": [
            {"object": "embedding", "embedding": emb.tolist(), "index": i}
            for i, emb in enumerate(embs)
        ],
        "model": req.model or "bge-large-en-v1.5",
        # Token accounting is not implemented; zeros keep clients that read
        # `usage` happy without claiming real counts.
        "usage": {"prompt_tokens": 0, "total_tokens": 0},
    }