
Commit 48792f9

sanic server script
Signed-off-by: guangli.bao <[email protected]>
1 parent 59c1be8 commit 48792f9

File tree

2 files changed: +243 −1 lines changed


pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -143,7 +143,7 @@ exclude = ["venv", ".tox"]
follow_imports = 'silent'

[[tool.mypy.overrides]]
-module = ["datasets.*", "transformers.*", "setuptools.*", "setuptools_git_versioning.*"]
+module = ["datasets.*", "transformers.*", "setuptools.*", "setuptools_git_versioning.*", "sanic.*"]
ignore_missing_imports=true

tests/unit/sanic_server.py

Lines changed: 242 additions & 0 deletions
@@ -0,0 +1,242 @@
#!/usr/bin/env python3
"""
sanic_server.py

Simple Sanic-based mock server that implements common OpenAI / vLLM-compatible routes:
- GET  /                     : health
- GET  /v1/models            : list models
- POST /v1/chat/completions  : chat completions (supports streaming via ?stream=true)
- POST /v1/completions       : classic completions
- POST /v1/embeddings        : fake embeddings
- POST /v1/moderations       : fake moderation

Usage:
    pip install sanic==25.3.0 (or the latest release)
Command:
    python sanic_server.py
    # or with explicit options:
    python sanic_server.py --host=0.0.0.0 --port=8000 --workers=1 --debug
"""

import argparse
import asyncio
import json
import random

from sanic import Sanic
from sanic.request import Request
from sanic.response import ResponseStream
from sanic.response import json as sjson

app = Sanic("sanic_server")


# ---------- utils ----------


def fake_tokenize(text: str) -> list[str]:
    # crude whitespace tokenizer for token counting
    return text.strip().split()


def make_choice_text(prompt: str) -> str:
    # Very simple deterministic reply generator:
    # echo a truncated summary of the prompt for testing.
    tail = prompt.strip()[:120]
    return f"Mock reply summarizing: {tail}"


def now_ms() -> int:
    # millisecond timestamp taken from the event loop clock (monotonic, loop-relative)
    return int(asyncio.get_event_loop().time() * 1000)


# ---------- routes ----------


@app.get("/")
async def health(request: Request):
    return sjson({"ok": True, "msg": "mock openai/vllm server"})


@app.get("/v1/models")
async def list_models(request: Request):
    # minimal model list
    models = [
        {"id": "mock-qwen-2.5", "object": "model", "owned_by": "mock"},
        {"id": "facebook/opt-125m", "object": "model", "owned_by": "mock"},
    ]
    return sjson({"object": "list", "data": models})


@app.post("/v1/chat/completions")
async def chat_completions(request: Request):
    """
    Supports:
    - JSON body with 'messages' (OpenAI format)
    - query param stream=true or JSON {'stream': true}
      => responds with text/event-stream chunks containing 'data: {json}\n\n'
    """
    body = request.json or {}
    stream_mode = bool(body.get("stream", False))
    if request.args.get("stream", "false").lower() == "true":
        stream_mode = True

    messages = body.get("messages", [])
    prompt_text = ""
    if isinstance(messages, list) and messages:
        # approximate the prompt as a concatenation of the user message(s)
        for m in messages:
            role = m.get("role", "")
            content = m.get("content", "")
            if role == "user":
                prompt_text += content + " "

    # build a deterministic reply
    reply = make_choice_text(prompt_text or "hello")
    prompt_tokens = len(fake_tokenize(prompt_text))
    completion_tokens = len(fake_tokenize(reply))

    # create response object (non-streaming)
    def make_response_obj():
        return {
            "id": f"cmpl-mock-{random.randint(1000, 9999)}",
            "object": "chat.completion",
            "created": now_ms(),
            "model": body.get("model", "mock-qwen-2.5"),
            "usage": {
                "prompt_tokens": prompt_tokens,
                "completion_tokens": completion_tokens,
                "total_tokens": prompt_tokens + completion_tokens,
            },
            "choices": [
                {
                    "index": 0,
                    "message": {"role": "assistant", "content": reply},
                    "finish_reason": "stop",
                }
            ],
        }

    if not stream_mode:
        return sjson(make_response_obj())

    # streaming mode: SSE-style chunks with 'data: <json>\n\n'
    async def streaming_fn(resp):
        # send an initial "response.start"-like chunk
        start_payload = {"type": "response.start", "created": now_ms()}
        await resp.write(f"data: {json.dumps(start_payload)}\n\n")

        # simulate token-by-token streaming
        tokens = fake_tokenize(reply)
        for i, tk in enumerate(tokens):
            chunk_payload = {
                "id": f"cmpl-mock-{random.randint(1000, 9999)}",
                "object": "chat.completion.chunk",
                "created": now_ms(),
                "model": body.get("model", "mock-qwen-2.5"),
                "choices": [
                    {
                        "delta": {"content": tk + (" " if i < len(tokens) - 1 else "")},
                        "index": 0,
                        "finish_reason": None,
                    }
                ],
            }
            # write chunk
            await resp.write(f"data: {json.dumps(chunk_payload)}\n\n")
            # small jitter between tokens
            await asyncio.sleep(0.03)
        # final done event
        done_payload = {"type": "response.done", "created": now_ms()}
        await resp.write(f"data: {json.dumps(done_payload)}\n\n")

    headers = {"Content-Type": "text/event-stream", "Cache-Control": "no-cache"}
    return ResponseStream(streaming_fn, headers=headers)


@app.post("/v1/completions")
async def completions(request: Request):
    body = request.json or {}
    prompt = body.get("prompt") or (
        body.get("messages")
        and " ".join([m.get("content", "") for m in body.get("messages", [])])
    )
    if not prompt:
        prompt = "hello"
    # optional max_tokens
    max_tokens = int(body.get("max_tokens", 64))
    reply = make_choice_text(prompt)
    tokenized = fake_tokenize(reply)[:max_tokens]
    text_out = " ".join(tokenized)

    prompt_tokens = len(fake_tokenize(prompt))
    completion_tokens = len(tokenized)

    resp = {
        "id": f"cmpl-mock-{random.randint(1000, 9999)}",
        "object": "text_completion",
        "created": now_ms(),
        "model": body.get("model", "mock-qwen-2.5"),
        "choices": [{"text": text_out, "index": 0, "finish_reason": "stop"}],
        "usage": {
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "total_tokens": prompt_tokens + completion_tokens,
        },
    }
    # simulate a small server-side latency
    await asyncio.sleep(0.01)
    return sjson(resp)


@app.post("/v1/embeddings")
async def embeddings(request: Request):
    body = request.json or {}
    inputs = body.get("input") or body.get("inputs") or []
    if isinstance(inputs, str):
        inputs = [inputs]
    # embedding dimension (default 16), overridable via ?dim= or body {"dim": ...}
    dim = int(request.args.get("dim", body.get("dim", 16)))
    out = []
    for i, txt in enumerate(inputs):
        # pseudo-random but repeatable (within a process) values seeded from the input hash
        seed = abs(hash(txt)) % (10**8)
        random.seed(seed)
        vec = [round((random.random() - 0.5), 6) for _ in range(dim)]
        out.append({"object": "embedding", "embedding": vec, "index": i})
    return sjson({"data": out, "model": body.get("model", "mock-embed-1")})


@app.post("/v1/moderations")
async def moderations(request: Request):
    body = request.json or {}
    input_text = body.get("input") or ""
    # super naive: classify as 'flagged' if the input contains "bad"
    flagged = "bad" in input_text.lower()
    return sjson(
        {
            "id": "mod-mock-1",
            "model": body.get("model", "mock-moderation"),
            "results": [{"flagged": flagged}],
        }
    )


if __name__ == "__main__":
    parser = argparse.ArgumentParser(prog="sanic_server")
    parser.add_argument("--host", default="127.0.0.1")
    parser.add_argument("--port", default=8000, type=int)
    parser.add_argument("--debug", action="store_true")
    parser.add_argument("--workers", default=1, type=int)
    args = parser.parse_args()

    app.run(
        host=args.host,
        port=args.port,
        debug=args.debug,
        workers=args.workers,
        access_log=False,
    )
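As a quick sanity check (not part of this commit), a minimal stdlib-only client along these lines could exercise the mock endpoints once the server is running on 127.0.0.1:8000; the file name quick_check.py and the exact payload are illustrative assumptions, not something the commit ships:

# quick_check.py -- hypothetical smoke test for the mock server above
import json
import urllib.request

payload = {
    "model": "mock-qwen-2.5",
    "messages": [{"role": "user", "content": "ping"}],
}

# non-streaming chat completion
req = urllib.request.Request(
    "http://127.0.0.1:8000/v1/chat/completions",
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    out = json.load(resp)
print(out["choices"][0]["message"]["content"])  # expected: "Mock reply summarizing: ping"

# streaming variant: ?stream=true yields SSE lines of the form "data: {...}"
stream_req = urllib.request.Request(
    "http://127.0.0.1:8000/v1/chat/completions?stream=true",
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(stream_req) as resp:
    for raw in resp:
        line = raw.decode("utf-8").strip()
        if line.startswith("data: "):
            print(json.loads(line[len("data: "):]))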

0 commit comments
