Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/litserve/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -1071,6 +1071,8 @@ def _register_spec_endpoints(self, lit_api: LitAPI):
specs = [lit_api.spec] if lit_api.spec else []
for spec in specs:
spec: LitSpec
# Set the server reference for callback triggering in spec endpoints
spec._server = self
# TODO check that path is not clashing
for path, endpoint, methods in spec.endpoints:
self.app.add_api_route(
Expand Down
6 changes: 6 additions & 0 deletions src/litserve/specs/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,12 @@ def __init__(self):
self.request_queue = None
self.response_queue_id = None

def __getstate__(self):
    """Return picklable state: a shallow copy of ``__dict__`` with ``_server`` nulled.

    The back-reference to the server holds unpickleable resources
    (sockets, locks, queues), so it is replaced with ``None`` before
    the spec is serialized; the server re-attaches itself on startup.
    """
    picklable = dict(self.__dict__)
    picklable["_server"] = None
    return picklable

@property
def stream(self):
return False
Expand Down
9 changes: 9 additions & 0 deletions src/litserve/specs/openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from fastapi.responses import StreamingResponse
from pydantic import BaseModel, Field

from litserve.callbacks.base import EventTypes
from litserve.constants import _DEFAULT_LIT_API_PATH
from litserve.specs.base import LitSpec, _AsyncSpecWrapper
from litserve.utils import LitAPIStatus, ResponseBufferItem, azip
Expand Down Expand Up @@ -502,6 +503,14 @@ async def chat_completion(self, request: ChatCompletionRequest, background_tasks
uids = [uuid.uuid4() for _ in range(request.n)]
self.queues = []
self.events = []

# Trigger callback
self._server._callback_runner.trigger_event(
EventTypes.ON_REQUEST.value,
active_requests=self._server.active_requests,
litserver=self._server,
)

for uid in uids:
request_el = request.model_copy()
request_el.n = 1
Expand Down
8 changes: 8 additions & 0 deletions src/litserve/specs/openai_embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from fastapi import status as status_code
from pydantic import BaseModel

from litserve.callbacks.base import EventTypes
from litserve.constants import _DEFAULT_LIT_API_PATH
from litserve.specs.base import LitSpec
from litserve.utils import LitAPIStatus, ResponseBufferItem
Expand Down Expand Up @@ -261,6 +262,13 @@ async def embeddings_endpoint(self, request: EmbeddingRequest) -> EmbeddingRespo
event = asyncio.Event()
self.response_buffer[uid] = ResponseBufferItem(event=event)

# Trigger callback
self._server._callback_runner.trigger_event(
EventTypes.ON_REQUEST.value,
active_requests=self._server.active_requests,
litserver=self._server,
)

self.request_queue.put_nowait((response_queue_id, uid, time.monotonic(), request.model_copy()))
await event.wait()

Expand Down
42 changes: 42 additions & 0 deletions tests/unit/test_callbacks.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,3 +80,45 @@ async def test_request_tracker(capfd):
await run_simple_request(server, 4)
captured = capfd.readouterr()
assert "Active requests: 4" in captured.out, f"Expected pattern not found in output: {captured.out}"


@pytest.mark.asyncio
async def test_request_tracker_with_spec(capfd):
    """The ON_REQUEST callback must fire for requests routed through the embeddings spec."""
    from litserve.specs.openai_embedding import OpenAIEmbeddingSpec
    from litserve.test_examples.openai_embedding_spec_example import TestEmbedAPI

    api = TestEmbedAPI(spec=OpenAIEmbeddingSpec())
    server = ls.LitServer(api, track_requests=True, callbacks=[RequestTracker()])

    with wrap_litserve_start(server) as server:
        async with (
            LifespanManager(server.app) as lifespan,
            AsyncClient(transport=ASGITransport(app=lifespan.app), base_url="http://test") as client,
        ):
            response = await client.post("/v1/embeddings", json={"input": "test", "model": "test"})
            assert response.status_code == 200

    # RequestTracker prints the active-request count; verify it was triggered exactly once.
    captured = capfd.readouterr()
    assert "Active requests: 1" in captured.out, f"Expected pattern not found in output: {captured.out}"


@pytest.mark.asyncio
async def test_request_tracker_with_openai_spec(capfd):
    """The ON_REQUEST callback must fire for requests routed through the OpenAI chat spec."""
    from litserve.specs.openai import OpenAISpec
    from litserve.test_examples.openai_spec_example import TestAPI

    api = TestAPI(spec=OpenAISpec())
    server = ls.LitServer(api, track_requests=True, callbacks=[RequestTracker()])

    with wrap_litserve_start(server) as server:
        async with (
            LifespanManager(server.app) as lifespan,
            AsyncClient(transport=ASGITransport(app=lifespan.app), base_url="http://test") as client,
        ):
            response = await client.post(
                "/v1/chat/completions", json={"messages": [{"role": "user", "content": "test"}], "model": "test"}
            )
            assert response.status_code == 200

    # RequestTracker prints the active-request count; verify it was triggered exactly once.
    captured = capfd.readouterr()
    assert "Active requests: 1" in captured.out, f"Expected pattern not found in output: {captured.out}"
Loading