Commit 1de1c64

Implement multiturn history in openai backend
Signed-off-by: Samuel Monson <[email protected]>
1 parent e276f6c commit 1de1c64

2 files changed: +24 -7 lines changed

src/guidellm/backends/openai.py

Lines changed: 21 additions & 6 deletions
@@ -16,6 +16,7 @@
 import json
 import time
 from collections.abc import AsyncIterator
+from itertools import chain
 from pathlib import Path
 from typing import Any, ClassVar, Optional, Union
 
@@ -29,7 +30,7 @@
     GenerationRequestTimings,
     GenerationResponse,
 )
-from guidellm.scheduler import ScheduledRequestInfo
+from guidellm.scheduler import HistoryT, ScheduledRequestInfo
 
 __all__ = ["OpenAIHTTPBackend", "UsageStats"]
 
@@ -280,7 +281,7 @@ async def resolve(
         self,
         request: GenerationRequest,
         request_info: ScheduledRequestInfo,
-        history: Optional[list[tuple[GenerationRequest, GenerationResponse]]] = None,
+        history: Optional[HistoryT[GenerationRequest, GenerationResponse]] = None,
     ) -> AsyncIterator[tuple[GenerationResponse, ScheduledRequestInfo]]:
         """
         Process a generation request and yield progressive responses.
@@ -295,10 +296,8 @@
         :yields: Tuples of (response, updated_request_info) as generation progresses.
         """
         self._check_in_process()
-        if history is not None:
-            raise NotImplementedError(
-                "Multi-turn requests with conversation history are not yet supported"
-            )
+        if history:
+            request = self._apply_history(request, history)
 
         response = GenerationResponse(
             request_id=request.request_id,
@@ -500,6 +499,22 @@ async def chat_completions(
             self._get_completions_usage_stats(data),
         )
 
+    def _apply_history(
+        self,
+        request: GenerationRequest,
+        history: HistoryT[GenerationRequest, GenerationResponse],
+    ) -> GenerationRequest:
+        """
+        Apply conversation history to the current request.
+        """
+
+        def turn_to_text(turn: tuple[GenerationRequest, GenerationResponse]) -> str:
+            req, res = turn
+            return f"{req.content}{res.value}"
+
+        request.content = "".join(chain(map(turn_to_text, history), (request.content,)))
+        return request
+
     def _build_headers(
         self,
         api_key: Optional[str],
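
For readers skimming the diff: the new _apply_history helper implements multi-turn support by flattening prior turns into the prompt, concatenating each turn's request content and response text in order ahead of the current request's content. Below is a minimal standalone sketch of that behavior, using stand-in dataclasses in place of guidellm's GenerationRequest/GenerationResponse and assuming content and value are plain strings:

    from dataclasses import dataclass
    from itertools import chain

    # Stand-ins for guidellm's GenerationRequest / GenerationResponse;
    # only the fields _apply_history touches are modeled here.
    @dataclass
    class Req:
        content: str

    @dataclass
    class Res:
        value: str

    def apply_history(request: Req, history: list[tuple[Req, Res]]) -> Req:
        # Each prior turn contributes "<prompt><completion>" to the new prompt.
        def turn_to_text(turn: tuple[Req, Res]) -> str:
            req, res = turn
            return f"{req.content}{res.value}"

        request.content = "".join(chain(map(turn_to_text, history), (request.content,)))
        return request

    history = [(Req("Hi. "), Res("Hello! ")), (Req("2+2? "), Res("4. "))]
    print(apply_history(Req("And 3+3?"), history).content)
    # -> Hi. Hello! 2+2? 4. And 3+3?

One design note: history is prepended by plain string concatenation, so any turn separators must already be part of the stored contents.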

src/guidellm/scheduler/worker.py

Lines changed: 3 additions & 1 deletion
@@ -349,7 +349,9 @@ async def _process_next_request(self):
         # Process the request with the backend
         request_info.scheduler_timings.resolve_start = time.time()
         self._send_update("in_progress", response, request, request_info)
-        async for resp, info in self.backend.resolve(request, request_info, None):
+        async for resp, info in self.backend.resolve(
+            request, request_info, history
+        ):
             response = resp
             request_info = info
 
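
Note that this hunk only threads the history value into backend.resolve(); the code that accumulates history between turns lives outside the lines shown. Assuming HistoryT behaves like a sequence of (request, response) tuples, as the openai.py changes imply, a driver loop could build it along these lines (a hedged sketch; the GenerationRequest and ScheduledRequestInfo constructor calls here are illustrative assumptions, not guidellm's actual API):

    async def run_conversation(backend, prompts):
        # Accumulate (request, response) pairs and replay them as history
        # so _apply_history can prepend earlier turns to each new prompt.
        history = []
        for prompt in prompts:
            request = GenerationRequest(content=prompt)  # assumed constructor
            request_info = ScheduledRequestInfo()        # assumed constructor
            response = None
            async for resp, info in backend.resolve(
                request, request_info, history or None
            ):
                response = resp  # keep the latest progressive response
            history.append((request, response))
        return history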
