16
16
import json
17
17
import time
18
18
from collections .abc import AsyncIterator
19
+ from itertools import chain
19
20
from pathlib import Path
20
21
from typing import Any , ClassVar , Optional , Union
21
22
29
30
GenerationRequestTimings ,
30
31
GenerationResponse ,
31
32
)
32
- from guidellm .scheduler import ScheduledRequestInfo
33
+ from guidellm .scheduler import HistoryT , ScheduledRequestInfo
33
34
34
35
__all__ = ["OpenAIHTTPBackend" , "UsageStats" ]
35
36
@@ -280,7 +281,7 @@ async def resolve(
280
281
self ,
281
282
request : GenerationRequest ,
282
283
request_info : ScheduledRequestInfo ,
283
- history : Optional [list [ tuple [ GenerationRequest , GenerationResponse ] ]] = None ,
284
+ history : Optional [HistoryT [ GenerationRequest , GenerationResponse ]] = None ,
284
285
) -> AsyncIterator [tuple [GenerationResponse , ScheduledRequestInfo ]]:
285
286
"""
286
287
Process a generation request and yield progressive responses.
@@ -295,10 +296,8 @@ async def resolve(
295
296
:yields: Tuples of (response, updated_request_info) as generation progresses.
296
297
"""
297
298
self ._check_in_process ()
298
- if history is not None :
299
- raise NotImplementedError (
300
- "Multi-turn requests with conversation history are not yet supported"
301
- )
299
+ if history :
300
+ request = self ._apply_history (request , history )
302
301
303
302
response = GenerationResponse (
304
303
request_id = request .request_id ,
@@ -500,6 +499,22 @@ async def chat_completions(
500
499
self ._get_completions_usage_stats (data ),
501
500
)
502
501
502
def _apply_history(
    self,
    request: GenerationRequest,
    history: HistoryT[GenerationRequest, GenerationResponse],
) -> GenerationRequest:
    """
    Prefix the current request's content with the text of earlier turns.

    Each prior turn contributes ``"<request content> <response value> "``; the
    turns are concatenated in iteration order and followed by the current
    request's content. The incoming ``request`` object is not modified: the
    combined content is written to a shallow copy, which is returned.

    :param request: The request for the current turn.
    :param history: Prior (request, response) pairs to prepend.
    :return: A shallow copy of ``request`` whose ``content`` holds the history
        text followed by the original content.
    """
    # Function-scope import: this helper is the only user of ``copy``.
    import copy

    def turn_to_text(turn: tuple[GenerationRequest, GenerationResponse]) -> str:
        req, res = turn
        # A response without a value contributes no text, rather than the
        # literal string "None" that f-string formatting would produce.
        answer = "" if res.value is None else res.value
        return f"{req.content} {answer} "

    # Write to a copy so the caller's request keeps its original content.
    # NOTE(review): if the original object is later stored as a history turn,
    # mutating it here would make the next call prepend already-expanded
    # text -- confirm against the scheduler's history handling.
    updated = copy.copy(request)
    updated.content = "".join(
        chain(map(turn_to_text, history), (request.content,))
    )
    return updated
517
+
503
518
def _build_headers (
504
519
self ,
505
520
api_key : Optional [str ],
0 commit comments