@@ -1,3 +1,4 @@
+import itertools
 from abc import ABC, abstractmethod
 from typing import Generic, TypeVar
 
@@ -34,22 +35,47 @@ def complete(self) -> bool: ...
 
 
 class GenerativeRequestSession(RequestSession[GenerationRequest, ResponseSummary]):
-    def __init__(self, request: GenerationRequest) -> None:
-        self.request = request
-        self._complete = False
+    def __init__(self, prompts: list[GenerationRequest]) -> None:
+        if not prompts:
+            raise ValueError("Prompts cannot be empty")
+
+        self.prompts = prompts
+        self.responses: list[str] = []
 
     def __len__(self) -> int:
-        return 1
+        return len(self.prompts)
 
     def get_next_request(self) -> GenerationRequest:
-        return self.request
+        completed_responses = len(self.responses)
+        base_request = self.prompts[completed_responses].model_copy(deep=True)
+        base_request.content = "".join(
+            itertools.chain.from_iterable(
+                zip((x.content for x in self.prompts), self.responses + [""])
+            )
+        )
+        base_request.stats["prompt_tokens"] = sum(
+            x.stats["prompt_tokens"] for x in self.prompts[: completed_responses + 1]
+        )
+        base_request.constraints["output_tokens"] = sum(
+            x.constraints["output_tokens"]
+            for x in self.prompts[: completed_responses + 1]
+        )
+
+        return base_request
 
     def get_next_delay(self) -> float:
         return 0.0
 
-    def push_response(self, response: ResponseSummary) -> None:  # noqa: ARG002
-        self._complete = True
+    def push_response(self, response: ResponseSummary) -> None:
+        if len(self.responses) < len(self.prompts):
+            if response.response_output_tokens is not None:
+                self.prompts[len(self.responses)].constraints["output_tokens"] = (
+                    response.response_output_tokens
+                )
+            self.responses.append(response.value)
+        else:
+            raise ValueError("Response list full")
 
     @property
     def complete(self) -> bool:
-        return self._complete
+        return len(self.responses) >= len(self.prompts)
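For context on the new get_next_request logic: a minimal standalone sketch, with plain strings standing in for GenerationRequest objects (illustrative only, not the guidellm API), showing how prior prompts and responses are interleaved into a single multi-turn prompt string:

import itertools

# Hypothetical stand-in data: three queued prompts, one response received so far.
prompts = ["What is 2+2? ", "Now double it. ", "Add three. "]
responses = ["4. "]

# zip() stops at the shorter iterable, so only prompts up to and including the
# one currently awaiting a response are included; "" marks the unanswered turn.
content = "".join(
    itertools.chain.from_iterable(zip(prompts, responses + [""]))
)
print(content)  # -> "What is 2+2? 4. Now double it. "

The empty string appended to responses keeps the current prompt as the final element of the concatenation, mirroring how the session resends the accumulated conversation on each turn.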