@@ -10,12 +10,15 @@
 import json
 import os
 import sqlite3
+import uuid
 from collections.abc import Generator
 from contextlib import contextmanager
 from enum import StrEnum
 from pathlib import Path
 from typing import Any, Literal, cast
 
+from openai.types.chat import ChatCompletion, ChatCompletionChunk
+
 from llama_stack.log import get_logger
 
 logger = get_logger(__name__, category="testing")
@@ -248,6 +251,20 @@ async def _patched_inference_method(original_method, self, client_type, endpoint
         recording = _current_storage.find_recording(request_hash)
         if recording:
             response_body = recording["response"]["body"]
+            # Recorded chat completions are stored in a sqlite database with a
+            # unique constraint on the id, so a response can't be replayed with
+            # its original id. Generate a fresh id before returning it.
+            if (
+                isinstance(response_body, list)
+                and len(response_body) > 0
+                and isinstance(response_body[0], ChatCompletionChunk)
+            ):
+                # All chunks of one stream share an id; keep them consistent.
+                new_id = "chatcmpl-" + uuid.uuid4().hex
+                for chunk in response_body:
+                    chunk.id = new_id
+            elif isinstance(response_body, ChatCompletion):
+                response_body.id = "chatcmpl-" + uuid.uuid4().hex
 
             if recording["response"].get("is_streaming", False):
 
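For context on why the replay path rewrites ids, here is a minimal sketch of the idea, assuming the `openai` SDK is installed; the model name, message content, and variable names are placeholders for illustration, not values from this PR.

```python
import uuid

from openai.types.chat import ChatCompletion, ChatCompletionMessage
from openai.types.chat.chat_completion import Choice

# Build a stand-in for a recorded response (placeholder values throughout).
recorded = ChatCompletion(
    id="chatcmpl-recorded",
    choices=[
        Choice(
            finish_reason="stop",
            index=0,
            message=ChatCompletionMessage(role="assistant", content="hello"),
        )
    ],
    created=0,
    model="example-model",
    object="chat.completion",
)

# Replaying the same recording twice must not reuse the id, or inserting the
# response into a table with a unique constraint on the id column would fail.
recorded.id = "chatcmpl-" + uuid.uuid4().hex
replayed_again_id = "chatcmpl-" + uuid.uuid4().hex
assert recorded.id != replayed_again_id
```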