|
| 1 | +from __future__ import annotations |
| 2 | + |
| 3 | +import hashlib |
| 4 | +import json |
| 5 | +import threading |
| 6 | +import uuid |
| 7 | +from typing import Any, Dict, List, Tuple |
| 8 | + |
| 9 | + |
# Guards the fingerprint cache below; ensure_session_id holds it while it
# looks up and inserts entries.
_LOCK = threading.Lock()
# Maps the SHA-256 hex fingerprint of a canonical prefix to the session UUID
# that was generated for it.
_FINGERPRINT_TO_UUID: Dict[str, str] = {}
# Fingerprints in insertion order (oldest first); _remember pops from the
# front of this list to choose which cache entry to evict.
_ORDER: List[str] = []
# Maximum number of fingerprints kept; once _ORDER grows past this, the
# oldest entry is dropped.
_MAX_ENTRIES = 10000
| 14 | + |
| 15 | + |
def _canonicalize_first_user_message(input_items: List[Dict[str, Any]]) -> Dict[str, Any] | None:
    """
    Extract the first stable user message from Responses input items.

    The result is a good fingerprint source for prompt caching: it keeps only
    the fields that identify the message (text and image URLs) and drops
    everything else.

    Accepted message shapes:
      * ``{"type": "message", "role": "user", "content": [...parts...]}``
      * the same with ``"type"`` omitted (treated as ``"message"``)
      * ``content`` given as a plain string, which is normalized to a single
        ``input_text`` part

    Items that are not dicts, are not user messages, or yield no non-empty
    ``input_text`` / ``input_image`` parts are skipped.

    Returns the normalized message dict, or ``None`` when ``input_items`` is
    empty/``None`` or contains no usable user message.
    """
    for item in input_items or ():
        if not isinstance(item, dict):
            continue
        # A missing "type" is the shorthand message form; any other explicit
        # type (function calls, reasoning items, ...) is not a message.
        if item.get("type") not in (None, "message"):
            continue
        if item.get("role") != "user":
            continue

        content = item.get("content")
        if isinstance(content, str):
            # Shorthand: a bare string is equivalent to one input_text part.
            content = [{"type": "input_text", "text": content}]
        if not isinstance(content, list):
            continue

        norm_content = []
        for part in content:
            if not isinstance(part, dict):
                continue
            ptype = part.get("type")
            if ptype == "input_text":
                text = part.get("text")
                if isinstance(text, str) and text:
                    norm_content.append({"type": "input_text", "text": text})
            elif ptype == "input_image":
                url = part.get("image_url")
                if isinstance(url, str) and url:
                    norm_content.append({"type": "input_image", "image_url": url})

        # Only a message with at least one usable part counts as "stable";
        # otherwise keep scanning later items.
        if norm_content:
            return {"type": "message", "role": "user", "content": norm_content}
    return None
| 47 | + |
| 48 | + |
def canonicalize_prefix(instructions: str | None, input_items: List[Dict[str, Any]]) -> str:
    """
    Build the canonical JSON string describing a request's stable prefix.

    The prefix holds the whitespace-stripped ``instructions`` (when it is a
    non-blank string) and the normalized first user message from
    ``input_items`` (when one is found). It is serialized compactly with
    sorted keys so that equal prefixes always produce the same string.
    """
    trimmed = instructions.strip() if isinstance(instructions, str) else ""
    leading_message = _canonicalize_first_user_message(input_items)

    canonical: Dict[str, Any] = {}
    if trimmed:
        canonical["instructions"] = trimmed
    if leading_message is not None:
        canonical["first_user_message"] = leading_message

    return json.dumps(canonical, sort_keys=True, separators=(",", ":"))
| 57 | + |
| 58 | + |
def _fingerprint(s: str) -> str:
    """Return the SHA-256 hex digest of ``s`` encoded as UTF-8."""
    hasher = hashlib.sha256()
    hasher.update(s.encode("utf-8"))
    return hasher.hexdigest()
| 61 | + |
| 62 | + |
def _remember(fp: str, sid: str) -> None:
    """
    Record ``sid`` as the session id for fingerprint ``fp``.

    An already-known fingerprint is left untouched. After inserting a new
    one, the oldest fingerprint is evicted if the cache has grown past
    ``_MAX_ENTRIES``. This function takes no lock itself; ensure_session_id
    calls it while holding ``_LOCK``.
    """
    if fp not in _FINGERPRINT_TO_UUID:
        _FINGERPRINT_TO_UUID[fp] = sid
        _ORDER.append(fp)
        overflow = len(_ORDER) - _MAX_ENTRIES
        if overflow > 0:
            # Drop the entry that was inserted first.
            stale = _ORDER.pop(0)
            _FINGERPRINT_TO_UUID.pop(stale, None)
| 71 | + |
| 72 | + |
def ensure_session_id(
    instructions: str | None,
    input_items: List[Dict[str, Any]],
    client_supplied: str | None = None,
) -> str:
    """
    Return the session id to use for a request.

    A non-blank ``client_supplied`` string wins and is returned stripped.
    Otherwise the canonical prefix of the request is fingerprinted and looked
    up in the module cache: a known fingerprint returns its stored id, an
    unknown one gets a fresh UUID4 string that is stored and returned. The
    lookup and insertion run while holding ``_LOCK``.
    """
    if isinstance(client_supplied, str):
        explicit = client_supplied.strip()
        if explicit:
            return explicit

    fingerprint = _fingerprint(canonicalize_prefix(instructions, input_items))
    with _LOCK:
        try:
            return _FINGERPRINT_TO_UUID[fingerprint]
        except KeyError:
            fresh = str(uuid.uuid4())
            _remember(fingerprint, fresh)
    return fresh
| 89 | + |
0 commit comments