|
13 | 13 | from aiperf.endpoints.base_endpoint import BaseEndpoint |
14 | 14 |
|
15 | 15 |
|
| 16 | +def _extract_v2_text(output: dict[str, Any]) -> str | None: |
| 17 | + """Extract text from a V2 BYTES output tensor. |
| 18 | +
|
| 19 | + Args: |
| 20 | + output: V2 output tensor dict with ``data`` key. |
| 21 | +
|
| 22 | + Returns: |
| 23 | + First data element as string, or None if empty. |
| 24 | + """ |
| 25 | + data = output.get("data") |
| 26 | + if isinstance(data, list) and len(data) > 0 and data[0] is not None: |
| 27 | + return str(data[0]) |
| 28 | + return None |
| 29 | + |
| 30 | + |
def parse_v2_text_response(
    endpoint: BaseEndpoint,
    response: InferenceServerResponse,
    output_name: str,
) -> ParsedResponse | None:
    """Parse V2 inference response, extracting text from BYTES output tensor.

    Shared by KServeV2InferEndpoint and KServeV2VLMEndpoint since both
    produce text output in the same tensor format.

    Outputs whose ``name`` equals *output_name* are tried first; if none of
    them yields text, any output with data is accepted as a fallback (same
    ordering as scanning named matches, then all outputs in order).

    Args:
        endpoint: Endpoint instance (for make_text_response_data).
        response: Raw response from inference server.
        output_name: Expected output tensor name.

    Returns:
        Parsed response with extracted text content, or None if no content.
    """
    json_obj = response.get_json()
    if not json_obj:
        return None

    outputs = json_obj.get("outputs")
    if not outputs:
        return None

    # Single loop over "named matches first, then everything" replaces the
    # previously duplicated extract-and-wrap logic; the first output that
    # yields text wins, so revisiting a named output in the fallback pass
    # is harmless.
    named = [o for o in outputs if o.get("name") == output_name]
    for output in [*named, *outputs]:
        text = _extract_v2_text(output)
        if text is not None:
            return ParsedResponse(
                perf_ns=response.perf_ns,
                data=endpoint.make_text_response_data(text),
            )

    return None
| 75 | + |
| 76 | + |
16 | 77 | class KServeV2InferEndpoint(BaseEndpoint): |
17 | 78 | """KServe V2 Open Inference Protocol endpoint for Triton/TRT-LLM. |
18 | 79 |
|
@@ -91,32 +152,4 @@ def parse_response( |
91 | 152 | Returns: |
92 | 153 | Parsed response with extracted text content, or None if no content |
93 | 154 | """ |
94 | | - json_obj = response.get_json() |
95 | | - if not json_obj: |
96 | | - return None |
97 | | - |
98 | | - outputs = json_obj.get("outputs") |
99 | | - if not outputs: |
100 | | - return None |
101 | | - |
102 | | - for output in outputs: |
103 | | - if output.get("name") == self._output_name: |
104 | | - data = output.get("data") |
105 | | - if isinstance(data, list) and len(data) > 0 and data[0] is not None: |
106 | | - text = str(data[0]) |
107 | | - return ParsedResponse( |
108 | | - perf_ns=response.perf_ns, |
109 | | - data=self.make_text_response_data(text), |
110 | | - ) |
111 | | - |
112 | | - # Fallback: try first output with data |
113 | | - for output in outputs: |
114 | | - data = output.get("data") |
115 | | - if isinstance(data, list) and len(data) > 0 and data[0] is not None: |
116 | | - text = str(data[0]) |
117 | | - return ParsedResponse( |
118 | | - perf_ns=response.perf_ns, |
119 | | - data=self.make_text_response_data(text), |
120 | | - ) |
121 | | - |
122 | | - return None |
| 155 | + return parse_v2_text_response(self, response, self._output_name) |
0 commit comments