
Commit c668164

fix: stream type done but response.id
1 parent 5b9e6d7 commit c668164

File tree: 3 files changed, +77 -39 lines changed


pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -16,7 +16,7 @@ dependencies = [
     "a2a-sdk>=0.3.0", # For Google Agent2Agent protocol
     "deprecated>=1.2.18",
     "google-adk>=1.10.0", # For basic agent architecture
-    "litellm>=1.74.3", # For model inference
+    "litellm>=1.79.3", # For model inference
     "loguru>=0.7.3", # For better logging
     "opentelemetry-exporter-otlp>=1.35.0",
     "opentelemetry-instrumentation-logging>=0.56b0",

veadk/models/ark_llm.py

Lines changed: 9 additions & 8 deletions
@@ -10,7 +10,6 @@
 from google.adk.models.lite_llm import (
     LiteLlm,
     _get_completion_inputs,
-    _model_response_to_chunk,
     FunctionChunk,
     TextChunk,
     _message_to_generate_content_response,
@@ -25,7 +24,7 @@
 )
 from pydantic import Field
 
-from veadk.models.transform import (
+from veadk.models.ark_transform import (
     CompletionToResponsesAPIHandler,
 )
 from veadk.utils.logger import get_logger
@@ -116,11 +115,13 @@ async def generate_content_async(
         usage_metadata = None
         fallback_index = 0
         raw_response = await self.llm_client.aresponse(**response_args)
-        stream_events = self.transform_handler.transform_streamable_response(
-            raw_response, model=self.model
-        )
-        async for part in stream_events:
-            for chunk, finish_reason in _model_response_to_chunk(part):
+        async for part in raw_response:
+            for (
+                chunk,
+                finish_reason,
+            ) in self.transform_handler.stream_event_to_chunk(
+                part, model=self.model
+            ):
                 if isinstance(chunk, FunctionChunk):
                     index = chunk.index or fallback_index
                     if index not in function_calls:
@@ -221,7 +222,7 @@ def _openai_response_to_generate_content_response(
         OpenAITypeResponse -> litellm.ModelResponse -> LlmResponse
         """
         model_response = self.transform_handler.transform_response(
-            openai_response=raw_response,
+            openai_response=raw_response, stream=False
         )
         llm_response = _model_response_to_generate_content_response(model_response)
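
With this change the streaming path no longer wraps the Responses API stream in litellm's CustomStreamWrapper; each raw stream event is converted to ADK chunks as it arrives. A minimal consumption sketch under stated assumptions (collect_text, handler, and raw_response are illustrative names; handler is assumed to expose the stream_event_to_chunk generator added in the transform module below, and TextChunk/FunctionChunk are the chunk types from google.adk.models.lite_llm):

from google.adk.models.lite_llm import FunctionChunk, TextChunk


async def collect_text(raw_response, handler, model: str) -> str:
    # Drain a Responses API event stream into plain text using the new
    # per-event conversion; tool-call chunks are skipped here, whereas
    # generate_content_async accumulates them by index.
    pieces: list[str] = []
    async for event in raw_response:
        for chunk, _finish_reason in handler.stream_event_to_chunk(event, model=model):
            if isinstance(chunk, TextChunk):
                pieces.append(chunk.text)
            elif isinstance(chunk, FunctionChunk):
                continue
    return "".join(pieces)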

veadk/models/ark_transform.py

Lines changed: 67 additions & 30 deletions
@@ -1,18 +1,31 @@
 import uuid
-from datetime import datetime
-from typing import Any, Dict, Optional, cast, List
+from typing import Any, Dict, Optional, cast, List, Generator, Tuple, Union
 
 import litellm
-from openai.types.responses import Response as OpenAITypeResponse
+from google.adk.models.lite_llm import (
+    TextChunk,
+    FunctionChunk,
+    UsageMetadataChunk,
+    _model_response_to_chunk,
+)
+from openai.types.responses import (
+    Response as OpenAITypeResponse,
+    ResponseStreamEvent,
+    ResponseTextDeltaEvent,
+)
+from openai.types.responses import (
+    ResponseCompletedEvent,
+)
 from litellm.completion_extras.litellm_responses_transformation.transformation import (
     LiteLLMResponsesTransformationHandler,
 )
 from litellm.litellm_core_utils.get_llm_provider_logic import get_llm_provider
-from litellm.litellm_core_utils.streaming_handler import CustomStreamWrapper
 from litellm.types.llms.openai import ResponsesAPIResponse
 from litellm.types.utils import (
     ModelResponse,
     LlmProviders,
+    Choices,
+    Message,
 )
 from litellm.utils import ProviderConfigManager
 
@@ -156,12 +169,13 @@ def transform_request(
         # Filter and reorganize scenarios that are not supported by some arks
         return ark_field_reorganization(result)
 
-    def transform_response(self, openai_response: OpenAITypeResponse) -> ModelResponse:
-        # responses api response to completion response
-        # get message
+    def transform_response(
+        self, openai_response: OpenAITypeResponse, stream: bool = False
+    ) -> ModelResponse:
+        # openai_type_response -> responses_api_response -> completion_response
         raw_response = ResponsesAPIResponse(**openai_response.model_dump())
 
-        model_response = ModelResponse()
+        model_response = ModelResponse(stream=stream)
         setattr(model_response, "usage", litellm.Usage())
         response = self.litellm_handler.transform_response(
             model=raw_response.model,
@@ -178,25 +192,48 @@ def transform_response(self, openai_response: OpenAITypeResponse) -> ModelRespon
         response.id = raw_response.id
         return response
 
-    def transform_streamable_response(self, result, model: str):
-        logging_obj = litellm.Logging(
-            model="doubao",
-            messages=None,
-            stream=True,
-            call_type="acompletion",
-            litellm_call_id=str(uuid.uuid4()),
-            function_id=str(uuid.uuid4()),
-            start_time=datetime.now(),
-        )
-        completion_stream = self.litellm_handler.get_model_response_iterator(
-            streaming_response=result,
-            sync_stream=True,
-            json_mode=False,
-        )
-        streamwrapper = CustomStreamWrapper(
-            completion_stream=completion_stream,
-            model=model,
-            custom_llm_provider="openai",
-            logging_obj=logging_obj,
-        )
-        return streamwrapper
+    def stream_event_to_chunk(
+        self, event: ResponseStreamEvent, model: str
+    ) -> Generator[
+        Tuple[
+            Optional[Union[TextChunk, FunctionChunk, UsageMetadataChunk]],
+            Optional[str],
+        ],
+        None,
+        None,
+    ]:
+        choices = []
+        model_response = None
+
+        if isinstance(event, ResponseTextDeltaEvent):
+            delta = Message(content=event.delta)
+            choices.append(
+                Choices(delta=delta, index=event.output_index, finish_reason=None)
+            )
+            model_response = ModelResponse(
+                stream=True, choices=choices, model=model, id=str(uuid.uuid4())
+            )
+        elif isinstance(event, ResponseCompletedEvent):
+            response = event.response
+            model_response = self.transform_response(response, stream=True)
+            model_response = fix_response(model_response)
+        else:
+            # Ignore other event types like ResponseOutputItemAddedEvent, etc.
+            pass
+
+        if model_response:
+            yield from _model_response_to_chunk(model_response)
+
+
+def fix_response(model_response: ModelResponse) -> ModelResponse:
+    """
+    Fix the response by filling in fields that cannot be transferred through the direct conversion.
+    """
+    for i, choice in enumerate(model_response.choices):
+        if choice.message.tool_calls:
+            for idx, tool_call in enumerate(choice.message.tool_calls):
+                if not tool_call.get("index"):
+                    model_response.choices[i].message.tool_calls[idx].index = 0
+
+    return model_response
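
For the non-streaming side of the same handler, a short usage sketch (hypothetical names: handler stands for the CompletionToResponsesAPIHandler instance and raw for a completed openai.types.responses.Response, mirroring the call site in ark_llm.py above):

# Convert a completed Responses API response into a litellm ModelResponse.
model_response = handler.transform_response(openai_response=raw, stream=False)

# transform_response copies the id from the raw response (response.id = raw_response.id),
# and on the streaming path fix_response backfills a missing tool_call index.
assert model_response.id == raw.id
print(model_response.choices[0].message.content)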
