
Commit 0ab4d40

feat: system-prompt
1 parent 37bc971 commit 0ab4d40

2 files changed: +36 -10 lines changed

veadk/agent.py
Lines changed: 0 additions & 2 deletions

```diff
@@ -201,10 +201,8 @@ def model_post_init(self, __context: Any) -> None:
 
         if not self.model:
             if self.enable_responses:
-                # from veadk.utils.patches import patch_google_adk_call_llm_async
                 from veadk.models.ark_llm import ArkLlm
 
-                # patch_google_adk_call_llm_async()
                 self.model = ArkLlm(
                     model=f"{self.model_provider}/{self.model_name}",
                     api_key=self.model_api_key,
```
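With the monkey-patch removed, the responses-enabled branch simply constructs an `ArkLlm` directly. A minimal standalone sketch of that construction (not from the commit; the provider/model string and the environment variable are placeholders):

```python
import os

from veadk.models.ark_llm import ArkLlm

# Equivalent to what the simplified branch in model_post_init now does:
# build the model id as "<provider>/<name>" and pass the API key through.
model = ArkLlm(
    model="openai/doubao-seed-1-6",         # hypothetical provider/name pair
    api_key=os.environ.get("ARK_API_KEY"),  # hypothetical env var for the key
)
```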

veadk/models/ark_llm.py
Lines changed: 36 additions & 8 deletions

```diff
@@ -108,8 +108,18 @@ async def openai_response_async(request_data: dict):
     )
     filtered_request_data["model"] = model_name  # remove custom_llm_provider
 
+    # Remove tools in subsequent rounds (when previous_response_id is present)
     if (
         "tools" in filtered_request_data
+        and "previous_response_id" in filtered_request_data
+        and filtered_request_data["previous_response_id"] is not None
+    ):
+        # Remove tools in subsequent rounds regardless of caching status
+        del filtered_request_data["tools"]
+
+    # Ensure thinking field consistency for cache usage
+    if (
+        "thinking" in filtered_request_data
         and "extra_body" in filtered_request_data
         and isinstance(filtered_request_data["extra_body"], dict)
         and "caching" in filtered_request_data["extra_body"]
@@ -118,20 +128,37 @@ async def openai_response_async(request_data: dict):
         and "previous_response_id" in filtered_request_data
         and filtered_request_data["previous_response_id"] is not None
     ):
-        # Remove tools when caching is enabled and previous_response_id is present
-        del filtered_request_data["tools"]
+        # For cache usage, thinking should be consistent with previous round
+        # If thinking is present but inconsistent, remove it to avoid cache miss
+        # Note: This is a placeholder - actual consistency check requires state tracking
+        pass
 
-    # Remove instructions when caching is enabled with specific configuration
+    # Ensure store field is true or default when caching is enabled
     if (
-        "instructions" in filtered_request_data
-        and "extra_body" in filtered_request_data
+        "extra_body" in filtered_request_data
         and isinstance(filtered_request_data["extra_body"], dict)
         and "caching" in filtered_request_data["extra_body"]
         and isinstance(filtered_request_data["extra_body"]["caching"], dict)
         and filtered_request_data["extra_body"]["caching"].get("type") == "enabled"
     ):
-        # Remove instructions when caching is enabled
-        del filtered_request_data["instructions"]
+        # Set store to true when caching is enabled for writing
+        if "store" not in filtered_request_data:
+            filtered_request_data["store"] = True
+        elif filtered_request_data["store"] is False:
+            # Override false to true for cache writing
+            filtered_request_data["store"] = True
+
+        # [NOTE] Due to the Volcano Ark settings, there is a conflict between the cache and the instructions field.
+        # If a system prompt is needed, it should be placed in the system role message within the input, instead of using the instructions parameter.
+        # https://www.volcengine.com/docs/82379/1585128
+        instructions = filtered_request_data.pop("instructions", None)
+        filtered_request_data["input"] = [
+            {
+                "content": [{"text": instructions, "type": "input_text"}],
+                "role": "system",
+                "type": "message",
+            }
+        ] + filtered_request_data["input"]
 
     client = OpenAI(
         base_url=request_data["api_base"],
```
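Taken together, these two hunks reshape the request before it is sent: tools are dropped on follow-up rounds that carry a previous_response_id, store is forced to true whenever Ark context caching is enabled, and the instructions text is moved into a system-role message at the front of input. A minimal sketch of that transformation on a hypothetical request dict (field values are illustrative only; the guard for a missing instructions value is an addition of this sketch, not part of the diff):

```python
def reshape_for_ark_responses(request: dict) -> dict:
    """Sketch of the request reshaping shown in the diff above."""
    data = dict(request)

    # Follow-up rounds (previous_response_id present) do not resend tools.
    if data.get("previous_response_id") is not None:
        data.pop("tools", None)

    extra_body = data.get("extra_body")
    caching = extra_body.get("caching") if isinstance(extra_body, dict) else None
    if isinstance(caching, dict) and caching.get("type") == "enabled":
        # Cache writing requires store to be true.
        data["store"] = True

        # Ark's cache conflicts with the instructions parameter, so the system
        # prompt goes into input as a system-role message instead.
        instructions = data.pop("instructions", None)
        if instructions is not None:  # guard added in this sketch, not in the diff
            data["input"] = [
                {
                    "content": [{"text": instructions, "type": "input_text"}],
                    "role": "system",
                    "type": "message",
                }
            ] + data["input"]
    return data


# Illustrative request (values are not taken from the commit):
request = {
    "instructions": "You are a helpful assistant.",
    "input": [{"role": "user", "content": [{"type": "input_text", "text": "Hi"}]}],
    "tools": [{"type": "function", "name": "get_weather"}],
    "previous_response_id": None,
    "extra_body": {"caching": {"type": "enabled"}},
    "store": False,
}
print(reshape_for_ark_responses(request)["input"][0]["role"])  # -> system
```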
```diff
@@ -164,7 +191,8 @@ async def acompletion(
         ) = self._get_request_data(model, messages, tools, **kwargs)
 
         # 3. Call litellm.aresponses with the transformed request data
-        # Cannot be called directly; there is a litellm bug :
+        # [NOTE] There is a litellm bug, so we cannot call
+        # litellm.aresponses directly here:
         # https://github.com/BerriAI/litellm/issues/16267
         # raw_response = await aresponses(
         #     **request_data,
```
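Because of the litellm issue linked above, the transformed request is sent through an OpenAI client pointed at the Ark endpoint rather than through litellm.aresponses. The commit's actual call sits outside this hunk; below is a minimal sketch of that kind of direct call, assuming the OpenAI Python SDK's Responses API (client.responses.create) and placeholder endpoint, key, and model values:

```python
from openai import OpenAI

# Hypothetical values; in ark_llm.py the real base_url/api_key come from request_data.
client = OpenAI(
    base_url="https://ark.cn-beijing.volces.com/api/v3",  # assumed Ark endpoint
    api_key="<ARK_API_KEY>",
)

# Send the already-reshaped payload (system message inside input, store=True, ...)
# directly via the Responses API instead of litellm.aresponses.
response = client.responses.create(
    model="doubao-seed-1-6",  # placeholder model name
    input=[
        {
            "type": "message",
            "role": "system",
            "content": [{"type": "input_text", "text": "You are a helpful assistant."}],
        },
        {"role": "user", "content": [{"type": "input_text", "text": "Hello"}]},
    ],
    store=True,
)
print(response.output_text)
```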
