
Commit 12f0f35

🐛 The prompt includes the thought process #2038
2 parents 9af761c + aebf020 commit 12f0f35

File tree

2 files changed: +558 -10 lines changed


backend/utils/llm_utils.py

Lines changed: 52 additions & 9 deletions
@@ -18,16 +18,37 @@ def _process_thinking_tokens(
 ) -> bool:
     """
     Process tokens to filter out thinking content between <think> and </think> tags.
+    Handles cases where providers only send a closing tag or mix reasoning_content.
     """
-    if is_thinking:
-        return THINK_END_PATTERN not in new_token
+    # Check for end tag first, as it might appear in the same token as start tag
+    if THINK_END_PATTERN in new_token:
+        # If we were never in think mode, treat everything accumulated so far as reasoning and clear it
+        if not is_thinking:
+            token_join.clear()
+            if callback:
+                callback("")  # clear any previously streamed reasoning content
+
+        # Exit thinking mode and only keep content after </think>
+        _, _, after_end = new_token.partition(THINK_END_PATTERN)
+        is_thinking = False
+        new_token = after_end
+        # Continue processing the remaining content in this token
 
+    # Check for start tag (after processing end tag, in case both are in the same token)
     if THINK_START_PATTERN in new_token:
+        # Drop any content before <think> and switch to thinking mode
+        _, _, after_start = new_token.partition(THINK_START_PATTERN)
+        new_token = after_start
+        is_thinking = True
+
+    if is_thinking:
+        # Still inside thinking content; ignore until we exit
         return True
 
-    token_join.append(new_token)
-    if callback:
-        callback("".join(token_join))
+    if new_token:
+        token_join.append(new_token)
+        if callback:
+            callback("".join(token_join))
 
     return False
 
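For reference, a minimal, runnable sketch of the tag-handling path added in the hunk above. It mirrors the diff but is not the module's code: the tag constants are assumed to be the literal <think>/</think> strings named in the docstring, the leading underscore is dropped, and the small driver at the end is purely illustrative.

# Sketch of the patched filtering logic; the tag constants below are assumed
# values taken from the docstring, not imports from the module.
from typing import Callable, List, Optional

THINK_START_PATTERN = "<think>"   # assumed literal
THINK_END_PATTERN = "</think>"    # assumed literal


def process_thinking_tokens(
    new_token: str,
    is_thinking: bool,
    token_join: List[str],
    callback: Optional[Callable[[str], None]] = None,
) -> bool:
    # A closing tag is handled first because some providers emit only </think>,
    # or pack both tags into a single streamed token.
    if THINK_END_PATTERN in new_token:
        if not is_thinking:
            # Everything buffered so far was reasoning: drop it and reset the stream.
            token_join.clear()
            if callback:
                callback("")
        _, _, after_end = new_token.partition(THINK_END_PATTERN)
        is_thinking = False
        new_token = after_end

    if THINK_START_PATTERN in new_token:
        # Drop anything before <think> and switch into thinking mode.
        _, _, after_start = new_token.partition(THINK_START_PATTERN)
        new_token = after_start
        is_thinking = True

    if is_thinking:
        # Still inside thinking content; ignore until the closing tag arrives.
        return True

    if new_token:
        token_join.append(new_token)
        if callback:
            callback("".join(token_join))
    return False


# Simulated stream where the model interleaves its reasoning with the answer.
parts: List[str] = []
thinking = False
for tok in ["<think>let me ", "reason</think>You are ", "a helpful assistant."]:
    thinking = process_thinking_tokens(tok, thinking, parts)
print("".join(parts))  # -> "You are a helpful assistant."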

@@ -46,8 +67,8 @@ def call_llm_for_system_prompt(
 
         llm = OpenAIModel(
             model_id=get_model_name_from_config(llm_model_config) if llm_model_config else "",
-            api_base=llm_model_config.get("base_url", ""),
-            api_key=llm_model_config.get("api_key", ""),
+            api_base=llm_model_config.get("base_url", "") if llm_model_config else "",
+            api_key=llm_model_config.get("api_key", "") if llm_model_config else "",
             temperature=0.3,
             top_p=0.95,
         )
@@ -65,16 +86,38 @@ def call_llm_for_system_prompt(
         current_request = llm.client.chat.completions.create(stream=True, **completion_kwargs)
         token_join: List[str] = []
         is_thinking = False
+        reasoning_content_seen = False
+        content_tokens_seen = 0
         for chunk in current_request:
-            new_token = chunk.choices[0].delta.content
+            delta = chunk.choices[0].delta
+            reasoning_content = getattr(delta, "reasoning_content", None)
+            new_token = delta.content
+
+            # Note: reasoning_content is separate metadata and doesn't affect content filtering
+            # We only filter content based on <think> tags in delta.content
+            if reasoning_content:
+                reasoning_content_seen = True
+                logger.debug("Received reasoning_content (metadata only, not filtering content)")
+
+            # Process content token if it exists
             if new_token is not None:
+                content_tokens_seen += 1
                 is_thinking = _process_thinking_tokens(
                     new_token,
                     is_thinking,
                     token_join,
                     callback,
                 )
-        return "".join(token_join)
+
+        result = "".join(token_join)
+        if not result and content_tokens_seen > 0:
+            logger.warning(
+                "Generated prompt is empty but %d content tokens were processed. "
+                "This suggests all content was filtered out.",
+                content_tokens_seen
+            )
+
+        return result
     except Exception as exc:
         logger.error("Failed to generate prompt from LLM: %s", str(exc))
         raise
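A similarly hedged sketch of the reworked streaming loop. The collect_prompt and fake_chunk helpers are illustrative stand-ins that do not exist in the repository; the fake chunks only mimic the delta.content / delta.reasoning_content shape of OpenAI streaming chunks, and the loop reuses process_thinking_tokens from the sketch above.

# Stand-in for the patched streaming loop (assumed helper names);
# relies on process_thinking_tokens defined in the previous sketch.
import logging
from types import SimpleNamespace
from typing import List

logger = logging.getLogger(__name__)


def collect_prompt(chunks) -> str:
    token_join: List[str] = []
    is_thinking = False
    content_tokens_seen = 0

    for chunk in chunks:
        delta = chunk.choices[0].delta
        # reasoning_content is provider metadata only; it never reaches the prompt.
        reasoning_content = getattr(delta, "reasoning_content", None)
        if reasoning_content:
            logger.debug("reasoning_content received (ignored for output)")

        new_token = delta.content
        if new_token is not None:
            content_tokens_seen += 1
            is_thinking = process_thinking_tokens(new_token, is_thinking, token_join)

    result = "".join(token_join)
    if not result and content_tokens_seen > 0:
        logger.warning("All %d content tokens were filtered out", content_tokens_seen)
    return result


def fake_chunk(content=None, reasoning=None):
    # Mimics an OpenAI streaming chunk closely enough for this sketch.
    delta = SimpleNamespace(content=content, reasoning_content=reasoning)
    return SimpleNamespace(choices=[SimpleNamespace(delta=delta)])


# A token carrying both tags at once previously left the loop stuck in thinking
# mode and yielded an empty prompt; with the fix, only the answer survives.
print(collect_prompt([
    fake_chunk(reasoning="planning the prompt..."),
    fake_chunk(content="<think>draft</think>"),
    fake_chunk(content="Answer only in English."),
]))  # -> "Answer only in English."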
