Skip to content

Commit 78a4728

Browse files
feat: remove think content when the text contains only a closing </think> tag
1 parent e6fd69e commit 78a4728

File tree

1 file changed

+10
-4
lines changed

1 file changed

+10
-4
lines changed

graphgen/bases/base_llm_wrapper.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -61,11 +61,17 @@ async def generate_inputs_prob(
6161
def filter_think_tags(text: str, think_tag: str = "think") -> str:
6262
"""
6363
Remove <think> tags from the text.
64-
If the text contains <think> and </think>, it removes everything between them and the tags themselves.
64+
- If the text contains <think> and </think>, it removes everything between them and the tags themselves.
65+
- If the text contains only </think>, it removes content before the tag.
6566
"""
66-
think_pattern = re.compile(rf"<{think_tag}>.*?</{think_tag}>", re.DOTALL)
67-
filtered_text = think_pattern.sub("", text).strip()
68-
return filtered_text if filtered_text else text.strip()
67+
paired_pattern = re.compile(rf"<{think_tag}>.*?</{think_tag}>", re.DOTALL)
68+
filtered = paired_pattern.sub("", text)
69+
70+
orphan_pattern = re.compile(rf"^.*?</{think_tag}>", re.DOTALL)
71+
filtered = orphan_pattern.sub("", filtered)
72+
73+
filtered = filtered.strip()
74+
return filtered if filtered else text.strip()
6975

7076
def shutdown(self) -> None:
7177
"""Shutdown the LLM engine if applicable."""

0 commit comments

Comments
 (0)