Skip to content

Commit 84b7336

Browse files
fix: remove 'think' content from llm outputs
Certain LLMs (e.g. DeepSeek R1 via Ollama) include 'think' tags and content in outputs, which breaks output JSON parsing and validation. Implemented a fix to remove 'think' tags and content from LLM outputs. Added relevant tests.
1 parent 26b0bbf commit 84b7336

36 files changed

Lines changed: 16256 additions & 18822 deletions

File tree

contextgem/internal/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@
8989
_is_json_serializable,
9090
_llm_call_result_is_valid,
9191
_parse_llm_output_as_json,
92+
_remove_thinking_content_from_llm_output,
9293
_run_async_calls,
9394
_run_sync,
9495
_setup_jinja2_template,
@@ -164,6 +165,7 @@
164165
"_is_json_serializable",
165166
"_get_sat_model",
166167
"_setup_jinja2_template",
168+
"_remove_thinking_content_from_llm_output",
167169
# Converters
168170
# DOCX
169171
"WORD_XML_NAMESPACES",

contextgem/internal/base/llms.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@
6868
_group_instances_by_fields,
6969
_llm_call_result_is_valid,
7070
_parse_llm_output_as_json,
71+
_remove_thinking_content_from_llm_output,
7172
_run_async_calls,
7273
_run_sync,
7374
_validate_parsed_llm_output,
@@ -1311,7 +1312,9 @@ def merge_usage_data(existing: _LLMUsage | None, new: _LLMUsage) -> _LLMUsage:
13111312
)
13121313
all_usage_data = merge_usage_data(all_usage_data, usage_data)
13131314
extracted_data = _validate_parsed_llm_output(
1314-
_parse_llm_output_as_json(extracted_data),
1315+
_parse_llm_output_as_json(
1316+
_remove_thinking_content_from_llm_output(extracted_data)
1317+
),
13151318
extracted_item_type=extracted_item_type,
13161319
justification_provided=add_justifications,
13171320
references_provided=add_references,

contextgem/internal/utils.py

Lines changed: 59 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -426,7 +426,49 @@ def _llm_call_result_is_valid(res: tuple[Any, _LLMUsage] | None) -> bool:
426426
return True
427427

428428

429-
def _parse_llm_output_as_json(output_str: str | dict | list) -> dict | list | None:
429+
def _remove_thinking_content_from_llm_output(output_str: str | None) -> str | None:
430+
"""
431+
Removes thinking content enclosed in <think></think> tags from the beginning of LLM outputs.
432+
433+
When using local reasoning LLMs (e.g. DeepSeek R1 in Ollama), the output may include
434+
thinking steps enclosed in <think></think> tags at the beginning. This function removes those tags
435+
and their content only if they appear at the start of the string, then strips any remaining whitespace.
436+
437+
This preserves any <think></think> tags that might appear later in the content as part of the
438+
actual response.
439+
440+
:param output_str: The output string from an LLM that may contain thinking content, can be None
441+
if LLM outputs invalid content
442+
:type output_str: str | None
443+
444+
:return: The cleaned string without initial thinking content and extra whitespace, or None if
445+
the input was None or an error occurred during processing
446+
:rtype: str | None
447+
"""
448+
if output_str is None:
449+
return None
450+
451+
try:
452+
# Check if the string starts with <think> tag
453+
if output_str.strip().startswith("<think>"):
454+
# Find the first closing </think> tag
455+
end_tag_pos = output_str.find("</think>")
456+
if end_tag_pos != -1:
457+
# Remove everything from start to the end of </think> tag
458+
cleaned_str = output_str[end_tag_pos + len("</think>") :]
459+
# Strip any remaining whitespace
460+
cleaned_str = cleaned_str.strip()
461+
assert len(cleaned_str) > 0, "Cleaned string is empty"
462+
return cleaned_str
463+
464+
return output_str.strip()
465+
except (AssertionError, AttributeError):
466+
return None
467+
468+
469+
def _parse_llm_output_as_json(
470+
output_str: str | dict | list | None,
471+
) -> dict | list | None:
430472
"""
431473
Parses the provided LLM-generated output into a JSON-compatible Python object.
432474
@@ -436,8 +478,9 @@ def _parse_llm_output_as_json(output_str: str | dict | list) -> dict | list | No
436478
` ```json` code block, removing them before parsing.
437479
438480
:param output_str: The output string to parse. It may already be a JSON-parsed Python object,
439-
a JSON string, or a string containing a JSON code block marked with ` ```json`.
440-
:type output_str: str | dict | list
481+
a JSON string, or a string containing a JSON code block marked with ` ```json`. Can be None
482+
if LLM outputs invalid content.
483+
:type output_str: str | dict | list | None
441484
442485
:return: A dictionary, a list, or `None` if parsing fails or the `output_str` type is invalid.
443486
:rtype: dict | list | None
@@ -448,8 +491,19 @@ def _parse_llm_output_as_json(output_str: str | dict | list) -> dict | list | No
448491

449492
except json.JSONDecodeError:
450493
try:
451-
# Strip of surrounding ```json, if any
452-
answer = output_str.lstrip(r"```json").rstrip(r"```")
494+
# Handle markdown code blocks using regex
495+
496+
answer = output_str.strip()
497+
498+
# Pattern to match content between ```json
499+
# (at string start) and ``` (at string end) markers
500+
json_block_pattern = r"^```json\s*([\s\S]*?)\s*```$"
501+
match = re.match(json_block_pattern, answer)
502+
503+
if match:
504+
# Get the content between the markers
505+
answer = match.group(1).strip()
506+
453507
return json.loads(answer)
454508
except json.JSONDecodeError:
455509
return None

contextgem/public/llms.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -946,7 +946,9 @@ async def _query_llm(
946946
chat_completion = await task
947947
else:
948948
chat_completion = await task
949-
answer = chat_completion.choices[0].message.content.strip()
949+
answer = chat_completion.choices[
950+
0
951+
].message.content # str, or None if invalid response
950952
usage.input = chat_completion.usage.prompt_tokens
951953
usage.output = chat_completion.usage.completion_tokens
952954
llm_call_obj._record_response_timestamp()

dev/requirements/requirements.dev.txt

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ aiosignal==1.3.2 ; python_version >= "3.10" and python_version < "3.14"
77
alabaster==0.7.16 ; python_version >= "3.10" and python_version < "3.14"
88
annotated-types==0.7.0 ; python_version >= "3.10" and python_version < "3.14"
99
anyio==4.9.0 ; python_version >= "3.10" and python_version < "3.14"
10-
argcomplete==3.5.3 ; python_version >= "3.10" and python_version < "3.14"
10+
argcomplete==3.6.2 ; python_version >= "3.10" and python_version < "3.14"
1111
async-timeout==5.0.1 ; python_version == "3.10"
1212
attrs==25.3.0 ; python_version >= "3.10" and python_version < "3.14"
1313
babel==2.17.0 ; python_version >= "3.10" and python_version < "3.14"
@@ -20,7 +20,7 @@ cfgv==3.4.0 ; python_version >= "3.10" and python_version < "3.14"
2020
charset-normalizer==3.4.2 ; python_version >= "3.10" and python_version < "3.14"
2121
click==8.1.8 ; python_version >= "3.10" and python_version < "3.14"
2222
colorama==0.4.6 ; python_version >= "3.10" and python_version < "3.14"
23-
commitizen==4.6.0 ; python_version >= "3.10" and python_version < "3.14"
23+
commitizen==4.6.3 ; python_version >= "3.10" and python_version < "3.14"
2424
coverage==7.8.0 ; python_version >= "3.10" and python_version < "3.14"
2525
decli==0.6.2 ; python_version >= "3.10" and python_version < "3.14"
2626
distlib==0.3.9 ; python_version >= "3.10" and python_version < "3.14"
@@ -33,9 +33,10 @@ filelock==3.18.0 ; python_version >= "3.10" and python_version < "3.14"
3333
frozenlist==1.6.0 ; python_version >= "3.10" and python_version < "3.14"
3434
fsspec==2025.3.2 ; python_version >= "3.10" and python_version < "3.14"
3535
h11==0.16.0 ; python_version >= "3.10" and python_version < "3.14"
36+
hf-xet==1.1.0 ; python_version >= "3.10" and python_version < "3.14" and (platform_machine == "x86_64" or platform_machine == "amd64" or platform_machine == "arm64" or platform_machine == "aarch64")
3637
httpcore==1.0.9 ; python_version >= "3.10" and python_version < "3.14"
3738
httpx==0.28.1 ; python_version >= "3.10" and python_version < "3.14"
38-
huggingface-hub==0.30.2 ; python_version >= "3.10" and python_version < "3.14"
39+
huggingface-hub==0.31.1 ; python_version >= "3.10" and python_version < "3.14"
3940
identify==2.6.10 ; python_version >= "3.10" and python_version < "3.14"
4041
idna==3.10 ; python_version >= "3.10" and python_version < "3.14"
4142
imagesize==1.4.1 ; python_version >= "3.10" and python_version < "3.14"
@@ -48,7 +49,7 @@ joblib==1.5.0 ; python_version >= "3.10" and python_version < "3.14"
4849
jsonschema-specifications==2025.4.1 ; python_version >= "3.10" and python_version < "3.14"
4950
jsonschema==4.23.0 ; python_version >= "3.10" and python_version < "3.14"
5051
jupyter-core==5.7.2 ; python_version >= "3.10" and python_version < "3.14"
51-
litellm==1.68.0 ; python_version >= "3.10" and python_version < "3.14"
52+
litellm==1.68.1 ; python_version >= "3.10" and python_version < "3.14"
5253
loguru==0.7.3 ; python_version >= "3.10" and python_version < "3.14"
5354
markupsafe==3.0.2 ; python_version >= "3.10" and python_version < "3.14"
5455
mosestokenizer==1.2.1 ; python_version >= "3.10" and python_version < "3.14"
@@ -80,7 +81,7 @@ pandas==2.2.3 ; python_version >= "3.10" and python_version < "3.14"
8081
pathspec==0.12.1 ; python_version >= "3.10" and python_version < "3.14"
8182
pip-tools==7.4.1 ; python_version >= "3.10" and python_version < "3.14"
8283
pip==25.1.1 ; python_version >= "3.10" and python_version < "3.14"
83-
platformdirs==4.3.7 ; python_version >= "3.10" and python_version < "3.14"
84+
platformdirs==4.3.8 ; python_version >= "3.10" and python_version < "3.14"
8485
pluggy==1.5.0 ; python_version >= "3.10" and python_version < "3.14"
8586
pre-commit==4.2.0 ; python_version >= "3.10" and python_version < "3.14"
8687
prompt-toolkit==3.0.51 ; python_version >= "3.10" and python_version < "3.14"
@@ -106,12 +107,12 @@ requests==2.32.3 ; python_version >= "3.10" and python_version < "3.14"
106107
rpds-py==0.24.0 ; python_version >= "3.10" and python_version < "3.14"
107108
safetensors==0.5.3 ; python_version >= "3.10" and python_version < "3.14"
108109
scikit-learn==1.6.1 ; python_version >= "3.10" and python_version < "3.14"
109-
scipy==1.15.2 ; python_version >= "3.10" and python_version < "3.14"
110-
setuptools==80.2.0 ; python_version >= "3.10" and python_version < "3.14"
110+
scipy==1.15.3 ; python_version >= "3.10" and python_version < "3.14"
111+
setuptools==80.3.1 ; python_version >= "3.10" and python_version < "3.14"
111112
six==1.17.0 ; python_version >= "3.10" and python_version < "3.14"
112113
skops==0.11.0 ; python_version >= "3.10" and python_version < "3.14"
113114
sniffio==1.3.1 ; python_version >= "3.10" and python_version < "3.14"
114-
snowballstemmer==2.2.0 ; python_version >= "3.10" and python_version < "3.14"
115+
snowballstemmer==3.0.0.1 ; python_version >= "3.10" and python_version < "3.14"
115116
soupsieve==2.7 ; python_version >= "3.10" and python_version < "3.14"
116117
sphinx-autodoc-typehints==2.3.0 ; python_version >= "3.10" and python_version < "3.14"
117118
sphinx-book-theme==1.1.4 ; python_version >= "3.10" and python_version < "3.14"
@@ -145,7 +146,7 @@ tzdata==2025.2 ; python_version >= "3.10" and python_version < "3.14"
145146
uctools==1.3.0 ; python_version >= "3.10" and python_version < "3.14"
146147
urllib3==2.4.0 ; python_version >= "3.10" and python_version < "3.14"
147148
vcrpy @ git+https://github.com/shcherbak-ai/vcrpy.git@cda84a815570a3a7bdf93c413901d61230581831 ; python_version >= "3.10" and python_version < "3.14"
148-
virtualenv==20.30.0 ; python_version >= "3.10" and python_version < "3.14"
149+
virtualenv==20.31.2 ; python_version >= "3.10" and python_version < "3.14"
149150
wcwidth==0.2.13 ; python_version >= "3.10" and python_version < "3.14"
150151
wheel==0.45.1 ; python_version >= "3.10" and python_version < "3.14"
151152
win32-setctime==1.2.0 ; python_version >= "3.10" and python_version < "3.14" and sys_platform == "win32"

dev/requirements/requirements.main.txt

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,17 +19,18 @@ filelock==3.18.0 ; python_version >= "3.10" and python_version < "3.14"
1919
frozenlist==1.6.0 ; python_version >= "3.10" and python_version < "3.14"
2020
fsspec==2025.3.2 ; python_version >= "3.10" and python_version < "3.14"
2121
h11==0.16.0 ; python_version >= "3.10" and python_version < "3.14"
22+
hf-xet==1.1.0 ; python_version >= "3.10" and python_version < "3.14" and (platform_machine == "x86_64" or platform_machine == "amd64" or platform_machine == "arm64" or platform_machine == "aarch64")
2223
httpcore==1.0.9 ; python_version >= "3.10" and python_version < "3.14"
2324
httpx==0.28.1 ; python_version >= "3.10" and python_version < "3.14"
24-
huggingface-hub==0.30.2 ; python_version >= "3.10" and python_version < "3.14"
25+
huggingface-hub==0.31.1 ; python_version >= "3.10" and python_version < "3.14"
2526
idna==3.10 ; python_version >= "3.10" and python_version < "3.14"
2627
importlib-metadata==8.7.0 ; python_version >= "3.10" and python_version < "3.14"
2728
jinja2==3.1.6 ; python_version >= "3.10" and python_version < "3.14"
2829
jiter==0.9.0 ; python_version >= "3.10" and python_version < "3.14"
2930
joblib==1.5.0 ; python_version >= "3.10" and python_version < "3.14"
3031
jsonschema-specifications==2025.4.1 ; python_version >= "3.10" and python_version < "3.14"
3132
jsonschema==4.23.0 ; python_version >= "3.10" and python_version < "3.14"
32-
litellm==1.68.0 ; python_version >= "3.10" and python_version < "3.14"
33+
litellm==1.68.1 ; python_version >= "3.10" and python_version < "3.14"
3334
loguru==0.7.3 ; python_version >= "3.10" and python_version < "3.14"
3435
markupsafe==3.0.2 ; python_version >= "3.10" and python_version < "3.14"
3536
mosestokenizer==1.2.1 ; python_version >= "3.10" and python_version < "3.14"
@@ -69,8 +70,8 @@ requests==2.32.3 ; python_version >= "3.10" and python_version < "3.14"
6970
rpds-py==0.24.0 ; python_version >= "3.10" and python_version < "3.14"
7071
safetensors==0.5.3 ; python_version >= "3.10" and python_version < "3.14"
7172
scikit-learn==1.6.1 ; python_version >= "3.10" and python_version < "3.14"
72-
scipy==1.15.2 ; python_version >= "3.10" and python_version < "3.14"
73-
setuptools==80.2.0 ; python_version >= "3.10" and python_version < "3.14" and platform_system == "Linux" and platform_machine == "x86_64" or python_version >= "3.12" and python_version < "3.14"
73+
scipy==1.15.3 ; python_version >= "3.10" and python_version < "3.14"
74+
setuptools==80.3.1 ; python_version >= "3.10" and python_version < "3.14" and platform_system == "Linux" and platform_machine == "x86_64" or python_version >= "3.12" and python_version < "3.14"
7475
six==1.17.0 ; python_version >= "3.10" and python_version < "3.14"
7576
skops==0.11.0 ; python_version >= "3.10" and python_version < "3.14"
7677
sniffio==1.3.1 ; python_version >= "3.10" and python_version < "3.14"

0 commit comments

Comments
 (0)