Skip to content

Commit e6429f6

Browse files
feat(ollama_chat/transformation.py): handle thinking content on streaming for ollama chat models
Parse the output correctly into 'reasoning_content'
1 parent 90bd89c commit e6429f6

File tree

2 files changed

+45
-13
lines changed

2 files changed

+45
-13
lines changed

litellm/llms/ollama/chat/transformation.py

Lines changed: 45 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -437,6 +437,9 @@ def get_model_response_iterator(
437437

438438

439439
class OllamaChatCompletionResponseIterator(BaseModelResponseIterator):
440+
started_reasoning_content: bool = False
441+
finished_reasoning_content: bool = False
442+
440443
def _is_function_call_complete(self, function_args: Union[str, dict]) -> bool:
441444
if isinstance(function_args, dict):
442445
return True
@@ -490,8 +493,49 @@ def chunk_parser(self, chunk: dict) -> ModelResponseStream:
490493
if is_function_call_complete:
491494
tool_call["id"] = str(uuid.uuid4())
492495

496+
# PROCESS REASONING CONTENT
497+
reasoning_content: Optional[str] = None
498+
content: Optional[str] = None
499+
if chunk["message"].get("thinking") is not None:
500+
if self.started_reasoning_content is False:
501+
reasoning_content = chunk["message"].get("thinking")
502+
self.started_reasoning_content = True
503+
elif self.finished_reasoning_content is False:
504+
reasoning_content = chunk["message"].get("thinking")
505+
self.finished_reasoning_content = True
506+
elif chunk["message"].get("content") is not None:
507+
if "<think>" in chunk["message"].get("content"):
508+
reasoning_content = (
509+
chunk["message"].get("content").replace("<think>", "")
510+
)
511+
512+
self.started_reasoning_content = True
513+
514+
if (
515+
"</think>" in chunk["message"].get("content")
516+
and self.started_reasoning_content
517+
):
518+
reasoning_content = chunk["message"].get("content")
519+
remaining_content = (
520+
chunk["message"].get("content").split("</think>")
521+
)
522+
if len(remaining_content) > 1:
523+
content = remaining_content[1]
524+
self.finished_reasoning_content = True
525+
526+
if (
527+
self.started_reasoning_content is True
528+
and self.finished_reasoning_content is False
529+
):
530+
reasoning_content = (
531+
chunk["message"].get("content").replace("<think>", "")
532+
)
533+
else:
534+
content = chunk["message"].get("content")
535+
493536
delta = Delta(
494-
content=chunk["message"].get("content", ""),
537+
content=content,
538+
reasoning_content=reasoning_content,
495539
tool_calls=tool_calls,
496540
)
497541

litellm/proxy/_new_secret_config.yaml

Lines changed: 0 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -21,15 +21,3 @@ model_list:
2121

2222
router_settings:
2323
model_group_alias: {"my-fake-gpt-4": "fake-openai-endpoint"}
24-
25-
litellm_settings:
26-
callbacks: ["otel"]
27-
cache: true
28-
cache_params:
29-
type: redis
30-
ttl: 600
31-
supported_call_types: ["acompletion", "completion"]
32-
33-
model_group_settings:
34-
forward_client_headers_to_llm_api:
35-
- fake-openai-endpoint

0 commit comments

Comments (0)