from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.openai import AllMessageValues, ChatCompletionUsageBlock
from litellm.types.utils import (
+    Delta,
    GenericStreamingChunk,
    ModelInfoBase,
    ModelResponse,
    ModelResponseStream,
    ProviderField,
    StreamingChoices,
-    Delta,
)

from ..common_utils import OllamaError, _convert_image
@@ -92,9 +92,9 @@ class OllamaConfig(BaseConfig):
    repeat_penalty: Optional[float] = None
    temperature: Optional[float] = None
    seed: Optional[int] = None
-    stop: Optional[
-        list
-    ] = None  # stop is a list based on this - https://github.com/ollama/ollama/pull/442
+    stop: Optional[list] = (
+        None  # stop is a list based on this - https://github.com/ollama/ollama/pull/442
+    )
    tfs_z: Optional[float] = None
    num_predict: Optional[int] = None
    top_k: Optional[int] = None
@@ -154,6 +154,7 @@ def get_supported_openai_params(self, model: str):
            "stop",
            "response_format",
            "max_completion_tokens",
+            "reasoning_effort",
        ]

    def map_openai_params(
@@ -166,19 +167,21 @@ def map_openai_params(
        for param, value in non_default_params.items():
            if param == "max_tokens" or param == "max_completion_tokens":
                optional_params["num_predict"] = value
-            if param == "stream":
+            elif param == "stream":
                optional_params["stream"] = value
-            if param == "temperature":
+            elif param == "temperature":
                optional_params["temperature"] = value
-            if param == "seed":
+            elif param == "seed":
                optional_params["seed"] = value
-            if param == "top_p":
+            elif param == "top_p":
                optional_params["top_p"] = value
-            if param == "frequency_penalty":
+            elif param == "frequency_penalty":
                optional_params["frequency_penalty"] = value
-            if param == "stop":
+            elif param == "stop":
                optional_params["stop"] = value
-            if param == "response_format" and isinstance(value, dict):
+            elif param == "reasoning_effort" and value is not None:
+                optional_params["think"] = True
+            elif param == "response_format" and isinstance(value, dict):
                if value["type"] == "json_object":
                    optional_params["format"] = "json"
                elif value["type"] == "json_schema":
@@ -258,12 +261,17 @@ def transform_response(
        api_key: Optional[str] = None,
        json_mode: Optional[bool] = None,
    ) -> ModelResponse:
+        from litellm.litellm_core_utils.llm_response_utils.convert_dict_to_response import (
+            _parse_content_for_reasoning,
+        )
+
        response_json = raw_response.json()
        ## RESPONSE OBJECT
        model_response.choices[0].finish_reason = "stop"
        if request_data.get("format", "") == "json":
            # Check if response field exists and is not empty before parsing JSON
            response_text = response_json.get("response", "")
+
            if not response_text or not response_text.strip():
                # Handle empty response gracefully - set empty content
                message = litellm.Message(content="")
@@ -288,7 +296,9 @@ def transform_response(
                                "id": f"call_{str(uuid.uuid4())}",
                                "function": {
                                    "name": function_call["name"],
-                                    "arguments": json.dumps(function_call["arguments"]),
+                                    "arguments": json.dumps(
+                                        function_call["arguments"]
+                                    ),
                                },
                                "type": "function",
                            }
@@ -305,11 +315,26 @@ def transform_response(
                    model_response.choices[0].finish_reason = "stop"
            except json.JSONDecodeError:
                # If JSON parsing fails, treat as regular text response
-                message = litellm.Message(content=response_text)
+                ## output parse reasoning content from response_text
+                reasoning_content: Optional[str] = None
+                content: Optional[str] = None
+                if response_text is not None:
+                    reasoning_content, content = _parse_content_for_reasoning(
+                        response_text
+                    )
+                message = litellm.Message(
+                    content=content, reasoning_content=reasoning_content
+                )
                model_response.choices[0].message = message  # type: ignore
                model_response.choices[0].finish_reason = "stop"
        else:
-            model_response.choices[0].message.content = response_json["response"]  # type: ignore
+            response_text = response_json.get("response", "")
+            content = None
+            reasoning_content = None
+            if response_text is not None:
+                reasoning_content, content = _parse_content_for_reasoning(response_text)
+            model_response.choices[0].message.content = content  # type: ignore
+            model_response.choices[0].message.reasoning_content = reasoning_content  # type: ignore
        model_response.created = int(time.time())
        model_response.model = "ollama/" + model
        _prompt = request_data.get("prompt", "")
@@ -434,12 +459,21 @@ def get_model_response_iterator(


class OllamaTextCompletionResponseIterator(BaseModelResponseIterator):
+    def __init__(
+        self, streaming_response, sync_stream: bool, json_mode: Optional[bool] = False
+    ):
+        super().__init__(streaming_response, sync_stream, json_mode)
+        self.started_reasoning_content: bool = False
+        self.finished_reasoning_content: bool = False
+
    def _handle_string_chunk(
        self, str_line: str
    ) -> Union[GenericStreamingChunk, ModelResponseStream]:
        return self.chunk_parser(json.loads(str_line))

-    def chunk_parser(self, chunk: dict) -> Union[GenericStreamingChunk, ModelResponseStream]:
+    def chunk_parser(
+        self, chunk: dict
+    ) -> Union[GenericStreamingChunk, ModelResponseStream]:
        try:
            if "error" in chunk:
                raise Exception(f"Ollama Error - {chunk}")
@@ -469,12 +503,42 @@ def chunk_parser(self, chunk: dict) -> Union[GenericStreamingChunk, ModelResponseStream]:
                )
            elif chunk["response"]:
                text = chunk["response"]
-                return GenericStreamingChunk(
-                    text=text,
-                    is_finished=is_finished,
-                    finish_reason="stop",
+                reasoning_content: Optional[str] = None
+                content: Optional[str] = None
+                if text is not None:
+                    if "<think>" in text:
+                        text = text.replace("<think>", "")
+                        self.started_reasoning_content = True
+                    elif "</think>" in text:
+                        text = text.replace("</think>", "")
+                        self.finished_reasoning_content = True
+
+                    if (
+                        self.started_reasoning_content
+                        and not self.finished_reasoning_content
+                    ):
+                        reasoning_content = text
+                    else:
+                        content = text
+
+                return ModelResponseStream(
+                    choices=[
+                        StreamingChoices(
+                            index=0,
+                            delta=Delta(
+                                reasoning_content=reasoning_content, content=content
+                            ),
+                        )
+                    ],
+                    finish_reason=finish_reason,
                    usage=None,
                )
+                # return GenericStreamingChunk(
+                #     text=text,
+                #     is_finished=is_finished,
+                #     finish_reason="stop",
+                #     usage=None,
+                # )
            elif "thinking" in chunk and not chunk["response"]:
                # Return reasoning content as ModelResponseStream so UIs can render it
                thinking_content = chunk.get("thinking") or ""
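
Taken together, the streaming hunks above make chunk_parser strip the <think> / </think> markers and route each fragment to either reasoning_content or content, tracking state across chunks with the two new boolean flags. The sketch below is a minimal standalone reconstruction of that state machine; the ThinkSplitter name, the sample chunks, and the __main__ harness are invented for illustration and are not part of this PR.

# Standalone sketch (assumption: illustrative code, not litellm's) of the
# <think>-tag state machine used by OllamaTextCompletionResponseIterator above.
from typing import Optional, Tuple


class ThinkSplitter:
    """Mirrors the iterator's started/finished reasoning flags."""

    def __init__(self) -> None:
        self.started_reasoning_content: bool = False
        self.finished_reasoning_content: bool = False

    def split(self, text: str) -> Tuple[Optional[str], Optional[str]]:
        """Return (reasoning_content, content) for one streamed text fragment."""
        if "<think>" in text:
            text = text.replace("<think>", "")
            self.started_reasoning_content = True
        elif "</think>" in text:
            text = text.replace("</think>", "")
            self.finished_reasoning_content = True

        if self.started_reasoning_content and not self.finished_reasoning_content:
            return text, None  # still inside the <think> block
        return None, text


if __name__ == "__main__":
    splitter = ThinkSplitter()
    for chunk in ["<think>let me ", "reason</think>", " final answer"]:
        print(splitter.split(chunk))
    # ('let me ', None)
    # (None, 'reason')
    # (None, ' final answer')

Note that, exactly as in the parser above, any text arriving in the same chunk as "</think>" is emitted as content rather than reasoning.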
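The parameter-mapping hunks also add "reasoning_effort" to get_supported_openai_params and translate any non-None value into Ollama's think option. A rough usage sketch follows; it assumes OllamaConfig is exported at the top level of litellm and that map_openai_params takes non_default_params, optional_params, model, and drop_params and returns the mutated dict, none of which is shown in full in this diff.

# Hypothetical usage of the new reasoning_effort -> think mapping.
# Assumptions: the map_openai_params signature and return value are inferred
# from the hunk above, not shown in this diff.
import litellm

config = litellm.OllamaConfig()
mapped = config.map_openai_params(
    non_default_params={"reasoning_effort": "low", "temperature": 0.2},
    optional_params={},
    model="deepseek-r1",  # illustrative model name
    drop_params=False,
)
print(mapped)
# Per the hunk above, expected to contain: {'think': True, 'temperature': 0.2}

Per the mapping, any non-None reasoning_effort simply enables think=True; the effort level itself ("low", "medium", "high") is not forwarded to Ollama.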