@@ -119,6 +119,10 @@ class BaseCompletionModel(MoatlessComponent, ABC):
119119 default = False ,
120120 description = "Whether to merge messages with the same role into a single message as this is required by models like Deepseek-R1" ,
121121 )
122+ reasoning_effort : Optional [str ] = Field (
123+ default = None ,
124+ description = "The reasoning effort level for the completion (e.g., 'low', 'medium', 'high')" ,
125+ )
122126
123127 _response_schema : Optional [list [type [ResponseSchema ]]] = PrivateAttr (default = None )
124128 _system_prompt : Optional [str ] = PrivateAttr (default = None )
@@ -484,15 +488,20 @@ async def _do_completion_with_rate_limit_retry():
484488 if "claude" in self .model :
485489 self ._inject_prompt_caching (messages )
486490
487- response = await litellm . acompletion (
488- model = self .model ,
489- max_tokens = self .max_tokens ,
490- temperature = self .temperature ,
491- messages = messages ,
492- metadata = self .metadata or {},
493- timeout = self .timeout ,
491+ completion_kwargs = {
492+ " model" : self .model ,
493+ " max_tokens" : self .max_tokens ,
494+ " temperature" : self .temperature ,
495+ " messages" : messages ,
496+ " metadata" : self .metadata or {},
497+ " timeout" : self .timeout ,
494498 ** self ._completion_params ,
495- )
499+ }
500+
501+ if self .reasoning_effort :
502+ completion_kwargs ["reasoning_effort" ] = self .reasoning_effort
503+
504+ response = await litellm .acompletion (** completion_kwargs )
496505
497506 if invocation .current_attempt :
498507 invocation .current_attempt .update_from_response (response , self .model )
0 commit comments