5252 "value" : "Thought: .+?\\ nCode:\\ n```(?:py|python)?\\ n(?:.|\\ s)+?\\ n```<end_code>" ,
5353}
5454
55+ DEFAULT_MAX_TOKENS = 8000
5556try :
5657 import litellm
5758
@@ -158,7 +159,7 @@ def __call__(
158159 messages : List [Dict [str , str ]],
159160 stop_sequences : Optional [List [str ]] = None ,
160161 grammar : Optional [str ] = None ,
161- max_tokens : int = 1500 ,
162+ max_tokens : int = DEFAULT_MAX_TOKENS ,
162163 ) -> str :
163164 """Process the input messages and return the model's response.
164165
@@ -211,7 +212,7 @@ class HfApiModel(Model):
211212 ... token="your_hf_token_here",
212213 ... )
213214 >>> messages = [{"role": "user", "content": "Explain quantum mechanics in simple terms."}]
214- >>> response = engine(messages, stop_sequences=["END"], max_tokens=1500 )
215+ >>> response = engine(messages, stop_sequences=["END"], max_tokens=DEFAULT_MAX_TOKENS )
215216 >>> print(response)
216217 "Quantum mechanics is the branch of physics that studies..."
217218 ```
@@ -236,7 +237,7 @@ def __call__(
236237 messages : List [Dict [str , str ]],
237238 stop_sequences : Optional [List [str ]] = None ,
238239 grammar : Optional [str ] = None ,
239- max_tokens : int = 1500 ,
240+ max_tokens : int = DEFAULT_MAX_TOKENS ,
240241 tools_to_call_from : Optional [List [Tool ]] = None ,
241242 ) -> str :
242243 """
@@ -339,7 +340,7 @@ def __call__(
339340 messages : List [Dict [str , str ]],
340341 stop_sequences : Optional [List [str ]] = None ,
341342 grammar : Optional [str ] = None ,
342- max_tokens : int = 1500 ,
343+ max_tokens : int = DEFAULT_MAX_TOKENS ,
343344 tools_to_call_from : Optional [List [Tool ]] = None ,
344345 ) -> ChatCompletionOutputMessage :
345346 messages = get_clean_message_list (
@@ -361,7 +362,7 @@ def __call__(
361362 )
362363 prompt_tensor = prompt_tensor .to (self .model .device )
363364 count_prompt_tokens = prompt_tensor ["input_ids" ].shape [1 ]
364-
365+
365366 out = self .model .generate (
366367 ** prompt_tensor ,
367368 max_new_tokens = max_tokens ,
@@ -424,7 +425,7 @@ def __call__(
424425 messages : List [Dict [str , str ]],
425426 stop_sequences : Optional [List [str ]] = None ,
426427 grammar : Optional [str ] = None ,
427- max_tokens : int = 1500 ,
428+ max_tokens : int = DEFAULT_MAX_TOKENS ,
428429 tools_to_call_from : Optional [List [Tool ]] = None ,
429430 ) -> str :
430431 messages = get_clean_message_list (
@@ -495,7 +496,7 @@ def __call__(
495496 messages : List [Dict [str , str ]],
496497 stop_sequences : Optional [List [str ]] = None ,
497498 grammar : Optional [str ] = None ,
498- max_tokens : int = 1500 ,
499+ max_tokens : int = DEFAULT_MAX_TOKENS ,
499500 tools_to_call_from : Optional [List [Tool ]] = None ,
500501 ) -> str :
501502 messages = get_clean_message_list (
0 commit comments