Commit dbe2192

DEFAULT_MAX_TOKENS = 8000
1 parent a5a3448 commit dbe2192

1 file changed: 8 additions, 7 deletions

src/smolagents/models.py

@@ -52,6 +52,7 @@
     "value": "Thought: .+?\\nCode:\\n```(?:py|python)?\\n(?:.|\\s)+?\\n```<end_code>",
 }
 
+DEFAULT_MAX_TOKENS = 8000
 try:
     import litellm
 
@@ -158,7 +159,7 @@ def __call__(
         messages: List[Dict[str, str]],
         stop_sequences: Optional[List[str]] = None,
         grammar: Optional[str] = None,
-        max_tokens: int = 1500,
+        max_tokens: int = DEFAULT_MAX_TOKENS,
     ) -> str:
         """Process the input messages and return the model's response.
 
@@ -211,7 +212,7 @@ class HfApiModel(Model):
     ...     token="your_hf_token_here",
     ... )
     >>> messages = [{"role": "user", "content": "Explain quantum mechanics in simple terms."}]
-    >>> response = engine(messages, stop_sequences=["END"], max_tokens=1500)
+    >>> response = engine(messages, stop_sequences=["END"], max_tokens=DEFAULT_MAX_TOKENS)
     >>> print(response)
     "Quantum mechanics is the branch of physics that studies..."
     ```
@@ -236,7 +237,7 @@ def __call__(
         messages: List[Dict[str, str]],
         stop_sequences: Optional[List[str]] = None,
         grammar: Optional[str] = None,
-        max_tokens: int = 1500,
+        max_tokens: int = DEFAULT_MAX_TOKENS,
         tools_to_call_from: Optional[List[Tool]] = None,
     ) -> str:
         """
@@ -339,7 +340,7 @@ def __call__(
         messages: List[Dict[str, str]],
         stop_sequences: Optional[List[str]] = None,
         grammar: Optional[str] = None,
-        max_tokens: int = 1500,
+        max_tokens: int = DEFAULT_MAX_TOKENS,
         tools_to_call_from: Optional[List[Tool]] = None,
     ) -> ChatCompletionOutputMessage:
         messages = get_clean_message_list(
@@ -361,7 +362,7 @@ def __call__(
         )
         prompt_tensor = prompt_tensor.to(self.model.device)
         count_prompt_tokens = prompt_tensor["input_ids"].shape[1]
-
+
         out = self.model.generate(
             **prompt_tensor,
             max_new_tokens=max_tokens,
@@ -424,7 +425,7 @@ def __call__(
         messages: List[Dict[str, str]],
         stop_sequences: Optional[List[str]] = None,
         grammar: Optional[str] = None,
-        max_tokens: int = 1500,
+        max_tokens: int = DEFAULT_MAX_TOKENS,
         tools_to_call_from: Optional[List[Tool]] = None,
     ) -> str:
         messages = get_clean_message_list(
@@ -495,7 +496,7 @@ def __call__(
         messages: List[Dict[str, str]],
         stop_sequences: Optional[List[str]] = None,
         grammar: Optional[str] = None,
-        max_tokens: int = 1500,
+        max_tokens: int = DEFAULT_MAX_TOKENS,
         tools_to_call_from: Optional[List[Tool]] = None,
     ) -> str:
         messages = get_clean_message_list(
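
For reviewers, a minimal sketch of the effect of this change (not part of the commit), assuming smolagents is importable; the model id and token are placeholders modeled on the docstring example above:

from smolagents.models import HfApiModel, DEFAULT_MAX_TOKENS

# Placeholder model id and token, replace with real values.
engine = HfApiModel(
    model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
    token="your_hf_token_here",
)
messages = [{"role": "user", "content": "Explain quantum mechanics in simple terms."}]

# Omitting max_tokens now falls back to DEFAULT_MAX_TOKENS (8000) rather than the old 1500.
response = engine(messages, stop_sequences=["END"])

# Per-call overrides still work for callers that want shorter completions.
short_response = engine(messages, stop_sequences=["END"], max_tokens=256)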
