Skip to content

Commit 0cd297a

Browse files
authored
docs: Update docstrings of OpenAI Generators to use max_completion_tokens (#9874)
* Update docstrings to use `max_completion_tokens` instead of the deprecated `max_tokens`
* Change more instances of `max_tokens`
* Update tests
1 parent d77a853 commit 0cd297a

File tree

10 files changed

+50
-43
lines changed

10 files changed

+50
-43
lines changed

haystack/components/extractors/llm_metadata_extractor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ class LLMMetadataExtractor:
9090
9191
chat_generator = OpenAIChatGenerator(
9292
generation_kwargs={
93-
"max_tokens": 500,
93+
"max_completion_tokens": 500,
9494
"temperature": 0.0,
9595
"seed": 0,
9696
"response_format": {"type": "json_object"},

haystack/components/generators/azure.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,8 @@ def __init__( # pylint: disable=too-many-positional-arguments # noqa: PLR0913
9999
the OpenAI endpoint. See [OpenAI documentation](https://platform.openai.com/docs/api-reference/chat) for
100100
more details.
101101
Some of the supported parameters:
102-
- `max_tokens`: The maximum number of tokens the output text can have.
102+
- `max_completion_tokens`: An upper bound for the number of tokens that can be generated for a completion,
103+
including visible output tokens and reasoning tokens.
103104
- `temperature`: The sampling temperature to use. Higher values mean the model takes more risks.
104105
Try 0.9 for more creative applications and 0 (argmax sampling) for ones with a well-defined answer.
105106
- `top_p`: An alternative to sampling with temperature, called nucleus sampling, where the model

haystack/components/generators/chat/azure.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,8 @@ def __init__( # pylint: disable=too-many-positional-arguments
110110
:param generation_kwargs: Other parameters to use for the model. These parameters are sent directly to
111111
the OpenAI endpoint. For details, see [OpenAI documentation](https://platform.openai.com/docs/api-reference/chat).
112112
Some of the supported parameters:
113-
- `max_tokens`: The maximum number of tokens the output text can have.
113+
- `max_completion_tokens`: An upper bound for the number of tokens that can be generated for a completion,
114+
including visible output tokens and reasoning tokens.
114115
- `temperature`: The sampling temperature to use. Higher values mean the model takes more risks.
115116
Try 0.9 for more creative applications and 0 (argmax sampling) for ones with a well-defined answer.
116117
- `top_p`: Nucleus sampling is an alternative to sampling with temperature, where the model considers

haystack/components/generators/chat/openai.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,8 @@ def __init__( # pylint: disable=too-many-positional-arguments
127127
the OpenAI endpoint. See OpenAI [documentation](https://platform.openai.com/docs/api-reference/chat) for
128128
more details.
129129
Some of the supported parameters:
130-
- `max_tokens`: The maximum number of tokens the output text can have.
130+
- `max_completion_tokens`: An upper bound for the number of tokens that can be generated for a completion,
131+
including visible output tokens and reasoning tokens.
131132
- `temperature`: What sampling temperature to use. Higher values mean the model will take more risks.
132133
Try 0.9 for more creative applications and 0 (argmax sampling) for ones with a well-defined answer.
133134
- `top_p`: An alternative to sampling with temperature, called nucleus sampling, where the model
@@ -511,7 +512,7 @@ def _check_finish_reason(meta: dict[str, Any]) -> None:
511512
if meta["finish_reason"] == "length":
512513
logger.warning(
513514
"The completion for index {index} has been truncated before reaching a natural stopping point. "
514-
"Increase the max_tokens parameter to allow for longer completions.",
515+
"Increase the max_completion_tokens parameter to allow for longer completions.",
515516
index=meta["index"],
516517
finish_reason=meta["finish_reason"],
517518
)

haystack/components/generators/openai.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,8 @@ def __init__( # pylint: disable=too-many-positional-arguments
9292
the OpenAI endpoint. See OpenAI [documentation](https://platform.openai.com/docs/api-reference/chat) for
9393
more details.
9494
Some of the supported parameters:
95-
- `max_tokens`: The maximum number of tokens the output text can have.
95+
- `max_completion_tokens`: An upper bound for the number of tokens that can be generated for a completion,
96+
including visible output tokens and reasoning tokens.
9697
- `temperature`: What sampling temperature to use. Higher values mean the model will take more risks.
9798
Try 0.9 for more creative applications and 0 (argmax sampling) for ones with a well-defined answer.
9899
- `top_p`: An alternative to sampling with temperature, called nucleus sampling, where the model

test/components/generators/chat/test_azure.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -94,15 +94,15 @@ def test_init_with_parameters(self, tools):
9494
api_key=Secret.from_token("test-api-key"),
9595
azure_endpoint="some-non-existing-endpoint",
9696
streaming_callback=print_streaming_chunk,
97-
generation_kwargs={"max_tokens": 10, "some_test_param": "test-params"},
97+
generation_kwargs={"max_completion_tokens": 10, "some_test_param": "test-params"},
9898
tools=tools,
9999
tools_strict=True,
100100
azure_ad_token_provider=default_azure_ad_token_provider,
101101
)
102102
assert component.client.api_key == "test-api-key"
103103
assert component.azure_deployment == "gpt-4o-mini"
104104
assert component.streaming_callback is print_streaming_chunk
105-
assert component.generation_kwargs == {"max_tokens": 10, "some_test_param": "test-params"}
105+
assert component.generation_kwargs == {"max_completion_tokens": 10, "some_test_param": "test-params"}
106106
assert component.tools == tools
107107
assert component.tools_strict
108108
assert component.azure_ad_token_provider is not None
@@ -114,7 +114,7 @@ def test_init_with_0_max_retries(self, tools):
114114
api_key=Secret.from_token("test-api-key"),
115115
azure_endpoint="some-non-existing-endpoint",
116116
streaming_callback=print_streaming_chunk,
117-
generation_kwargs={"max_tokens": 10, "some_test_param": "test-params"},
117+
generation_kwargs={"max_completion_tokens": 10, "some_test_param": "test-params"},
118118
tools=tools,
119119
tools_strict=True,
120120
azure_ad_token_provider=default_azure_ad_token_provider,
@@ -123,7 +123,7 @@ def test_init_with_0_max_retries(self, tools):
123123
assert component.client.api_key == "test-api-key"
124124
assert component.azure_deployment == "gpt-4o-mini"
125125
assert component.streaming_callback is print_streaming_chunk
126-
assert component.generation_kwargs == {"max_tokens": 10, "some_test_param": "test-params"}
126+
assert component.generation_kwargs == {"max_completion_tokens": 10, "some_test_param": "test-params"}
127127
assert component.tools == tools
128128
assert component.tools_strict
129129
assert component.azure_ad_token_provider is not None
@@ -164,7 +164,7 @@ def test_to_dict_with_parameters(self, monkeypatch, calendar_event_model):
164164
timeout=2.5,
165165
max_retries=10,
166166
generation_kwargs={
167-
"max_tokens": 10,
167+
"max_completion_tokens": 10,
168168
"some_test_param": "test-params",
169169
"response_format": calendar_event_model,
170170
},
@@ -185,7 +185,7 @@ def test_to_dict_with_parameters(self, monkeypatch, calendar_event_model):
185185
"timeout": 2.5,
186186
"max_retries": 10,
187187
"generation_kwargs": {
188-
"max_tokens": 10,
188+
"max_completion_tokens": 10,
189189
"some_test_param": "test-params",
190190
"response_format": {
191191
"type": "json_schema",
@@ -435,14 +435,14 @@ def test_init_should_also_create_async_client_with_same_args(self, tools):
435435
api_key=Secret.from_token("test-api-key"),
436436
azure_endpoint="some-non-existing-endpoint",
437437
streaming_callback=print_streaming_chunk,
438-
generation_kwargs={"max_tokens": 10, "some_test_param": "test-params"},
438+
generation_kwargs={"max_completion_tokens": 10, "some_test_param": "test-params"},
439439
tools=tools,
440440
tools_strict=True,
441441
)
442442
assert component.async_client.api_key == "test-api-key"
443443
assert component.azure_deployment == "gpt-4o-mini"
444444
assert component.streaming_callback is print_streaming_chunk
445-
assert component.generation_kwargs == {"max_tokens": 10, "some_test_param": "test-params"}
445+
assert component.generation_kwargs == {"max_completion_tokens": 10, "some_test_param": "test-params"}
446446
assert component.tools == tools
447447
assert component.tools_strict
448448

test/components/generators/chat/test_openai.py

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,7 @@ def test_init_with_parameters(self, monkeypatch):
213213
model="gpt-4o-mini",
214214
streaming_callback=print_streaming_chunk,
215215
api_base_url="test-base-url",
216-
generation_kwargs={"max_tokens": 10, "some_test_param": "test-params"},
216+
generation_kwargs={"max_completion_tokens": 10, "some_test_param": "test-params"},
217217
timeout=40.0,
218218
max_retries=1,
219219
tools=[tool],
@@ -223,7 +223,7 @@ def test_init_with_parameters(self, monkeypatch):
223223
assert component.client.api_key == "test-api-key"
224224
assert component.model == "gpt-4o-mini"
225225
assert component.streaming_callback is print_streaming_chunk
226-
assert component.generation_kwargs == {"max_tokens": 10, "some_test_param": "test-params"}
226+
assert component.generation_kwargs == {"max_completion_tokens": 10, "some_test_param": "test-params"}
227227
assert component.client.timeout == 40.0
228228
assert component.client.max_retries == 1
229229
assert component.tools == [tool]
@@ -238,12 +238,12 @@ def test_init_with_parameters_and_env_vars(self, monkeypatch):
238238
model="gpt-4o-mini",
239239
streaming_callback=print_streaming_chunk,
240240
api_base_url="test-base-url",
241-
generation_kwargs={"max_tokens": 10, "some_test_param": "test-params"},
241+
generation_kwargs={"max_completion_tokens": 10, "some_test_param": "test-params"},
242242
)
243243
assert component.client.api_key == "test-api-key"
244244
assert component.model == "gpt-4o-mini"
245245
assert component.streaming_callback is print_streaming_chunk
246-
assert component.generation_kwargs == {"max_tokens": 10, "some_test_param": "test-params"}
246+
assert component.generation_kwargs == {"max_completion_tokens": 10, "some_test_param": "test-params"}
247247
assert component.client.timeout == 100.0
248248
assert component.client.max_retries == 10
249249

@@ -278,7 +278,7 @@ def test_to_dict_with_parameters(self, monkeypatch, calendar_event_model):
278278
streaming_callback=print_streaming_chunk,
279279
api_base_url="test-base-url",
280280
generation_kwargs={
281-
"max_tokens": 10,
281+
"max_completion_tokens": 10,
282282
"some_test_param": "test-params",
283283
"response_format": calendar_event_model,
284284
},
@@ -301,7 +301,7 @@ def test_to_dict_with_parameters(self, monkeypatch, calendar_event_model):
301301
"timeout": 100.0,
302302
"streaming_callback": "haystack.components.generators.utils.print_streaming_chunk",
303303
"generation_kwargs": {
304-
"max_tokens": 10,
304+
"max_completion_tokens": 10,
305305
"some_test_param": "test-params",
306306
"response_format": {
307307
"type": "json_schema",
@@ -377,7 +377,7 @@ def test_from_dict(self, monkeypatch):
377377
"streaming_callback": "haystack.components.generators.utils.print_streaming_chunk",
378378
"max_retries": 10,
379379
"timeout": 100.0,
380-
"generation_kwargs": {"max_tokens": 10, "some_test_param": "test-params"},
380+
"generation_kwargs": {"max_completion_tokens": 10, "some_test_param": "test-params"},
381381
"tools": [
382382
{
383383
"type": "haystack.tools.tool.Tool",
@@ -399,7 +399,7 @@ def test_from_dict(self, monkeypatch):
399399
assert component.model == "gpt-4o-mini"
400400
assert component.streaming_callback is print_streaming_chunk
401401
assert component.api_base_url == "test-base-url"
402-
assert component.generation_kwargs == {"max_tokens": 10, "some_test_param": "test-params"}
402+
assert component.generation_kwargs == {"max_completion_tokens": 10, "some_test_param": "test-params"}
403403
assert component.api_key == Secret.from_env_var("OPENAI_API_KEY")
404404
assert component.tools == [
405405
Tool(name="name", description="description", parameters={"x": {"type": "string"}}, function=print)
@@ -419,7 +419,7 @@ def test_from_dict_fail_wo_env_var(self, monkeypatch):
419419
"organization": None,
420420
"api_base_url": "test-base-url",
421421
"streaming_callback": "haystack.components.generators.utils.print_streaming_chunk",
422-
"generation_kwargs": {"max_tokens": 10, "some_test_param": "test-params"},
422+
"generation_kwargs": {"max_completion_tokens": 10, "some_test_param": "test-params"},
423423
"tools": None,
424424
},
425425
}
@@ -439,13 +439,14 @@ def test_run(self, chat_messages, openai_mock_chat_completion):
439439

440440
def test_run_with_params(self, chat_messages, openai_mock_chat_completion):
441441
component = OpenAIChatGenerator(
442-
api_key=Secret.from_token("test-api-key"), generation_kwargs={"max_tokens": 10, "temperature": 0.5}
442+
api_key=Secret.from_token("test-api-key"),
443+
generation_kwargs={"max_completion_tokens": 10, "temperature": 0.5},
443444
)
444445
response = component.run(chat_messages)
445446

446447
# check that the component calls the OpenAI API with the correct parameters
447448
_, kwargs = openai_mock_chat_completion.call_args
448-
assert kwargs["max_tokens"] == 10
449+
assert kwargs["max_completion_tokens"] == 10
449450
assert kwargs["temperature"] == 0.5
450451

451452
# check that the tools are not passed to the OpenAI API (the generator is initialized without tools)
@@ -573,7 +574,7 @@ def test_check_abnormal_completions(self, caplog):
573574
# check truncation warning
574575
message_template = (
575576
"The completion for index {index} has been truncated before reaching a natural stopping point. "
576-
"Increase the max_tokens parameter to allow for longer completions."
577+
"Increase the max_completion_tokens parameter to allow for longer completions."
577578
)
578579

579580
for index in [1, 3]:

test/components/generators/chat/test_openai_async.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -118,13 +118,14 @@ async def test_run_async(self, chat_messages, openai_mock_async_chat_completion)
118118
@pytest.mark.asyncio
119119
async def test_run_with_params_async(self, chat_messages, openai_mock_async_chat_completion):
120120
component = OpenAIChatGenerator(
121-
api_key=Secret.from_token("test-api-key"), generation_kwargs={"max_tokens": 10, "temperature": 0.5}
121+
api_key=Secret.from_token("test-api-key"),
122+
generation_kwargs={"max_completion_tokens": 10, "temperature": 0.5},
122123
)
123124
response = await component.run_async(chat_messages)
124125

125126
# check that the component calls the OpenAI API with the correct parameters
126127
_, kwargs = openai_mock_async_chat_completion.call_args
127-
assert kwargs["max_tokens"] == 10
128+
assert kwargs["max_completion_tokens"] == 10
128129
assert kwargs["temperature"] == 0.5
129130

130131
# check that the tools are not passed to the OpenAI API (the generator is initialized without tools)

test/components/generators/test_azure.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -35,14 +35,14 @@ def test_init_with_parameters(self):
3535
azure_endpoint="some-non-existing-endpoint",
3636
azure_deployment="gpt-4o-mini",
3737
streaming_callback=print_streaming_chunk,
38-
generation_kwargs={"max_tokens": 10, "some_test_param": "test-params"},
38+
generation_kwargs={"max_completion_tokens": 10, "some_test_param": "test-params"},
3939
azure_ad_token_provider=default_azure_ad_token_provider,
4040
)
4141
assert component.client.api_key == "fake-api-key"
4242
assert component.azure_deployment == "gpt-4o-mini"
4343
assert component.streaming_callback is print_streaming_chunk
4444
assert component.timeout == 30.0
45-
assert component.generation_kwargs == {"max_tokens": 10, "some_test_param": "test-params"}
45+
assert component.generation_kwargs == {"max_completion_tokens": 10, "some_test_param": "test-params"}
4646
assert component.azure_ad_token_provider is not None
4747
assert component.max_retries == 5
4848

@@ -53,15 +53,15 @@ def test_init_with_0_max_retries(self):
5353
azure_endpoint="some-non-existing-endpoint",
5454
azure_deployment="gpt-4o-mini",
5555
streaming_callback=print_streaming_chunk,
56-
generation_kwargs={"max_tokens": 10, "some_test_param": "test-params"},
56+
generation_kwargs={"max_completion_tokens": 10, "some_test_param": "test-params"},
5757
azure_ad_token_provider=default_azure_ad_token_provider,
5858
max_retries=0,
5959
)
6060
assert component.client.api_key == "fake-api-key"
6161
assert component.azure_deployment == "gpt-4o-mini"
6262
assert component.streaming_callback is print_streaming_chunk
6363
assert component.timeout == 30.0
64-
assert component.generation_kwargs == {"max_tokens": 10, "some_test_param": "test-params"}
64+
assert component.generation_kwargs == {"max_completion_tokens": 10, "some_test_param": "test-params"}
6565
assert component.azure_ad_token_provider is not None
6666
assert component.max_retries == 0
6767

@@ -99,7 +99,7 @@ def test_to_dict_with_parameters(self, monkeypatch):
9999
timeout=3.5,
100100
max_retries=10,
101101
http_client_kwargs={"proxy": "http://localhost:8080"},
102-
generation_kwargs={"max_tokens": 10, "some_test_param": "test-params"},
102+
generation_kwargs={"max_completion_tokens": 10, "some_test_param": "test-params"},
103103
azure_ad_token_provider=default_azure_ad_token_provider,
104104
)
105105

@@ -118,7 +118,7 @@ def test_to_dict_with_parameters(self, monkeypatch):
118118
"timeout": 3.5,
119119
"max_retries": 10,
120120
"http_client_kwargs": {"proxy": "http://localhost:8080"},
121-
"generation_kwargs": {"max_tokens": 10, "some_test_param": "test-params"},
121+
"generation_kwargs": {"max_completion_tokens": 10, "some_test_param": "test-params"},
122122
"default_headers": {},
123123
"azure_ad_token_provider": "haystack.utils.azure.default_azure_ad_token_provider",
124124
},

0 commit comments

Comments (0)