Skip to content

Commit 0cd297a

Browse files
authored
docs: Update docstrings of OpenAI Generators to use max_completion_tokens (#9874)
* Update docstrings to use `max_completion_tokens` instead of the deprecated `max_tokens`
* Change more instances of `max_tokens`
* Update tests
1 parent d77a853 commit 0cd297a

File tree

10 files changed

+50
-43
lines changed

10 files changed

+50
-43
lines changed

haystack/components/extractors/llm_metadata_extractor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ class LLMMetadataExtractor:
9090
9191
chat_generator = OpenAIChatGenerator(
9292
generation_kwargs={
93-
"max_tokens": 500,
93+
"max_completion_tokens": 500,
9494
"temperature": 0.0,
9595
"seed": 0,
9696
"response_format": {"type": "json_object"},

haystack/components/generators/azure.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,8 @@ def __init__( # pylint: disable=too-many-positional-arguments # noqa: PLR0913
9999
the OpenAI endpoint. See [OpenAI documentation](https://platform.openai.com/docs/api-reference/chat) for
100100
more details.
101101
Some of the supported parameters:
102-
- `max_tokens`: The maximum number of tokens the output text can have.
102+
- `max_completion_tokens`: An upper bound for the number of tokens that can be generated for a completion,
103+
including visible output tokens and reasoning tokens.
103104
- `temperature`: The sampling temperature to use. Higher values mean the model takes more risks.
104105
Try 0.9 for more creative applications and 0 (argmax sampling) for ones with a well-defined answer.
105106
- `top_p`: An alternative to sampling with temperature, called nucleus sampling, where the model

haystack/components/generators/chat/azure.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,8 @@ def __init__( # pylint: disable=too-many-positional-arguments
110110
:param generation_kwargs: Other parameters to use for the model. These parameters are sent directly to
111111
the OpenAI endpoint. For details, see [OpenAI documentation](https://platform.openai.com/docs/api-reference/chat).
112112
Some of the supported parameters:
113-
- `max_tokens`: The maximum number of tokens the output text can have.
113+
- `max_completion_tokens`: An upper bound for the number of tokens that can be generated for a completion,
114+
including visible output tokens and reasoning tokens.
114115
- `temperature`: The sampling temperature to use. Higher values mean the model takes more risks.
115116
Try 0.9 for more creative applications and 0 (argmax sampling) for ones with a well-defined answer.
116117
- `top_p`: Nucleus sampling is an alternative to sampling with temperature, where the model considers

haystack/components/generators/chat/openai.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,8 @@ def __init__( # pylint: disable=too-many-positional-arguments
127127
the OpenAI endpoint. See OpenAI [documentation](https://platform.openai.com/docs/api-reference/chat) for
128128
more details.
129129
Some of the supported parameters:
130-
- `max_tokens`: The maximum number of tokens the output text can have.
130+
- `max_completion_tokens`: An upper bound for the number of tokens that can be generated for a completion,
131+
including visible output tokens and reasoning tokens.
131132
- `temperature`: What sampling temperature to use. Higher values mean the model will take more risks.
132133
Try 0.9 for more creative applications and 0 (argmax sampling) for ones with a well-defined answer.
133134
- `top_p`: An alternative to sampling with temperature, called nucleus sampling, where the model
@@ -511,7 +512,7 @@ def _check_finish_reason(meta: dict[str, Any]) -> None:
511512
if meta["finish_reason"] == "length":
512513
logger.warning(
513514
"The completion for index {index} has been truncated before reaching a natural stopping point. "
514-
"Increase the max_tokens parameter to allow for longer completions.",
515+
"Increase the max_completion_tokens parameter to allow for longer completions.",
515516
index=meta["index"],
516517
finish_reason=meta["finish_reason"],
517518
)

haystack/components/generators/openai.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,8 @@ def __init__( # pylint: disable=too-many-positional-arguments
9292
the OpenAI endpoint. See OpenAI [documentation](https://platform.openai.com/docs/api-reference/chat) for
9393
more details.
9494
Some of the supported parameters:
95-
- `max_tokens`: The maximum number of tokens the output text can have.
95+
- `max_completion_tokens`: An upper bound for the number of tokens that can be generated for a completion,
96+
including visible output tokens and reasoning tokens.
9697
- `temperature`: What sampling temperature to use. Higher values mean the model will take more risks.
9798
Try 0.9 for more creative applications and 0 (argmax sampling) for ones with a well-defined answer.
9899
- `top_p`: An alternative to sampling with temperature, called nucleus sampling, where the model

test/components/generators/chat/test_azure.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -94,15 +94,15 @@ def test_init_with_parameters(self, tools):
9494
api_key=Secret.from_token("test-api-key"),
9595
azure_endpoint="some-non-existing-endpoint",
9696
streaming_callback=print_streaming_chunk,
97-
generation_kwargs={"max_tokens": 10, "some_test_param": "test-params"},
97+
generation_kwargs={"max_completion_tokens": 10, "some_test_param": "test-params"},
9898
tools=tools,
9999
tools_strict=True,
100100
azure_ad_token_provider=default_azure_ad_token_provider,
101101
)
102102
assert component.client.api_key == "test-api-key"
103103
assert component.azure_deployment == "gpt-4o-mini"
104104
assert component.streaming_callback is print_streaming_chunk
105-
assert component.generation_kwargs == {"max_tokens": 10, "some_test_param": "test-params"}
105+
assert component.generation_kwargs == {"max_completion_tokens": 10, "some_test_param": "test-params"}
106106
assert component.tools == tools
107107
assert component.tools_strict
108108
assert component.azure_ad_token_provider is not None
@@ -114,7 +114,7 @@ def test_init_with_0_max_retries(self, tools):
114114
api_key=Secret.from_token("test-api-key"),
115115
azure_endpoint="some-non-existing-endpoint",
116116
streaming_callback=print_streaming_chunk,
117-
generation_kwargs={"max_tokens": 10, "some_test_param": "test-params"},
117+
generation_kwargs={"max_completion_tokens": 10, "some_test_param": "test-params"},
118118
tools=tools,
119119
tools_strict=True,
120120
azure_ad_token_provider=default_azure_ad_token_provider,
@@ -123,7 +123,7 @@ def test_init_with_0_max_retries(self, tools):
123123
assert component.client.api_key == "test-api-key"
124124
assert component.azure_deployment == "gpt-4o-mini"
125125
assert component.streaming_callback is print_streaming_chunk
126-
assert component.generation_kwargs == {"max_tokens": 10, "some_test_param": "test-params"}
126+
assert component.generation_kwargs == {"max_completion_tokens": 10, "some_test_param": "test-params"}
127127
assert component.tools == tools
128128
assert component.tools_strict
129129
assert component.azure_ad_token_provider is not None
@@ -164,7 +164,7 @@ def test_to_dict_with_parameters(self, monkeypatch, calendar_event_model):
164164
timeout=2.5,
165165
max_retries=10,
166166
generation_kwargs={
167-
"max_tokens": 10,
167+
"max_completion_tokens": 10,
168168
"some_test_param": "test-params",
169169
"response_format": calendar_event_model,
170170
},
@@ -185,7 +185,7 @@ def test_to_dict_with_parameters(self, monkeypatch, calendar_event_model):
185185
"timeout": 2.5,
186186
"max_retries": 10,
187187
"generation_kwargs": {
188-
"max_tokens": 10,
188+
"max_completion_tokens": 10,
189189
"some_test_param": "test-params",
190190
"response_format": {
191191
"type": "json_schema",
@@ -435,14 +435,14 @@ def test_init_should_also_create_async_client_with_same_args(self, tools):
435435
api_key=Secret.from_token("test-api-key"),
436436
azure_endpoint="some-non-existing-endpoint",
437437
streaming_callback=print_streaming_chunk,
438-
generation_kwargs={"max_tokens": 10, "some_test_param": "test-params"},
438+
generation_kwargs={"max_completion_tokens": 10, "some_test_param": "test-params"},
439439
tools=tools,
440440
tools_strict=True,
441441
)
442442
assert component.async_client.api_key == "test-api-key"
443443
assert component.azure_deployment == "gpt-4o-mini"
444444
assert component.streaming_callback is print_streaming_chunk
445-
assert component.generation_kwargs == {"max_tokens": 10, "some_test_param": "test-params"}
445+
assert component.generation_kwargs == {"max_completion_tokens": 10, "some_test_param": "test-params"}
446446
assert component.tools == tools
447447
assert component.tools_strict
448448

test/components/generators/chat/test_openai.py

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,7 @@ def test_init_with_parameters(self, monkeypatch):
213213
model="gpt-4o-mini",
214214
streaming_callback=print_streaming_chunk,
215215
api_base_url="test-base-url",
216-
generation_kwargs={"max_tokens": 10, "some_test_param": "test-params"},
216+
generation_kwargs={"max_completion_tokens": 10, "some_test_param": "test-params"},
217217
timeout=40.0,
218218
max_retries=1,
219219
tools=[tool],
@@ -223,7 +223,7 @@ def test_init_with_parameters(self, monkeypatch):
223223
assert component.client.api_key == "test-api-key"
224224
assert component.model == "gpt-4o-mini"
225225
assert component.streaming_callback is print_streaming_chunk
226-
assert component.generation_kwargs == {"max_tokens": 10, "some_test_param": "test-params"}
226+
assert component.generation_kwargs == {"max_completion_tokens": 10, "some_test_param": "test-params"}
227227
assert component.client.timeout == 40.0
228228
assert component.client.max_retries == 1
229229
assert component.tools == [tool]
@@ -238,12 +238,12 @@ def test_init_with_parameters_and_env_vars(self, monkeypatch):
238238
model="gpt-4o-mini",
239239
streaming_callback=print_streaming_chunk,
240240
api_base_url="test-base-url",
241-
generation_kwargs={"max_tokens": 10, "some_test_param": "test-params"},
241+
generation_kwargs={"max_completion_tokens": 10, "some_test_param": "test-params"},
242242
)
243243
assert component.client.api_key == "test-api-key"
244244
assert component.model == "gpt-4o-mini"
245245
assert component.streaming_callback is print_streaming_chunk
246-
assert component.generation_kwargs == {"max_tokens": 10, "some_test_param": "test-params"}
246+
assert component.generation_kwargs == {"max_completion_tokens": 10, "some_test_param": "test-params"}
247247
assert component.client.timeout == 100.0
248248
assert component.client.max_retries == 10
249249

@@ -278,7 +278,7 @@ def test_to_dict_with_parameters(self, monkeypatch, calendar_event_model):
278278
streaming_callback=print_streaming_chunk,
279279
api_base_url="test-base-url",
280280
generation_kwargs={
281-
"max_tokens": 10,
281+
"max_completion_tokens": 10,
282282
"some_test_param": "test-params",
283283
"response_format": calendar_event_model,
284284
},
@@ -301,7 +301,7 @@ def test_to_dict_with_parameters(self, monkeypatch, calendar_event_model):
301301
"timeout": 100.0,
302302
"streaming_callback": "haystack.components.generators.utils.print_streaming_chunk",
303303
"generation_kwargs": {
304-
"max_tokens": 10,
304+
"max_completion_tokens": 10,
305305
"some_test_param": "test-params",
306306
"response_format": {
307307
"type": "json_schema",
@@ -377,7 +377,7 @@ def test_from_dict(self, monkeypatch):
377377
"streaming_callback": "haystack.components.generators.utils.print_streaming_chunk",
378378
"max_retries": 10,
379379
"timeout": 100.0,
380-
"generation_kwargs": {"max_tokens": 10, "some_test_param": "test-params"},
380+
"generation_kwargs": {"max_completion_tokens": 10, "some_test_param": "test-params"},
381381
"tools": [
382382
{
383383
"type": "haystack.tools.tool.Tool",
@@ -399,7 +399,7 @@ def test_from_dict(self, monkeypatch):
399399
assert component.model == "gpt-4o-mini"
400400
assert component.streaming_callback is print_streaming_chunk
401401
assert component.api_base_url == "test-base-url"
402-
assert component.generation_kwargs == {"max_tokens": 10, "some_test_param": "test-params"}
402+
assert component.generation_kwargs == {"max_completion_tokens": 10, "some_test_param": "test-params"}
403403
assert component.api_key == Secret.from_env_var("OPENAI_API_KEY")
404404
assert component.tools == [
405405
Tool(name="name", description="description", parameters={"x": {"type": "string"}}, function=print)
@@ -419,7 +419,7 @@ def test_from_dict_fail_wo_env_var(self, monkeypatch):
419419
"organization": None,
420420
"api_base_url": "test-base-url",
421421
"streaming_callback": "haystack.components.generators.utils.print_streaming_chunk",
422-
"generation_kwargs": {"max_tokens": 10, "some_test_param": "test-params"},
422+
"generation_kwargs": {"max_completion_tokens": 10, "some_test_param": "test-params"},
423423
"tools": None,
424424
},
425425
}
@@ -439,13 +439,14 @@ def test_run(self, chat_messages, openai_mock_chat_completion):
439439

440440
def test_run_with_params(self, chat_messages, openai_mock_chat_completion):
441441
component = OpenAIChatGenerator(
442-
api_key=Secret.from_token("test-api-key"), generation_kwargs={"max_tokens": 10, "temperature": 0.5}
442+
api_key=Secret.from_token("test-api-key"),
443+
generation_kwargs={"max_completion_tokens": 10, "temperature": 0.5},
443444
)
444445
response = component.run(chat_messages)
445446

446447
# check that the component calls the OpenAI API with the correct parameters
447448
_, kwargs = openai_mock_chat_completion.call_args
448-
assert kwargs["max_tokens"] == 10
449+
assert kwargs["max_completion_tokens"] == 10
449450
assert kwargs["temperature"] == 0.5
450451

451452
# check that the tools are not passed to the OpenAI API (the generator is initialized without tools)
@@ -573,7 +574,7 @@ def test_check_abnormal_completions(self, caplog):
573574
# check truncation warning
574575
message_template = (
575576
"The completion for index {index} has been truncated before reaching a natural stopping point. "
576-
"Increase the max_tokens parameter to allow for longer completions."
577+
"Increase the max_completion_tokens parameter to allow for longer completions."
577578
)
578579

579580
for index in [1, 3]:

test/components/generators/chat/test_openai_async.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -118,13 +118,14 @@ async def test_run_async(self, chat_messages, openai_mock_async_chat_completion)
118118
@pytest.mark.asyncio
119119
async def test_run_with_params_async(self, chat_messages, openai_mock_async_chat_completion):
120120
component = OpenAIChatGenerator(
121-
api_key=Secret.from_token("test-api-key"), generation_kwargs={"max_tokens": 10, "temperature": 0.5}
121+
api_key=Secret.from_token("test-api-key"),
122+
generation_kwargs={"max_completion_tokens": 10, "temperature": 0.5},
122123
)
123124
response = await component.run_async(chat_messages)
124125

125126
# check that the component calls the OpenAI API with the correct parameters
126127
_, kwargs = openai_mock_async_chat_completion.call_args
127-
assert kwargs["max_tokens"] == 10
128+
assert kwargs["max_completion_tokens"] == 10
128129
assert kwargs["temperature"] == 0.5
129130

130131
# check that the tools are not passed to the OpenAI API (the generator is initialized without tools)

test/components/generators/test_azure.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -35,14 +35,14 @@ def test_init_with_parameters(self):
3535
azure_endpoint="some-non-existing-endpoint",
3636
azure_deployment="gpt-4o-mini",
3737
streaming_callback=print_streaming_chunk,
38-
generation_kwargs={"max_tokens": 10, "some_test_param": "test-params"},
38+
generation_kwargs={"max_completion_tokens": 10, "some_test_param": "test-params"},
3939
azure_ad_token_provider=default_azure_ad_token_provider,
4040
)
4141
assert component.client.api_key == "fake-api-key"
4242
assert component.azure_deployment == "gpt-4o-mini"
4343
assert component.streaming_callback is print_streaming_chunk
4444
assert component.timeout == 30.0
45-
assert component.generation_kwargs == {"max_tokens": 10, "some_test_param": "test-params"}
45+
assert component.generation_kwargs == {"max_completion_tokens": 10, "some_test_param": "test-params"}
4646
assert component.azure_ad_token_provider is not None
4747
assert component.max_retries == 5
4848

@@ -53,15 +53,15 @@ def test_init_with_0_max_retries(self):
5353
azure_endpoint="some-non-existing-endpoint",
5454
azure_deployment="gpt-4o-mini",
5555
streaming_callback=print_streaming_chunk,
56-
generation_kwargs={"max_tokens": 10, "some_test_param": "test-params"},
56+
generation_kwargs={"max_completion_tokens": 10, "some_test_param": "test-params"},
5757
azure_ad_token_provider=default_azure_ad_token_provider,
5858
max_retries=0,
5959
)
6060
assert component.client.api_key == "fake-api-key"
6161
assert component.azure_deployment == "gpt-4o-mini"
6262
assert component.streaming_callback is print_streaming_chunk
6363
assert component.timeout == 30.0
64-
assert component.generation_kwargs == {"max_tokens": 10, "some_test_param": "test-params"}
64+
assert component.generation_kwargs == {"max_completion_tokens": 10, "some_test_param": "test-params"}
6565
assert component.azure_ad_token_provider is not None
6666
assert component.max_retries == 0
6767

@@ -99,7 +99,7 @@ def test_to_dict_with_parameters(self, monkeypatch):
9999
timeout=3.5,
100100
max_retries=10,
101101
http_client_kwargs={"proxy": "http://localhost:8080"},
102-
generation_kwargs={"max_tokens": 10, "some_test_param": "test-params"},
102+
generation_kwargs={"max_completion_tokens": 10, "some_test_param": "test-params"},
103103
azure_ad_token_provider=default_azure_ad_token_provider,
104104
)
105105

@@ -118,7 +118,7 @@ def test_to_dict_with_parameters(self, monkeypatch):
118118
"timeout": 3.5,
119119
"max_retries": 10,
120120
"http_client_kwargs": {"proxy": "http://localhost:8080"},
121-
"generation_kwargs": {"max_tokens": 10, "some_test_param": "test-params"},
121+
"generation_kwargs": {"max_completion_tokens": 10, "some_test_param": "test-params"},
122122
"default_headers": {},
123123
"azure_ad_token_provider": "haystack.utils.azure.default_azure_ad_token_provider",
124124
},

0 commit comments

Comments (0)