
Commit 4e50ec4

feat(openai): enable stream_usage when using default base URL and client (#33205)
1 parent 90e4d94 commit 4e50ec4
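
In effect, ChatOpenAI (and AzureChatOpenAI) now default stream_usage to True whenever no custom base URL, proxy, or HTTP client is supplied, so a stream ends with an extra chunk carrying token usage. A minimal sketch of the new default behavior (the model name is illustrative, and an OPENAI_API_KEY is assumed to be set in the environment):

from langchain_openai import ChatOpenAI

# With the default base URL and client, stream_usage is now on by default,
# so the stream includes a chunk whose usage_metadata is populated.
llm = ChatOpenAI(model="gpt-4o-mini")

usage = None
for chunk in llm.stream("Hello"):
    if chunk.usage_metadata is not None:
        usage = chunk.usage_metadata
print(usage)  # e.g. {'input_tokens': ..., 'output_tokens': ..., 'total_tokens': ...}

# Opting out still works, per instance or per call:
llm = ChatOpenAI(model="gpt-4o-mini", stream_usage=False)
for chunk in llm.stream("Hello", stream_usage=False):
    pass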

File tree: 12 files changed (+100, −17 lines)


libs/langchain/tests/unit_tests/chat_models/test_base.py

Lines changed: 1 addition & 1 deletion
@@ -167,7 +167,7 @@ def test_configurable() -> None:
         "store": None,
         "extra_body": None,
         "include_response_headers": False,
-        "stream_usage": False,
+        "stream_usage": True,
         "use_previous_response_id": False,
         "use_responses_api": None,
     },

libs/langchain_v1/tests/unit_tests/chat_models/test_chat_models.py

Lines changed: 1 addition & 1 deletion
@@ -168,7 +168,7 @@ def test_configurable() -> None:
         "store": None,
         "extra_body": None,
         "include_response_headers": False,
-        "stream_usage": False,
+        "stream_usage": True,
         "use_previous_response_id": False,
         "use_responses_api": None,
     },

libs/partners/openai/langchain_openai/chat_models/azure.py

Lines changed: 19 additions & 0 deletions
@@ -615,6 +615,25 @@ def validate_environment(self) -> Self:
             or os.getenv("OPENAI_ORG_ID")
             or os.getenv("OPENAI_ORGANIZATION")
         )
+
+        # Enable stream_usage by default if using default base URL and client
+        if all(
+            getattr(self, key, None) is None
+            for key in (
+                "stream_usage",
+                "openai_proxy",
+                "openai_api_base",
+                "base_url",
+                "client",
+                "root_client",
+                "async_client",
+                "root_async_client",
+                "http_client",
+                "http_async_client",
+            )
+        ):
+            self.stream_usage = True
+
         # For backwards compatibility. Before openai v1, no distinction was made
         # between azure_endpoint and base_url (openai_api_base).
         openai_api_base = self.openai_api_base
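
The gating is identical in azure.py and base.py: the default is applied only when the caller left stream_usage and every base-URL/proxy/client override unset. A standalone sketch of that condition (illustrative helper, not the actual method):

_OVERRIDE_KEYS = (
    "stream_usage",
    "openai_proxy",
    "openai_api_base",
    "base_url",
    "client",
    "root_client",
    "async_client",
    "root_async_client",
    "http_client",
    "http_async_client",
)

def should_default_stream_usage(model: object) -> bool:
    # getattr(..., None) treats a missing attribute and an explicit None alike,
    # so any caller-supplied value (even stream_usage=False) suppresses the
    # new default.
    return all(getattr(model, key, None) is None for key in _OVERRIDE_KEYS)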

libs/partners/openai/langchain_openai/chat_models/base.py

Lines changed: 36 additions & 4 deletions
@@ -99,7 +99,13 @@
     is_basemodel_subclass,
 )
 from langchain_core.utils.utils import _build_model_kwargs, from_env, secret_from_env
-from pydantic import BaseModel, ConfigDict, Field, SecretStr, model_validator
+from pydantic import (
+    BaseModel,
+    ConfigDict,
+    Field,
+    SecretStr,
+    model_validator,
+)
 from pydantic.v1 import BaseModel as BaseModelV1
 from typing_extensions import Self

@@ -478,11 +484,18 @@ class BaseChatOpenAI(BaseChatModel):
     )
     """Timeout for requests to OpenAI completion API. Can be float, ``httpx.Timeout`` or
     None."""
-    stream_usage: bool = False
-    """Whether to include usage metadata in streaming output. If True, an additional
+    stream_usage: Optional[bool] = None
+    """Whether to include usage metadata in streaming output. If enabled, an additional
     message chunk will be generated during the stream including usage metadata.

+    This parameter is enabled unless ``openai_api_base`` is set or the model is
+    initialized with a custom client, as many chat completions APIs do not support
+    streaming token usage.
+
     !!! version-added "Added in version 0.3.9"
+
+    !!! warning "Behavior changed in 0.3.35"
+        Enabled for default base URL and client.
     """
     max_retries: Optional[int] = None
     """Maximum number of retries to make when generating."""

@@ -761,6 +774,25 @@ def validate_environment(self) -> Self:
             or os.getenv("OPENAI_ORGANIZATION")
         )
         self.openai_api_base = self.openai_api_base or os.getenv("OPENAI_API_BASE")
+
+        # Enable stream_usage by default if using default base URL and client
+        if all(
+            getattr(self, key, None) is None
+            for key in (
+                "stream_usage",
+                "openai_proxy",
+                "openai_api_base",
+                "base_url",
+                "client",
+                "root_client",
+                "async_client",
+                "root_async_client",
+                "http_client",
+                "http_async_client",
+            )
+        ):
+            self.stream_usage = True
+
         client_params: dict = {
             "api_key": (
                 self.openai_api_key.get_secret_value() if self.openai_api_key else None

@@ -1073,7 +1105,7 @@ def _should_stream_usage(
         for source in stream_usage_sources:
             if isinstance(source, bool):
                 return source
-        return self.stream_usage
+        return self.stream_usage or False

     def _stream(
         self,
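
The _should_stream_usage change keeps call-site precedence intact: the first explicit boolean among the per-call sources wins, and `or False` coerces the new None instance default when nothing enabled it. A simplified sketch of the resolution order (the real method's list of sources is abridged here):

from typing import Optional

def resolve_stream_usage(
    instance_value: Optional[bool], *call_site_sources: object
) -> bool:
    # A per-call stream_usage kwarg or stream_options={"include_usage": ...}
    # produces an explicit bool; the first one found wins outright.
    for source in call_site_sources:
        if isinstance(source, bool):
            return source
    # Fall back to the instance attribute; None (e.g. when a custom base URL
    # or client suppressed the default) coerces to False.
    return instance_value or False

assert resolve_stream_usage(None) is False         # nothing set anywhere
assert resolve_stream_usage(True) is True          # instance default enabled
assert resolve_stream_usage(True, False) is False  # per-call override wins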

libs/partners/openai/tests/integration_tests/chat_models/test_azure_standard.py

Lines changed: 0 additions & 2 deletions
@@ -23,7 +23,6 @@ def chat_model_params(self) -> dict:
             "deployment_name": os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"],
             "openai_api_version": OPENAI_API_VERSION,
             "azure_endpoint": OPENAI_API_BASE,
-            "stream_usage": True,
         }

     @property
@@ -83,7 +82,6 @@ def chat_model_params(self) -> dict:
             "deployment_name": os.environ["AZURE_OPENAI_LEGACY_CHAT_DEPLOYMENT_NAME"],
             "openai_api_version": OPENAI_API_VERSION,
             "azure_endpoint": OPENAI_API_BASE,
-            "stream_usage": True,
         }

     @property

libs/partners/openai/tests/integration_tests/chat_models/test_base.py

Lines changed: 11 additions & 5 deletions
@@ -200,7 +200,7 @@ def test_openai_invoke() -> None:

 def test_stream() -> None:
     """Test streaming tokens from OpenAI."""
-    llm = ChatOpenAI()
+    llm = ChatOpenAI(model="gpt-4.1-mini")

     full: Optional[BaseMessageChunk] = None
     for chunk in llm.stream("I'm Pickle Rick"):

@@ -214,7 +214,7 @@ def test_stream() -> None:
     aggregate: Optional[BaseMessageChunk] = None
     chunks_with_token_counts = 0
     chunks_with_response_metadata = 0
-    for chunk in llm.stream("Hello", stream_usage=True):
+    for chunk in llm.stream("Hello"):
         assert isinstance(chunk.content, str)
         aggregate = chunk if aggregate is None else aggregate + chunk
         assert isinstance(chunk, AIMessageChunk)

@@ -281,13 +281,14 @@ async def _test_stream(stream: AsyncIterator, expect_usage: bool) -> None:
     assert chunks_with_token_counts == 0
     assert full.usage_metadata is None

-    llm = ChatOpenAI(temperature=0, max_tokens=MAX_TOKEN_COUNT)  # type: ignore[call-arg]
-    await _test_stream(llm.astream("Hello"), expect_usage=False)
+    llm = ChatOpenAI(model="gpt-4.1-mini", temperature=0, max_tokens=MAX_TOKEN_COUNT)  # type: ignore[call-arg]
+    await _test_stream(llm.astream("Hello", stream_usage=False), expect_usage=False)
     await _test_stream(
         llm.astream("Hello", stream_options={"include_usage": True}), expect_usage=True
     )
     await _test_stream(llm.astream("Hello", stream_usage=True), expect_usage=True)
     llm = ChatOpenAI(
+        model="gpt-4.1-mini",
         temperature=0,
         max_tokens=MAX_TOKEN_COUNT,  # type: ignore[call-arg]
         model_kwargs={"stream_options": {"include_usage": True}},

@@ -297,7 +298,12 @@ async def _test_stream(stream: AsyncIterator, expect_usage: bool) -> None:
         llm.astream("Hello", stream_options={"include_usage": False}),
         expect_usage=False,
     )
-    llm = ChatOpenAI(temperature=0, max_tokens=MAX_TOKEN_COUNT, stream_usage=True)  # type: ignore[call-arg]
+    llm = ChatOpenAI(
+        model="gpt-4.1-mini",
+        temperature=0,
+        max_tokens=MAX_TOKEN_COUNT,  # type: ignore[call-arg]
+        stream_usage=True,
+    )
     await _test_stream(llm.astream("Hello"), expect_usage=True)
     await _test_stream(llm.astream("Hello", stream_usage=False), expect_usage=False)

libs/partners/openai/tests/integration_tests/chat_models/test_base_standard.py

Lines changed: 1 addition & 1 deletion
@@ -22,7 +22,7 @@ def chat_model_class(self) -> type[BaseChatModel]:

     @property
     def chat_model_params(self) -> dict:
-        return {"model": "gpt-4o-mini", "stream_usage": True}
+        return {"model": "gpt-4o-mini"}

     @property
     def supports_image_inputs(self) -> bool:

libs/partners/openai/tests/unit_tests/chat_models/__snapshots__/test_azure_standard.ambr

Lines changed: 1 addition & 0 deletions
@@ -27,6 +27,7 @@
     'request_timeout': 60.0,
     'stop': list([
     ]),
+    'stream_usage': True,
     'temperature': 0.0,
     'validate_base_url': True,
   }),

libs/partners/openai/tests/unit_tests/chat_models/__snapshots__/test_base_standard.ambr

Lines changed: 1 addition & 0 deletions
@@ -21,6 +21,7 @@
     'request_timeout': 60.0,
     'stop': list([
     ]),
+    'stream_usage': True,
     'temperature': 0.0,
   }),
   'lc': 1,

libs/partners/openai/tests/unit_tests/chat_models/__snapshots__/test_responses_standard.ambr

Lines changed: 1 addition & 0 deletions
@@ -21,6 +21,7 @@
     'request_timeout': 60.0,
     'stop': list([
     ]),
+    'stream_usage': True,
     'temperature': 0.0,
     'use_responses_api': True,
   }),
