@@ -600,7 +600,7 @@ def _process_response(self, response: chat.ChatCompletion | str) -> ModelRespons

         return ModelResponse(
             parts=items,
-            usage=_map_usage(response),
+            usage=_map_usage(response, self._provider.name, self._provider.base_url, self._model_name),
             model_name=response.model,
             timestamp=timestamp,
             provider_details=vendor_details or None,
@@ -631,6 +631,7 @@ async def _process_streamed_response(
             _response=peekable_response,
             _timestamp=number_to_datetime(first_chunk.created),
             _provider_name=self._provider.name,
+            _provider_url=self._provider.base_url,
         )

     def _get_tools(self, model_request_parameters: ModelRequestParameters) -> list[chat.ChatCompletionToolParam]:
@@ -1061,7 +1062,7 @@ def _process_response(  # noqa: C901

         return ModelResponse(
             parts=items,
-            usage=_map_usage(response),
+            usage=_map_usage(response, self._provider.name, self._provider.base_url, self._model_name),
             model_name=response.model,
             provider_response_id=response.id,
             timestamp=timestamp,
@@ -1088,6 +1089,7 @@ async def _process_streamed_response(
             _response=peekable_response,
             _timestamp=number_to_datetime(first_chunk.response.created_at),
             _provider_name=self._provider.name,
+            _provider_url=self._provider.base_url,
         )

     @overload
@@ -1589,10 +1591,11 @@ class OpenAIStreamedResponse(StreamedResponse):
     _response: AsyncIterable[ChatCompletionChunk]
     _timestamp: datetime
     _provider_name: str
+    _provider_url: str

     async def _get_event_iterator(self) -> AsyncIterator[ModelResponseStreamEvent]:
         async for chunk in self._response:
-            self._usage += _map_usage(chunk)
+            self._usage += _map_usage(chunk, self._provider_name, self._provider_url, self._model_name)

             if chunk.id:  # pragma: no branch
                 self.provider_response_id = chunk.id
@@ -1683,12 +1686,13 @@ class OpenAIResponsesStreamedResponse(StreamedResponse):
     _response: AsyncIterable[responses.ResponseStreamEvent]
     _timestamp: datetime
     _provider_name: str
+    _provider_url: str

     async def _get_event_iterator(self) -> AsyncIterator[ModelResponseStreamEvent]:  # noqa: C901
         async for chunk in self._response:
             # NOTE: You can inspect the builtin tools used checking the `ResponseCompletedEvent`.
             if isinstance(chunk, responses.ResponseCompletedEvent):
-                self._usage += _map_usage(chunk.response)
+                self._usage += self._map_usage(chunk.response)

                 raw_finish_reason = (
                     details.reason if (details := chunk.response.incomplete_details) else chunk.response.status
@@ -1708,7 +1712,7 @@ async def _get_event_iterator(self) -> AsyncIterator[ModelResponseStreamEvent]:
                 self.provider_response_id = chunk.response.id

             elif isinstance(chunk, responses.ResponseFailedEvent):  # pragma: no cover
-                self._usage += _map_usage(chunk.response)
+                self._usage += self._map_usage(chunk.response)

             elif isinstance(chunk, responses.ResponseFunctionCallArgumentsDeltaEvent):
                 maybe_event = self._parts_manager.handle_tool_call_delta(
@@ -1722,10 +1726,10 @@ async def _get_event_iterator(self) -> AsyncIterator[ModelResponseStreamEvent]:
                 pass  # there's nothing we need to do here

             elif isinstance(chunk, responses.ResponseIncompleteEvent):  # pragma: no cover
-                self._usage += _map_usage(chunk.response)
+                self._usage += self._map_usage(chunk.response)

             elif isinstance(chunk, responses.ResponseInProgressEvent):
-                self._usage += _map_usage(chunk.response)
+                self._usage += self._map_usage(chunk.response)

             elif isinstance(chunk, responses.ResponseOutputItemAddedEvent):
                 if isinstance(chunk.item, responses.ResponseFunctionToolCall):
@@ -1906,6 +1910,9 @@ async def _get_event_iterator(self) -> AsyncIterator[ModelResponseStreamEvent]:
                     UserWarning,
                 )

+    def _map_usage(self, response: responses.Response):
+        return _map_usage(response, self._provider_name, self._provider_url, self._model_name)
+
     @property
     def model_name(self) -> OpenAIModelName:
         """Get the model name of the response."""
@@ -1922,55 +1929,45 @@ def timestamp(self) -> datetime:
         return self._timestamp


-def _map_usage(response: chat.ChatCompletion | ChatCompletionChunk | responses.Response) -> usage.RequestUsage:
+def _map_usage(
+    response: chat.ChatCompletion | ChatCompletionChunk | responses.Response,
+    provider: str,
+    provider_url: str,
+    model: str,
+) -> usage.RequestUsage:
     response_usage = response.usage
     if response_usage is None:
         return usage.RequestUsage()
-    elif isinstance(response_usage, responses.ResponseUsage):
-        details: dict[str, int] = {
-            key: value
-            for key, value in response_usage.model_dump(
-                exclude={'input_tokens', 'output_tokens', 'total_tokens'}
-            ).items()
-            if isinstance(value, int)
-        }
-        # Handle vLLM compatibility - some providers don't include token details
-        if getattr(response_usage, 'input_tokens_details', None) is not None:
-            cache_read_tokens = response_usage.input_tokens_details.cached_tokens
-        else:
-            cache_read_tokens = 0
+
+    usage_data = response_usage.model_dump(exclude_none=True)
+    details = {
+        k: v
+        for k, v in usage_data.items()
+        if k not in {'prompt_tokens', 'completion_tokens', 'input_tokens', 'output_tokens', 'total_tokens'}
+        if isinstance(v, int)
+    }
+    response_data = dict(model=model, usage=usage_data)
+    if isinstance(response_usage, responses.ResponseUsage):
+        api_flavor = 'responses'

         if getattr(response_usage, 'output_tokens_details', None) is not None:
             details['reasoning_tokens'] = response_usage.output_tokens_details.reasoning_tokens
         else:
             details['reasoning_tokens'] = 0
-
-        return usage.RequestUsage(
-            input_tokens=response_usage.input_tokens,
-            output_tokens=response_usage.output_tokens,
-            cache_read_tokens=cache_read_tokens,
-            details=details,
-        )
     else:
-        details = {
-            key: value
-            for key, value in response_usage.model_dump(
-                exclude_none=True, exclude={'prompt_tokens', 'completion_tokens', 'total_tokens'}
-            ).items()
-            if isinstance(value, int)
-        }
-        u = usage.RequestUsage(
-            input_tokens=response_usage.prompt_tokens,
-            output_tokens=response_usage.completion_tokens,
-            details=details,
-        )
+        api_flavor = 'chat'
+
         if response_usage.completion_tokens_details is not None:
             details.update(response_usage.completion_tokens_details.model_dump(exclude_none=True))
-            u.output_audio_tokens = response_usage.completion_tokens_details.audio_tokens or 0
-        if response_usage.prompt_tokens_details is not None:
-            u.input_audio_tokens = response_usage.prompt_tokens_details.audio_tokens or 0
-            u.cache_read_tokens = response_usage.prompt_tokens_details.cached_tokens or 0
-        return u
+
+    return usage.RequestUsage.extract(
+        response_data,
+        provider=provider,
+        provider_url=provider_url,
+        provider_fallback='openai',
+        api_flavor=api_flavor,
+        details=details,
+    )


 def _split_combined_tool_call_id(combined_id: str) -> tuple[str, str | None]:
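
For reference, here is a minimal, self-contained sketch of the details-filtering step used by the rewritten `_map_usage` above. The payload is invented for illustration: the key names mirror OpenAI's Chat Completions usage shape, and the flat `cached_tokens` extra is hypothetical. It only demonstrates that core token counts and nested detail objects are excluded from `details`, while flat integer extras are kept.

# Illustrative only: replays the dict comprehension introduced in _map_usage.
usage_data = {
    'prompt_tokens': 120,  # core count, excluded from details
    'completion_tokens': 45,  # core count, excluded from details
    'total_tokens': 165,  # core count, excluded from details
    'prompt_tokens_details': {'cached_tokens': 100, 'audio_tokens': 0},  # nested dict, not an int, excluded
    'cached_tokens': 100,  # hypothetical flat integer extra, kept
}
details = {
    k: v
    for k, v in usage_data.items()
    if k not in {'prompt_tokens', 'completion_tokens', 'input_tokens', 'output_tokens', 'total_tokens'}
    if isinstance(v, int)
}
assert details == {'cached_tokens': 100}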