
Commit 47d3e5c
Rename Cost to Usage (#403)
1 parent f8cfa28
27 files changed: +289 −289 lines

docs/api/models/ollama.md

Lines changed: 4 additions & 4 deletions
@@ -31,8 +31,8 @@ agent = Agent('ollama:llama3.2', result_type=CityLocation)
 result = agent.run_sync('Where were the olympics held in 2012?')
 print(result.data)
 #> city='London' country='United Kingdom'
-print(result.cost())
-#> Cost(request_tokens=57, response_tokens=8, total_tokens=65, details=None)
+print(result.usage())
+#> Usage(request_tokens=57, response_tokens=8, total_tokens=65, details=None)
 ```

 ## Example using a remote server

@@ -59,8 +59,8 @@ agent = Agent(model=ollama_model, result_type=CityLocation)
 result = agent.run_sync('Where were the olympics held in 2012?')
 print(result.data)
 #> city='London' country='United Kingdom'
-print(result.cost())
-#> Cost(request_tokens=57, response_tokens=8, total_tokens=65, details=None)
+print(result.usage())
+#> Usage(request_tokens=57, response_tokens=8, total_tokens=65, details=None)
 ```

 1. The name of the model running on the remote server
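Since every call site in this commit changes `result.cost()` to `result.usage()`, downstream code that must run on either side of the rename may want a shim. A minimal sketch, not part of the commit, assuming only what the diff shows (old releases expose `.cost()`, new ones `.usage()`, returning the same fields):

```python
# Hypothetical compatibility helper for code spanning this rename;
# `run_usage` is an illustrative name, not a library API.
def run_usage(result):
    getter = getattr(result, 'usage', None) or result.cost
    return getter()
```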

docs/api/result.md

Lines changed: 1 addition & 1 deletion
@@ -7,4 +7,4 @@
 - ResultData
 - RunResult
 - StreamedRunResult
-- Cost
+- Usage

docs/results.md

Lines changed: 3 additions & 3 deletions
@@ -1,5 +1,5 @@
 Results are the final values returned from [running an agent](agents.md#running-agents).
-The result values are wrapped in [`RunResult`][pydantic_ai.result.RunResult] and [`StreamedRunResult`][pydantic_ai.result.StreamedRunResult] so you can access other data like [cost][pydantic_ai.result.Cost] of the run and [message history](message-history.md#accessing-messages-from-results)
+The result values are wrapped in [`RunResult`][pydantic_ai.result.RunResult] and [`StreamedRunResult`][pydantic_ai.result.StreamedRunResult] so you can access other data like [usage][pydantic_ai.result.Usage] of the run and [message history](message-history.md#accessing-messages-from-results)

 Both `RunResult` and `StreamedRunResult` are generic in the data they wrap, so typing information about the data returned by the agent is preserved.

@@ -18,8 +18,8 @@ agent = Agent('gemini-1.5-flash', result_type=CityLocation)
 result = agent.run_sync('Where were the olympics held in 2012?')
 print(result.data)
 #> city='London' country='United Kingdom'
-print(result.cost())
-#> Cost(request_tokens=57, response_tokens=8, total_tokens=65, details=None)
+print(result.usage())
+#> Usage(request_tokens=57, response_tokens=8, total_tokens=65, details=None)
 ```

 _(This example is complete, it can be run "as is")_

docs/testing-evals.md

Lines changed: 1 addition & 1 deletion
@@ -18,7 +18,7 @@ Unless you're really sure you know better, you'll probably want to follow roughl
 * Use [`pytest`](https://docs.pytest.org/en/stable/) as your test harness
 * If you find yourself typing out long assertions, use [inline-snapshot](https://15r10nk.github.io/inline-snapshot/latest/)
 * Similarly, [dirty-equals](https://dirty-equals.helpmanual.io/latest/) can be useful for comparing large data structures
-* Use [`TestModel`][pydantic_ai.models.test.TestModel] or [`FunctionModel`][pydantic_ai.models.function.FunctionModel] in place of your actual model to avoid the cost, latency and variability of real LLM calls
+* Use [`TestModel`][pydantic_ai.models.test.TestModel] or [`FunctionModel`][pydantic_ai.models.function.FunctionModel] in place of your actual model to avoid the usage, latency and variability of real LLM calls
 * Use [`Agent.override`][pydantic_ai.agent.Agent.override] to replace your model inside your application logic
 * Set [`ALLOW_MODEL_REQUESTS=False`][pydantic_ai.models.ALLOW_MODEL_REQUESTS] globally to block any requests from being made to non-test models accidentally
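The bullets above reference `TestModel` and `Agent.override` without showing them together. A hedged sketch of how they combine, assuming (the diff only links to these APIs) that `TestModel()` takes no required arguments and `override` accepts a `model=` keyword:

```python
# Sketch of the testing advice above; TestModel() and Agent.override(model=...)
# are assumptions based on the linked docs, not shown in this diff.
from pydantic_ai import Agent, models
from pydantic_ai.models.test import TestModel

models.ALLOW_MODEL_REQUESTS = False  # block accidental real-model requests

agent = Agent('gemini-1.5-flash')


def test_agent_gives_an_answer():
    with agent.override(model=TestModel()):
        result = agent.run_sync('Where were the olympics held in 2012?')
        assert result.data is not None
```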

pydantic_ai_examples/pydantic_model.py

Lines changed: 1 addition & 1 deletion
@@ -30,4 +30,4 @@ class MyModel(BaseModel):
 if __name__ == '__main__':
     result = agent.run_sync('The windy city in the US of A.')
     print(result.data)
-    print(result.cost())
+    print(result.usage())

pydantic_ai_examples/stream_markdown.py

Lines changed: 1 addition & 1 deletion
@@ -43,7 +43,7 @@ async def main():
                 async with agent.run_stream(prompt, model=model) as result:
                     async for message in result.stream():
                         live.update(Markdown(message))
-            console.log(result.cost())
+            console.log(result.usage())
         else:
             console.log(f'{model} requires {env_var} to be set.')

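The new `usage()` docstrings in this commit warn that totals aren't complete until the stream finishes, which is why `stream_markdown.py` logs usage only after the streaming context closes. A minimal sketch of that ordering (the model name and prompt are illustrative, not from the commit):

```python
# Sketch of the ordering the example relies on: read usage() only after the
# streaming context has exited, since totals are incomplete mid-stream.
import asyncio

from pydantic_ai import Agent

agent = Agent('openai:gpt-4o')  # illustrative model name


async def main():
    async with agent.run_stream('Say hello.') as result:
        async for text in result.stream():
            print(text)
    print(result.usage())  # complete only once the stream has finished


asyncio.run(main())
```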

pydantic_ai_slim/pydantic_ai/agent.py

Lines changed: 11 additions & 11 deletions
@@ -237,7 +237,7 @@ async def run(
         for tool in self._function_tools.values():
             tool.current_retry = 0

-        cost = result.Cost()
+        usage = result.Usage()

         model_settings = merge_model_settings(self.model_settings, model_settings)

@@ -248,12 +248,12 @@ async def run(
                 agent_model = await self._prepare_model(model_used, deps, messages)

             with _logfire.span('model request', run_step=run_step) as model_req_span:
-                model_response, request_cost = await agent_model.request(messages, model_settings)
+                model_response, request_usage = await agent_model.request(messages, model_settings)
                 model_req_span.set_attribute('response', model_response)
-                model_req_span.set_attribute('cost', request_cost)
+                model_req_span.set_attribute('usage', request_usage)

             messages.append(model_response)
-            cost += request_cost
+            usage += request_usage

             with _logfire.span('handle model response', run_step=run_step) as handle_span:
                 final_result, tool_responses = await self._handle_model_response(model_response, deps, messages)

@@ -266,10 +266,10 @@ async def run(
                 if final_result is not None:
                     result_data = final_result.data
                     run_span.set_attribute('all_messages', messages)
-                    run_span.set_attribute('cost', cost)
+                    run_span.set_attribute('usage', usage)
                     handle_span.set_attribute('result', result_data)
                     handle_span.message = 'handle model response -> final result'
-                    return result.RunResult(messages, new_message_index, result_data, cost)
+                    return result.RunResult(messages, new_message_index, result_data, usage)
                 else:
                     # continue the conversation
                     handle_span.set_attribute('tool_responses', tool_responses)

@@ -385,7 +385,7 @@ async def main():
         for tool in self._function_tools.values():
             tool.current_retry = 0

-        cost = result.Cost()
+        usage = result.Usage()
         model_settings = merge_model_settings(self.model_settings, model_settings)

         run_step = 0

@@ -434,7 +434,7 @@ async def on_complete():
                     yield result.StreamedRunResult(
                         messages,
                         new_message_index,
-                        cost,
+                        usage,
                         result_stream,
                         self._result_schema,
                         deps,

@@ -455,8 +455,8 @@ async def on_complete():
                 handle_span.set_attribute('tool_responses', tool_responses)
                 tool_responses_str = ' '.join(r.part_kind for r in tool_responses)
                 handle_span.message = f'handle model response -> {tool_responses_str}'
-                # the model_response should have been fully streamed by now, we can add it's cost
-                cost += model_response.cost()
+                # the model_response should have been fully streamed by now, we can add its usage
+                usage += model_response.usage()

     @contextmanager
     def override(

@@ -990,7 +990,7 @@ async def _handle_streamed_model_response(
             response = _messages.RetryPromptPart(
                 content='Plain text responses are not permitted, please call one of the functions instead.',
             )
-            # stream the response, so cost is correct
+            # stream the response, so usage is correct
             async for _ in model_response:
                 pass

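The run loop's bookkeeping reduces to: start from an empty `Usage`, add each model request's usage, and attach the total to the final `RunResult`. A standalone sketch of just that arithmetic (the per-request values are invented; `Usage` addition combining token fields is inferred from the `usage += request_usage` lines above):

```python
# Standalone sketch of the accumulation pattern in Agent.run above; the
# per-request values are made up for illustration.
from pydantic_ai.result import Usage

usage = Usage()  # mirrors `usage = result.Usage()`
for request_usage in (
    Usage(request_tokens=57, response_tokens=8, total_tokens=65),
    Usage(request_tokens=70, response_tokens=12, total_tokens=82),
):
    usage += request_usage  # mirrors `usage += request_usage`

print(usage.total_tokens)  # 147, assuming addition sums the token fields
```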

pydantic_ai_slim/pydantic_ai/models/__init__.py

Lines changed: 9 additions & 9 deletions
@@ -20,7 +20,7 @@
 from ..settings import ModelSettings

 if TYPE_CHECKING:
-    from ..result import Cost
+    from ..result import Usage
     from ..tools import ToolDefinition


@@ -122,7 +122,7 @@ class AgentModel(ABC):
     @abstractmethod
     async def request(
         self, messages: list[ModelMessage], model_settings: ModelSettings | None
-    ) -> tuple[ModelResponse, Cost]:
+    ) -> tuple[ModelResponse, Usage]:
         """Make a request to the model."""
         raise NotImplementedError()

@@ -164,10 +164,10 @@ def get(self, *, final: bool = False) -> Iterable[str]:
         raise NotImplementedError()

     @abstractmethod
-    def cost(self) -> Cost:
-        """Return the cost of the request.
+    def usage(self) -> Usage:
+        """Return the usage of the request.

-        NOTE: this won't return the ful cost until the stream is finished.
+        NOTE: this won't return the full usage until the stream is finished.
         """
         raise NotImplementedError()

@@ -205,10 +205,10 @@ def get(self, *, final: bool = False) -> ModelResponse:
         raise NotImplementedError()

     @abstractmethod
-    def cost(self) -> Cost:
-        """Get the cost of the request.
+    def usage(self) -> Usage:
+        """Get the usage of the request.

-        NOTE: this won't return the full cost until the stream is finished.
+        NOTE: this won't return the full usage until the stream is finished.
         """
         raise NotImplementedError()

@@ -235,7 +235,7 @@ def timestamp(self) -> datetime:
 def check_allow_model_requests() -> None:
     """Check if model requests are allowed.

-    If you're defining your own models that have cost or latency associated with their use, you should call this in
+    If you're defining your own models that have costs or latency associated with their use, you should call this in
     [`Model.agent_model`][pydantic_ai.models.Model.agent_model].

     Raises:
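The renamed abstract API means any third-party `AgentModel` must now return `tuple[ModelResponse, Usage]` from `request`. A hypothetical sketch of conforming to the new signature, assuming `TextPart(content=...)` and `ModelResponse(parts)` construction as seen elsewhere in this diff; it is not a complete model implementation (streaming and the owning `Model` are omitted):

```python
# Hypothetical minimal AgentModel honoring the renamed return type; not a
# complete implementation (request_stream and Model.agent_model are omitted).
from pydantic_ai.messages import ModelMessage, ModelResponse, TextPart
from pydantic_ai.models import AgentModel
from pydantic_ai.result import Usage
from pydantic_ai.settings import ModelSettings


class CannedAgentModel(AgentModel):
    async def request(
        self, messages: list[ModelMessage], model_settings: ModelSettings | None
    ) -> tuple[ModelResponse, Usage]:
        response = ModelResponse([TextPart(content='canned answer')])
        return response, Usage(request_tokens=0, response_tokens=0, total_tokens=0)
```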

pydantic_ai_slim/pydantic_ai/models/anthropic.py

Lines changed: 5 additions & 5 deletions
@@ -158,9 +158,9 @@ class AnthropicAgentModel(AgentModel):

     async def request(
         self, messages: list[ModelMessage], model_settings: ModelSettings | None
-    ) -> tuple[ModelResponse, result.Cost]:
+    ) -> tuple[ModelResponse, result.Usage]:
         response = await self._messages_create(messages, False, model_settings)
-        return self._process_response(response), _map_cost(response)
+        return self._process_response(response), _map_usage(response)

     @asynccontextmanager
     async def request_stream(

@@ -315,7 +315,7 @@ def _map_tool_call(t: ToolCallPart) -> ToolUseBlockParam:
     )


-def _map_cost(message: AnthropicMessage | RawMessageStreamEvent) -> result.Cost:
+def _map_usage(message: AnthropicMessage | RawMessageStreamEvent) -> result.Usage:
     if isinstance(message, AnthropicMessage):
         usage = message.usage
     else:

@@ -332,11 +332,11 @@ def _map_cost(message: AnthropicMessage | RawMessageStreamEvent) -> result.Cost:
         usage = None

     if usage is None:
-        return result.Cost()
+        return result.Usage()

     request_tokens = getattr(usage, 'input_tokens', None)

-    return result.Cost(
+    return result.Usage(
         # Usage coming from the RawMessageDeltaEvent doesn't have input token data, hence this getattr
         request_tokens=request_tokens,
         response_tokens=usage.output_tokens,
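`_map_usage` has to tolerate provider payloads that omit input-token data, hence the `getattr` with a `None` default. The same defensive shape, reduced to a runnable sketch with a stand-in payload type rather than Anthropic's:

```python
# Sketch of the defensive mapping above; FakeProviderUsage is a stand-in for
# a provider payload that may lack input-token data, not an Anthropic type.
from dataclasses import dataclass

from pydantic_ai.result import Usage


@dataclass
class FakeProviderUsage:
    output_tokens: int  # deliberately no input_tokens field


def map_usage(payload: FakeProviderUsage | None) -> Usage:
    if payload is None:
        return Usage()
    request_tokens = getattr(payload, 'input_tokens', None)  # None when absent
    return Usage(
        request_tokens=request_tokens,
        response_tokens=payload.output_tokens,
        total_tokens=(request_tokens or 0) + payload.output_tokens,
    )


print(map_usage(FakeProviderUsage(output_tokens=8)))
# -> request_tokens=None, response_tokens=8, total_tokens=8
```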

pydantic_ai_slim/pydantic_ai/models/function.py

Lines changed: 15 additions & 15 deletions
@@ -144,7 +144,7 @@ class FunctionAgentModel(AgentModel):

     async def request(
         self, messages: list[ModelMessage], model_settings: ModelSettings | None
-    ) -> tuple[ModelResponse, result.Cost]:
+    ) -> tuple[ModelResponse, result.Usage]:
         agent_info = replace(self.agent_info, model_settings=model_settings)

         assert self.function is not None, 'FunctionModel must receive a `function` to support non-streamed requests'

@@ -155,7 +155,7 @@ async def request(
         assert isinstance(response_, ModelResponse), response_
         response = response_
         # TODO is `messages` right here? Should it just be new messages?
-        return response, _estimate_cost(chain(messages, [response]))
+        return response, _estimate_usage(chain(messages, [response]))

     @asynccontextmanager
     async def request_stream(

@@ -198,8 +198,8 @@ def get(self, *, final: bool = False) -> Iterable[str]:
         yield from self._buffer
         self._buffer.clear()

-    def cost(self) -> result.Cost:
-        return result.Cost()
+    def usage(self) -> result.Usage:
+        return result.Usage()

     def timestamp(self) -> datetime:
         return self._timestamp

@@ -236,15 +236,15 @@ def get(self, *, final: bool = False) -> ModelResponse:

         return ModelResponse(calls, timestamp=self._timestamp)

-    def cost(self) -> result.Cost:
-        return result.Cost()
+    def usage(self) -> result.Usage:
+        return result.Usage()

     def timestamp(self) -> datetime:
         return self._timestamp


-def _estimate_cost(messages: Iterable[ModelMessage]) -> result.Cost:
-    """Very rough guesstimate of the number of tokens associate with a series of messages.
+def _estimate_usage(messages: Iterable[ModelMessage]) -> result.Usage:
+    """Very rough guesstimate of the token usage associated with a series of messages.

     This is designed to be used solely to give plausible numbers for testing!
     """

@@ -255,32 +255,32 @@ def _estimate_cost(messages: Iterable[ModelMessage]) -> result.Cost:
         if isinstance(message, ModelRequest):
             for part in message.parts:
                 if isinstance(part, (SystemPromptPart, UserPromptPart)):
-                    request_tokens += _string_cost(part.content)
+                    request_tokens += _string_usage(part.content)
                 elif isinstance(part, ToolReturnPart):
-                    request_tokens += _string_cost(part.model_response_str())
+                    request_tokens += _string_usage(part.model_response_str())
                 elif isinstance(part, RetryPromptPart):
-                    request_tokens += _string_cost(part.model_response())
+                    request_tokens += _string_usage(part.model_response())
                 else:
                     assert_never(part)
         elif isinstance(message, ModelResponse):
             for part in message.parts:
                 if isinstance(part, TextPart):
-                    response_tokens += _string_cost(part.content)
+                    response_tokens += _string_usage(part.content)
                 elif isinstance(part, ToolCallPart):
                     call = part
                     if isinstance(call.args, ArgsJson):
                         args_str = call.args.args_json
                     else:
                         args_str = pydantic_core.to_json(call.args.args_dict).decode()
-                    response_tokens += 1 + _string_cost(args_str)
+                    response_tokens += 1 + _string_usage(args_str)
                 else:
                     assert_never(part)
         else:
             assert_never(message)
-    return result.Cost(
+    return result.Usage(
         request_tokens=request_tokens, response_tokens=response_tokens, total_tokens=request_tokens + response_tokens
     )


-def _string_cost(content: str) -> int:
+def _string_usage(content: str) -> int:
     return len(re.split(r'[\s",.:]+', content))