Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "zai-sdk"
version = "0.0.1b2"
version = "0.0.1b3"
description = "An SDK library for accessing big model APIs from Z.ai"
authors = ["Z.ai"]
readme = "README.md"
Expand Down
6 changes: 6 additions & 0 deletions src/zai/api_resource/chat/async_completions.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ def create(
extra_headers: Headers | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
response_format: object | None = None,
thinking: object | None = None,
) -> AsyncTaskStatus:
"""
Create an asynchronous chat completion task
Expand All @@ -79,6 +81,8 @@ def create(
extra_headers (Headers): Additional HTTP headers
extra_body (Body): Additional request body parameters
timeout (float | httpx.Timeout): Request timeout
response_format (Optional[object]): Response format specification
thinking (Optional[object]): Configuration parameters for model reasoning
"""
_cast_type = AsyncTaskStatus
logger.debug(f'temperature:{temperature}, top_p:{top_p}')
Expand Down Expand Up @@ -121,6 +125,8 @@ def create(
'tool_choice': tool_choice,
'meta': meta,
'extra': maybe_transform(extra, code_geex_params.CodeGeexExtra),
"response_format": response_format,
"thinking": thinking
}
return self._post(
'/async/chat/completions',
Expand Down
3 changes: 3 additions & 0 deletions src/zai/api_resource/chat/completions.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ def create(
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
response_format: object | None = None,
thinking: object | None = None,
) -> Completion | StreamResponse[ChatCompletionChunk]:
"""
Create a chat completion
Expand All @@ -87,6 +88,7 @@ def create(
extra_body (Body): Additional request body parameters
timeout (float | httpx.Timeout): Request timeout
response_format (object): Response format specification
thinking (Optional[object]): Configuration parameters for model reasoning
"""
logger.debug(f'temperature:{temperature}, top_p:{top_p}')
if temperature is not None and temperature != NOT_GIVEN:
Expand Down Expand Up @@ -131,6 +133,7 @@ def create(
'meta': meta,
'extra': maybe_transform(extra, code_geex_params.CodeGeexExtra),
'response_format': response_format,
"thinking": thinking
}
)
return self._post(
Expand Down
22 changes: 22 additions & 0 deletions src/zai/types/chat/chat_completion.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,17 +45,39 @@ class CompletionMessage(BaseModel):
tool_calls: Optional[List[CompletionMessageToolCall]] = None


class PromptTokensDetails(BaseModel):
    """
    Detailed breakdown of token usage for the input prompt.

    Attributes:
        cached_tokens: Number of prompt tokens that were reused from cache
            rather than processed anew.
    """
    # NOTE(review): declared required — assumes the API always returns this
    # field when prompt_tokens_details is present; confirm against the API,
    # otherwise this should be Optional[int] with a default.
    cached_tokens: int

class CompletionTokensDetails(BaseModel):
    """
    Detailed breakdown of token usage for the model completion.

    Attributes:
        reasoning_tokens: Number of completion tokens spent on reasoning
            steps (e.g. model "thinking"), as reported by the API.
    """
    # NOTE(review): declared required — assumes the API always returns this
    # field when completion_tokens_details is present; confirm, otherwise
    # this should be Optional[int] with a default.
    reasoning_tokens: int

class CompletionUsage(BaseModel):
    """
    Token usage information for a completion.

    Attributes:
        prompt_tokens: Number of tokens in the prompt.
        prompt_tokens_details: Optional detailed breakdown of prompt token
            usage; None when the API does not report it.
        completion_tokens: Number of tokens in the completion.
        completion_tokens_details: Optional detailed breakdown of completion
            token usage; None when the API does not report it.
        total_tokens: Total number of tokens used (prompt + completion).
    """
    prompt_tokens: int
    # Optional with None default: older API responses may omit the breakdown.
    prompt_tokens_details: Optional[PromptTokensDetails] = None
    completion_tokens: int
    # Optional with None default: older API responses may omit the breakdown.
    completion_tokens_details: Optional[CompletionTokensDetails] = None
    total_tokens: int


Expand Down