diff --git a/pyproject.toml b/pyproject.toml index e85f1bb..6916735 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "zai-sdk" -version = "0.0.1b2" +version = "0.0.1b3" description = "A SDK library for accessing big model apis from Z.ai" authors = ["Z.ai"] readme = "README.md" diff --git a/src/zai/api_resource/chat/async_completions.py b/src/zai/api_resource/chat/async_completions.py index 58bb935..9f3980e 100644 --- a/src/zai/api_resource/chat/async_completions.py +++ b/src/zai/api_resource/chat/async_completions.py @@ -56,6 +56,8 @@ def create( extra_headers: Headers | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + response_format: object | None = None, + thinking: object | None = None, ) -> AsyncTaskStatus: """ Create an asynchronous chat completion task @@ -79,6 +81,8 @@ def create( extra_headers (Headers): Additional HTTP headers extra_body (Body): Additional request body parameters timeout (float | httpx.Timeout): Request timeout + response_format (Optional[object]): Response format specification + thinking (Optional[object]): Configuration parameters for model reasoning """ _cast_type = AsyncTaskStatus logger.debug(f'temperature:{temperature}, top_p:{top_p}') @@ -121,6 +125,8 @@ def create( 'tool_choice': tool_choice, 'meta': meta, 'extra': maybe_transform(extra, code_geex_params.CodeGeexExtra), + 'response_format': response_format, + 'thinking': thinking, } return self._post( '/async/chat/completions', diff --git a/src/zai/api_resource/chat/completions.py b/src/zai/api_resource/chat/completions.py index cdf8780..1c02424 100644 --- a/src/zai/api_resource/chat/completions.py +++ b/src/zai/api_resource/chat/completions.py @@ -62,6 +62,7 @@ def create( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, response_format: object | None = None, + thinking: object | None = None, ) -> Completion | 
StreamResponse[ChatCompletionChunk]: """ Create a chat completion @@ -87,6 +88,7 @@ def create( extra_body (Body): Additional request body parameters timeout (float | httpx.Timeout): Request timeout response_format (object): Response format specification + thinking (Optional[object]): Configuration parameters for model reasoning """ logger.debug(f'temperature:{temperature}, top_p:{top_p}') if temperature is not None and temperature != NOT_GIVEN: @@ -131,6 +133,7 @@ def create( 'meta': meta, 'extra': maybe_transform(extra, code_geex_params.CodeGeexExtra), 'response_format': response_format, + 'thinking': thinking, } ) return self._post( diff --git a/src/zai/types/chat/chat_completion.py b/src/zai/types/chat/chat_completion.py index 924be1e..3c7c64c 100644 --- a/src/zai/types/chat/chat_completion.py +++ b/src/zai/types/chat/chat_completion.py @@ -45,17 +45,39 @@ class CompletionMessage(BaseModel): tool_calls: Optional[List[CompletionMessageToolCall]] = None +class PromptTokensDetails(BaseModel): + """ + Detailed breakdown of token usage for the input prompt + + Attributes: + cached_tokens: Number of tokens reused from cache + """ + cached_tokens: int + +class CompletionTokensDetails(BaseModel): + """ + Detailed breakdown of token usage for the model completion + + Attributes: + reasoning_tokens: Number of tokens used for reasoning steps + """ + reasoning_tokens: int + class CompletionUsage(BaseModel): """ Token usage information for completion Attributes: prompt_tokens: Number of tokens in the prompt + prompt_tokens_details: Detailed breakdown of token usage for the input prompt completion_tokens: Number of tokens in the completion + completion_tokens_details: Detailed breakdown of token usage for the model completion total_tokens: Total number of tokens used """ prompt_tokens: int + prompt_tokens_details: Optional[PromptTokensDetails] = None completion_tokens: int + completion_tokens_details: Optional[CompletionTokensDetails] = None total_tokens: int