From be2fa89c7364dba30268de230cf0e0ae3fe92fad Mon Sep 17 00:00:00 2001
From: mengqian
Date: Sun, 3 Aug 2025 23:18:15 +0800
Subject: [PATCH 1/4] Add new file parsing service SDK and test cases

---
Review notes (commentary only; not part of the commit message):
 * The series was restored from a copy whose newlines had been collapsed.
   Hunk line counts were re-derived and match the headers, but leading
   indentation is reconstructed -- verify against the original commits.
 * tests: the PDF is now opened with a `with` block so the handle is closed,
   and `tool_type` uses 'zhipu-pro', the spelling declared in the `Literal`
   of `FileParser.create`. NOTE(review): confirm which spelling the service
   actually accepts.
 * file_parser.py: `file` / `file_type` are annotated `| None` to match their
   `None` defaults, and `create` gained a one-line docstring in place of a
   blank line, so later hunks keep their offsets.
 * Chinese comments and attribute docstrings were translated to English.
 * The `index` blob hashes of the edited files (test_file_parser.py,
   file_parser.py, file_parser_create_params.py, file_parser_resp.py) are
   stale as a result; apply without --3way or regenerate the series.

 tests/integration_tests/test_file_parser.py   | 41 ++++++++
 zhipuai/_client.py                            |  1 +
 zhipuai/api_resource/__init__.py              |  7 +-
 zhipuai/api_resource/file_parser/__init__.py  |  3 +
 .../api_resource/file_parser/file_parser.py   | 97 +++++++++++++++++++
 zhipuai/types/file_parser/__init__.py         |  0
 .../file_parser/file_parser_create_params.py  | 24 +++++
 zhipuai/types/file_parser/file_parser_resp.py | 18 ++++
 8 files changed, 190 insertions(+), 1 deletion(-)
 create mode 100644 tests/integration_tests/test_file_parser.py
 create mode 100644 zhipuai/api_resource/file_parser/__init__.py
 create mode 100644 zhipuai/api_resource/file_parser/file_parser.py
 create mode 100644 zhipuai/types/file_parser/__init__.py
 create mode 100644 zhipuai/types/file_parser/file_parser_create_params.py
 create mode 100644 zhipuai/types/file_parser/file_parser_resp.py

diff --git a/tests/integration_tests/test_file_parser.py b/tests/integration_tests/test_file_parser.py
new file mode 100644
index 0000000..528dbb5
--- /dev/null
+++ b/tests/integration_tests/test_file_parser.py
@@ -0,0 +1,41 @@
+from __future__ import annotations
+
+import logging
+import logging.config
+import os
+
+import pytest
+
+import zhipuai
+from zhipuai import ZhipuAI
+
+
+def test_file_parser_create(logging_conf):
+    logging.config.dictConfig(logging_conf)  # type: ignore
+    client = ZhipuAI()  # Fill in your own API key
+    try:
+        with open('hitsuyoushorui-cn.pdf', 'rb') as pdf_file:
+            response = client.file_parser.create(file=pdf_file, file_type='pdf', tool_type='zhipu-pro')
+        print(response)
+
+    except zhipuai.core._errors.APIRequestFailedError as err:
+        print(err)
+    except zhipuai.core._errors.APIInternalError as err:
+        print(err)
+    except zhipuai.core._errors.APIStatusError as err:
+        print(err)
+
+def test_file_parser_content(logging_conf):
+    logging.config.dictConfig(logging_conf)  # type: ignore
+    client = ZhipuAI()  # Fill in your own API key
+    try:
+        response = client.file_parser.content(task_id="66e8f7ab884448c8b4190f251f6c2982-1", format_type="text")
+        print(response.content.decode('utf-8'))
+
+    except zhipuai.core._errors.APIRequestFailedError as err:
+        print(err)
+    except zhipuai.core._errors.APIInternalError as err:
+        print(err)
+    except zhipuai.core._errors.APIStatusError as err:
+        print(err)
+
diff --git a/zhipuai/_client.py b/zhipuai/_client.py
index eb63ce0..87d7090 100644
--- a/zhipuai/_client.py
+++ b/zhipuai/_client.py
@@ -69,6 +69,7 @@ def __init__(
         self.audio = api_resource.audio.Audio(self)
         self.moderations = api_resource.moderation.Moderations(self)
         self.agents = api_resource.agents.Agents(self)
+        self.file_parser = api_resource.file_parser.FileParser(self)
 
     @property
     @override
diff --git a/zhipuai/api_resource/__init__.py b/zhipuai/api_resource/__init__.py
index 1c73bc3..00c5113 100644
--- a/zhipuai/api_resource/__init__.py
+++ b/zhipuai/api_resource/__init__.py
@@ -53,6 +53,10 @@
     Audio
 )
 
+from .file_parser import (
+    FileParser
+)
+
 __all__ = [
     'Videos',
     'AsyncCompletions',
@@ -68,5 +72,6 @@
     'Tools',
     'Assistant',
     'Audio',
-    'Moderation'
+    'Moderation',
+    'FileParser'
 ]
diff --git a/zhipuai/api_resource/file_parser/__init__.py b/zhipuai/api_resource/file_parser/__init__.py
new file mode 100644
index 0000000..b263267
--- /dev/null
+++ b/zhipuai/api_resource/file_parser/__init__.py
@@ -0,0 +1,3 @@
+from .file_parser import FileParser
+
+__all__ = ['FileParser']
\ No newline at end of file
diff --git a/zhipuai/api_resource/file_parser/file_parser.py b/zhipuai/api_resource/file_parser/file_parser.py
new file mode 100644
index 0000000..ca4ff4c
--- /dev/null
+++ b/zhipuai/api_resource/file_parser/file_parser.py
@@ -0,0 +1,97 @@
+from __future__ import annotations
+
+
+from typing import TYPE_CHECKING, List, Mapping, cast
+from typing_extensions import Literal
+
+from ...core import BaseAPI, maybe_transform
+from ...core import NOT_GIVEN, Body, Headers, NotGiven, FileTypes
+
+import httpx
+
+from ...core import (
+    make_request_options,
+)
+from ...core import deepcopy_minimal, extract_files
+from ...types.file_parser.file_parser_create_params import FileParserCreateParams
+from ...types.file_parser.file_parser_resp import FileParserTaskCreateResp
+from ...core import _legacy_binary_response
+from ...core import _legacy_response
+
+if TYPE_CHECKING:
+    from ..._client import ZhipuAI
+
+__all__ = ["FileParser"]
+
+
+class FileParser(BaseAPI):
+
+    def __init__(self, client: "ZhipuAI") -> None:
+        super().__init__(client)
+
+    def create(
+        self,
+        *,
+        file: FileTypes | None = None,
+        file_type: str | None = None,
+        tool_type: Literal["simple", "doc2x", "tencent", "zhipu-pro"],
+        extra_headers: Headers | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> FileParserTaskCreateResp:
+        """Upload `file` to create a parsing task; raises ValueError when `file` is missing."""
+        if not file:
+            raise ValueError("At least one `file` must be provided.")
+        body = deepcopy_minimal(
+            {
+                "file": file,
+                "file_type": file_type,
+                "tool_type": tool_type,
+            }
+        )
+
+        files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
+        if files:
+            # It should be noted that the actual Content-Type header that will be
+            # sent to the server will contain a `boundary` parameter, e.g.
+            # multipart/form-data; boundary=---abc--
+            extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
+        return self._post(
+            "/fileParse/create",
+            body=maybe_transform(body, FileParserCreateParams),
+            files=files,
+            options=make_request_options(
+                extra_headers=extra_headers, extra_body=extra_body, timeout=timeout
+            ),
+            cast_type=FileParserTaskCreateResp,
+        )
+
+    def content(
+        self,
+        task_id: str,
+        *,
+        format_type: Literal["text", "download_link"],
+        extra_headers: Headers | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> _legacy_response.HttpxBinaryResponseContent:
+        """
+        Returns the contents of the specified file.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not task_id:
+            raise ValueError(f"Expected a non-empty value for `task_id` but received {task_id!r}")
+        extra_headers = {"Accept": "application/binary", **(extra_headers or {})}
+        return self._get(
+            f"/fileParse/getResult/{task_id}/{format_type}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_body=extra_body, timeout=timeout
+            ),
+            cast_type=_legacy_binary_response.HttpxBinaryResponseContent,
+        )
diff --git a/zhipuai/types/file_parser/__init__.py b/zhipuai/types/file_parser/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/zhipuai/types/file_parser/file_parser_create_params.py b/zhipuai/types/file_parser/file_parser_create_params.py
new file mode 100644
index 0000000..03684ec
--- /dev/null
+++ b/zhipuai/types/file_parser/file_parser_create_params.py
@@ -0,0 +1,24 @@
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+from ...core import NOT_GIVEN, Body, Headers, NotGiven, FileTypes
+
+
+__all__ = ["FileParserCreateParams", "FileParserDownloadParams"]
+
+
+class FileParserCreateParams(TypedDict):
+    file: FileTypes
+    """The file to upload"""
+    file_type: str
+    """File type"""
+    tool_type: Literal["simple", "doc2x", "tencent", "zhipu-pro"]
+    """Tool type"""
+
+
+class FileParserDownloadParams(TypedDict):
+    task_id: str
+    """Parsing task ID"""
+    format_type: Literal["text", "download_link"]
+    """Result return type"""
+
diff --git a/zhipuai/types/file_parser/file_parser_resp.py b/zhipuai/types/file_parser/file_parser_resp.py
new file mode 100644
index 0000000..a89e0aa
--- /dev/null
+++ b/zhipuai/types/file_parser/file_parser_resp.py
@@ -0,0 +1,18 @@
+from typing import List, Optional
+
+from zhipuai.core import BaseModel
+
+__all__ = [
+    "FileParserTaskCreateResp"
+]
+
+
+class FileParserTaskCreateResp(BaseModel):
+    task_id: str
+    # Task ID
+    message: str
+    # message
+    success: bool
+    # Whether it succeeded
+
+

From 71362e3dac7e56171445b1a312b4b55db4d8d676 Mon Sep 17 00:00:00 2001
From: mengqian
Date: Tue, 5 Aug 2025 11:01:49 +0800
Subject: [PATCH 2/4] modify request url

---
 zhipuai/api_resource/file_parser/file_parser.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/zhipuai/api_resource/file_parser/file_parser.py b/zhipuai/api_resource/file_parser/file_parser.py
index ca4ff4c..9e58a35 100644
--- a/zhipuai/api_resource/file_parser/file_parser.py
+++ b/zhipuai/api_resource/file_parser/file_parser.py
@@ -57,7 +57,7 @@ def create(
             # multipart/form-data; boundary=---abc--
             extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
         return self._post(
-            "/fileParse/create",
+            "/files/parser/create",
             body=maybe_transform(body, FileParserCreateParams),
             files=files,
             options=make_request_options(
@@ -89,7 +89,7 @@ def content(
             raise ValueError(f"Expected a non-empty value for `task_id` but received {task_id!r}")
         extra_headers = {"Accept": "application/binary", **(extra_headers or {})}
         return self._get(
-            f"/fileParse/getResult/{task_id}/{format_type}",
+            f"/files/parser/getResult/{task_id}/{format_type}",
             options=make_request_options(
                 extra_headers=extra_headers, extra_body=extra_body, timeout=timeout
             ),

From 8a748aaa1c38f6eb03652ce1ae9060e22128115b Mon Sep 17 00:00:00 2001
From: mengqian
Date: Thu, 7 Aug 2025 23:22:57 +0800
Subject: [PATCH 3/4] modify request url

---
 zhipuai/api_resource/file_parser/file_parser.py | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/zhipuai/api_resource/file_parser/file_parser.py b/zhipuai/api_resource/file_parser/file_parser.py
index 9e58a35..db25766 100644
--- a/zhipuai/api_resource/file_parser/file_parser.py
+++ b/zhipuai/api_resource/file_parser/file_parser.py
@@ -1,22 +1,20 @@
 from __future__ import annotations
 
+from typing import TYPE_CHECKING, Mapping, cast
 
-from typing import TYPE_CHECKING, List, Mapping, cast
+import httpx
 from typing_extensions import Literal
 
 from ...core import BaseAPI, maybe_transform
 from ...core import NOT_GIVEN, Body, Headers, NotGiven, FileTypes
-
-import httpx
-
+from ...core import _legacy_binary_response
+from ...core import _legacy_response
+from ...core import deepcopy_minimal, extract_files
 from ...core import (
     make_request_options,
 )
-from ...core import deepcopy_minimal, extract_files
 from ...types.file_parser.file_parser_create_params import FileParserCreateParams
 from ...types.file_parser.file_parser_resp import FileParserTaskCreateResp
-from ...core import _legacy_binary_response
-from ...core import _legacy_response
 
 if TYPE_CHECKING:
     from ..._client import ZhipuAI
@@ -89,7 +87,7 @@ def content(
             raise ValueError(f"Expected a non-empty value for `task_id` but received {task_id!r}")
         extra_headers = {"Accept": "application/binary", **(extra_headers or {})}
         return self._get(
-            f"/files/parser/getResult/{task_id}/{format_type}",
+            f"/files/parser/result/{task_id}/{format_type}",
             options=make_request_options(
                 extra_headers=extra_headers, extra_body=extra_body, timeout=timeout
             ),

From 5810305ee5df83b9653a47acb35880f8224d4330 Mon Sep 17 00:00:00 2001
From: mengqian
Date: Mon, 25 Aug 2025 18:47:51 +0800
Subject: [PATCH 4/4] update version

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 492c99e..9b9c85b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "zhipuai"
-version = "2.1.5.20250801"
+version = "2.1.5.20250825"
 description = "A SDK library for accessing big model apis from ZhipuAI"
 authors = ["Zhipu AI"]
 readme = "README.md"