Skip to content

Commit a627252

Browse files
code-c-light and mengqian authored
feat: file parsing (#102)
Co-authored-by: mengqian <[email protected]>
1 parent d1bf260 commit a627252

File tree

9 files changed

+188
-2
lines changed

9 files changed

+188
-2
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "zhipuai"
3-
version = "2.1.5.20250801"
3+
version = "2.1.5.20250825"
44
description = "A SDK library for accessing big model apis from ZhipuAI"
55
authors = ["Zhipu AI"]
66
readme = "README.md"
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
from __future__ import annotations
2+
3+
import logging
4+
import logging.config
5+
import os
6+
7+
import pytest
8+
9+
import zhipuai
10+
from zhipuai import ZhipuAI
11+
12+
13+
def test_file_parser_create(logging_conf):
    """Submit a local PDF to the file-parser endpoint and print the response.

    Args:
        logging_conf: Logging configuration dict handed to
            ``logging.config.dictConfig``.
    """
    logging.config.dictConfig(logging_conf)  # type: ignore
    client = ZhipuAI()  # fill in your own API key
    try:
        # Open the sample file in a context manager so the handle is closed
        # even when the request raises.
        # NOTE(review): 'zhipu_pro' differs from the 'zhipu-pro' spelling used
        # in the `tool_type` Literal declared on `FileParser.create` -- confirm
        # which spelling the service expects.
        with open('hitsuyoushorui-cn.pdf', 'rb') as pdf_file:
            response = client.file_parser.create(
                file=pdf_file, file_type='pdf', tool_type='zhipu_pro'
            )
        print(response)
    except (
        zhipuai.core._errors.APIRequestFailedError,
        zhipuai.core._errors.APIInternalError,
        zhipuai.core._errors.APIStatusError,
    ) as err:
        # API failures are printed rather than failing the test.
        print(err)
26+
27+
def test_file_parser_content(logging_conf):
    """Fetch the text result of a parsing task and print it decoded as UTF-8.

    Args:
        logging_conf: Logging configuration dict handed to
            ``logging.config.dictConfig``.
    """
    logging.config.dictConfig(logging_conf)  # type: ignore
    client = ZhipuAI()  # fill in your own API key
    try:
        response = client.file_parser.content(
            task_id="66e8f7ab884448c8b4190f251f6c2982-1",
            format_type="text",
        )
        decoded_text = response.content.decode('utf-8')
        print(decoded_text)
    except (
        zhipuai.core._errors.APIRequestFailedError,
        zhipuai.core._errors.APIInternalError,
        zhipuai.core._errors.APIStatusError,
    ) as err:
        # API failures are printed rather than failing the test.
        print(err)
40+

zhipuai/_client.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ def __init__(
6969
self.audio = api_resource.audio.Audio(self)
7070
self.moderations = api_resource.moderation.Moderations(self)
7171
self.agents = api_resource.agents.Agents(self)
72+
self.file_parser = api_resource.file_parser.FileParser(self)
7273

7374
@property
7475
@override

zhipuai/api_resource/__init__.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,10 @@
5353
Audio
5454
)
5555

56+
from .file_parser import (
57+
FileParser
58+
)
59+
5660
__all__ = [
5761
'Videos',
5862
'AsyncCompletions',
@@ -68,5 +72,6 @@
6872
'Tools',
6973
'Assistant',
7074
'Audio',
71-
'Moderation'
75+
'Moderation',
76+
'FileParser'
7277
]
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
from .file_parser import FileParser
2+
3+
__all__ = ['FileParser']
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
from __future__ import annotations
2+
3+
from typing import TYPE_CHECKING, Mapping, cast
4+
5+
import httpx
6+
from typing_extensions import Literal
7+
8+
from ...core import BaseAPI, maybe_transform
9+
from ...core import NOT_GIVEN, Body, Headers, NotGiven, FileTypes
10+
from ...core import _legacy_binary_response
11+
from ...core import _legacy_response
12+
from ...core import deepcopy_minimal, extract_files
13+
from ...core import (
14+
make_request_options,
15+
)
16+
from ...types.file_parser.file_parser_create_params import FileParserCreateParams
17+
from ...types.file_parser.file_parser_resp import FileParserTaskCreateResp
18+
19+
if TYPE_CHECKING:
20+
from ..._client import ZhipuAI
21+
22+
__all__ = ["FileParser"]
23+
24+
25+
class FileParser(BaseAPI):
    """Client resource for the ``/files/parser`` endpoints."""

    def __init__(self, client: "ZhipuAI") -> None:
        super().__init__(client)

    def create(
        self,
        *,
        file: FileTypes | None = None,
        file_type: str | None = None,
        tool_type: Literal["simple", "doc2x", "tencent", "zhipu-pro"],
        extra_headers: Headers | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> FileParserTaskCreateResp:
        """
        Upload a file and create a parsing task via ``POST /files/parser/create``.

        Args:
          file: The file to upload; must be provided.

          file_type: The file type (for example ``"pdf"``).

          tool_type: The parsing tool to use.

          extra_headers: Send extra headers

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds

        Raises:
          ValueError: If `file` is missing or falsy.
        """
        if not file:
            raise ValueError("At least one `file` must be provided.")
        body = deepcopy_minimal(
            {
                "file": file,
                "file_type": file_type,
                "tool_type": tool_type,
            }
        )

        files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
        if files:
            # It should be noted that the actual Content-Type header that will be
            # sent to the server will contain a `boundary` parameter, e.g.
            # multipart/form-data; boundary=---abc--
            extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
        return self._post(
            "/files/parser/create",
            body=maybe_transform(body, FileParserCreateParams),
            files=files,
            options=make_request_options(
                extra_headers=extra_headers, extra_body=extra_body, timeout=timeout
            ),
            cast_type=FileParserTaskCreateResp,
        )

    def content(
        self,
        task_id: str,
        *,
        format_type: Literal["text", "download_link"],
        extra_headers: Headers | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> _legacy_binary_response.HttpxBinaryResponseContent:
        """
        Fetch the result of a parsing task via
        ``GET /files/parser/result/{task_id}/{format_type}``.

        Args:
          task_id: Identifier of the parsing task; must be non-empty.

          format_type: Form in which the result is requested.

          extra_headers: Send extra headers

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds

        Raises:
          ValueError: If `task_id` is empty.
        """
        if not task_id:
            raise ValueError(f"Expected a non-empty value for `task_id` but received {task_id!r}")
        # Caller-supplied headers take precedence over the default Accept header.
        extra_headers = {"Accept": "application/binary", **(extra_headers or {})}
        return self._get(
            f"/files/parser/result/{task_id}/{format_type}",
            options=make_request_options(
                extra_headers=extra_headers, extra_body=extra_body, timeout=timeout
            ),
            cast_type=_legacy_binary_response.HttpxBinaryResponseContent,
        )

zhipuai/types/file_parser/__init__.py

Whitespace-only changes.
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
from __future__ import annotations
2+
3+
from typing_extensions import Literal, Required, TypedDict
4+
from ...core import NOT_GIVEN, Body, Headers, NotGiven, FileTypes
5+
6+
7+
__all__ = ["FileParserCreateParams", "FileParserDownloadParams"]
8+
9+
10+
class FileParserCreateParams(TypedDict):
    """Request fields used when creating a file-parsing task."""

    # The file to upload.
    file: FileTypes

    # The file type.
    file_type: str

    # The tool type.
    tool_type: Literal["simple", "doc2x", "tencent", "zhipu-pro"]
17+
18+
19+
class FileParserDownloadParams(TypedDict):
    """Fields identifying a parsing task and the form of its result."""

    # The parsing task id.
    task_id: str

    # The type in which the result is returned.
    format_type: Literal["text", "download_link"]
24+
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
from typing import List, Optional
2+
3+
from zhipuai.core import BaseModel
4+
5+
__all__ = [
6+
"FileParserTaskCreateResp"
7+
]
8+
9+
10+
class FileParserTaskCreateResp(BaseModel):
    """Response model for a file-parsing task creation request."""

    # Task id.
    task_id: str

    # Message.
    message: str

    # Whether the request succeeded.
    success: bool
17+
18+

0 commit comments

Comments
 (0)