|
7 | 7 |
|
8 | 8 | @dataclasses.dataclass |
9 | 9 | class Files: |
10 | | - content: bytes = dataclasses.field(metadata={'multipart_form': { 'content': True }}) |
11 | | - file_name: str = dataclasses.field(metadata={'multipart_form': { 'field_name': 'files' }}) |
12 | | - |
13 | | - |
| 10 | + content: bytes = dataclasses.field(metadata={"multipart_form": {"content": True}}) |
| 11 | + file_name: str = dataclasses.field( |
| 12 | + metadata={"multipart_form": {"field_name": "files"}} |
| 13 | + ) |
14 | 14 |
|
15 | 15 |
|
16 | 16 | @dataclasses.dataclass |
17 | 17 | class PartitionParameters: |
18 | | - chunking_strategy: Optional[str] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'chunking_strategy' }}) |
| 18 | + chunking_strategy: Optional[str] = dataclasses.field( |
| 19 | + default=None, metadata={"multipart_form": {"field_name": "chunking_strategy"}} |
| 20 | + ) |
19 | 21 | r"""Use one of the supported strategies to chunk the returned elements. Currently supports: by_title""" |
20 | | - combine_under_n_chars: Optional[int] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'combine_under_n_chars' }}) |
| 22 | + combine_under_n_chars: Optional[int] = dataclasses.field( |
| 23 | + default=None, |
| 24 | + metadata={"multipart_form": {"field_name": "combine_under_n_chars"}}, |
| 25 | + ) |
21 | 26 | r"""If chunking strategy is set, combine elements until a section reaches a length of n chars. Default: 500""" |
22 | | - coordinates: Optional[bool] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'coordinates' }}) |
| 27 | + coordinates: Optional[bool] = dataclasses.field( |
| 28 | + default=None, metadata={"multipart_form": {"field_name": "coordinates"}} |
| 29 | + ) |
23 | 30 | r"""If true, return coordinates for each element. Default: false""" |
24 | | - encoding: Optional[str] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'encoding' }}) |
| 31 | + encoding: Optional[str] = dataclasses.field( |
| 32 | + default=None, metadata={"multipart_form": {"field_name": "encoding"}} |
| 33 | + ) |
25 | 34 | r"""The encoding method used to decode the text input. Default: utf-8""" |
26 | | - files: Optional[Files] = dataclasses.field(default=None, metadata={'multipart_form': { 'file': True }}) |
| 35 | + files: Optional[Files] = dataclasses.field( |
| 36 | + default=None, metadata={"multipart_form": {"file": True}} |
| 37 | + ) |
27 | 38 | r"""The file to extract""" |
28 | | - gz_uncompressed_content_type: Optional[str] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'gz_uncompressed_content_type' }}) |
| 39 | + gz_uncompressed_content_type: Optional[str] = dataclasses.field( |
| 40 | + default=None, |
| 41 | + metadata={"multipart_form": {"field_name": "gz_uncompressed_content_type"}}, |
| 42 | + ) |
29 | 43 | r"""If file is gzipped, use this content type after unzipping""" |
30 | | - hi_res_model_name: Optional[str] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'hi_res_model_name' }}) |
| 44 | + hi_res_model_name: Optional[str] = dataclasses.field( |
| 45 | + default=None, metadata={"multipart_form": {"field_name": "hi_res_model_name"}} |
| 46 | + ) |
31 | 47 | r"""The name of the inference model used when strategy is hi_res""" |
32 | | - include_page_breaks: Optional[bool] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'include_page_breaks' }}) |
| 48 | + include_page_breaks: Optional[bool] = dataclasses.field( |
| 49 | + default=None, metadata={"multipart_form": {"field_name": "include_page_breaks"}} |
| 50 | + ) |
33 | 51 | r"""If True, the output will include page breaks if the filetype supports it. Default: false""" |
34 | | - languages: Optional[List[str]] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'languages' }}) |
| 52 | + languages: Optional[List[str]] = dataclasses.field( |
| 53 | + default=None, metadata={"multipart_form": {"field_name": "languages"}} |
| 54 | + ) |
35 | 55 | r"""The languages present in the document, for use in partitioning and/or OCR""" |
36 | | - max_characters: Optional[int] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'max_characters' }}) |
| 56 | + max_characters: Optional[int] = dataclasses.field( |
| 57 | + default=None, metadata={"multipart_form": {"field_name": "max_characters"}} |
| 58 | + ) |
37 | 59 | r"""If chunking strategy is set, cut off new sections after reaching a length of n chars (hard max). Default: 1500""" |
38 | | - multipage_sections: Optional[bool] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'multipage_sections' }}) |
| 60 | + multipage_sections: Optional[bool] = dataclasses.field( |
| 61 | + default=None, metadata={"multipart_form": {"field_name": "multipage_sections"}} |
| 62 | + ) |
39 | 63 | r"""If chunking strategy is set, determines if sections can span multiple pages. Default: true""" |
40 | | - new_after_n_chars: Optional[int] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'new_after_n_chars' }}) |
| 64 | + new_after_n_chars: Optional[int] = dataclasses.field( |
| 65 | + default=None, metadata={"multipart_form": {"field_name": "new_after_n_chars"}} |
| 66 | + ) |
41 | 67 | r"""If chunking strategy is set, cut off new sections after reaching a length of n chars (soft max). Default: 1500""" |
42 | | - output_format: Optional[str] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'output_format' }}) |
| 68 | + output_format: Optional[str] = dataclasses.field( |
| 69 | + default=None, metadata={"multipart_form": {"field_name": "output_format"}} |
| 70 | + ) |
43 | 71 | r"""The format of the response. Supported formats are application/json and text/csv. Default: application/json.""" |
44 | | - pdf_infer_table_structure: Optional[bool] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'pdf_infer_table_structure' }}) |
| 72 | + pdf_infer_table_structure: Optional[bool] = dataclasses.field( |
| 73 | + default=None, |
| 74 | + metadata={"multipart_form": {"field_name": "pdf_infer_table_structure"}}, |
| 75 | + ) |
45 | 76 | r"""If True and strategy=hi_res, any Table Elements extracted from a PDF will include an additional metadata field, 'text_as_html', where the value (string) is just a transformation of the data into an HTML <table>.""" |
46 | | - skip_infer_table_types: Optional[List[str]] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'skip_infer_table_types' }}) |
| 77 | + skip_infer_table_types: Optional[List[str]] = dataclasses.field( |
| 78 | + default=None, |
| 79 | + metadata={"multipart_form": {"field_name": "skip_infer_table_types"}}, |
| 80 | + ) |
47 | 81 | r"""The document types that you want to skip table extraction with. Default: ['pdf', 'jpg', 'png']""" |
48 | | - strategy: Optional[str] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'strategy' }}) |
| 82 | + strategy: Optional[str] = dataclasses.field( |
| 83 | + default=None, metadata={"multipart_form": {"field_name": "strategy"}} |
| 84 | + ) |
49 | 85 | r"""The strategy to use for partitioning PDF/image. Options are fast, hi_res, auto. Default: auto""" |
50 | | - xml_keep_tags: Optional[bool] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'xml_keep_tags' }}) |
| 86 | + xml_keep_tags: Optional[bool] = dataclasses.field( |
| 87 | + default=None, metadata={"multipart_form": {"field_name": "xml_keep_tags"}} |
| 88 | + ) |
51 | 89 | r"""If True, will retain the XML tags in the output. Otherwise it will simply extract the text from within the tags. Only applies to partition_xml.""" |
52 | | - |
53 | | - |
|
0 commit comments