Skip to content

Commit db7780c

Browse files
authored
fix: revert the removal of the custom openapi doc (#369)
We're now generating the proper OpenAPI spec via FastAPI. The problem is that if we point the Speakeasy clients to the new spec, the generated code has too much churn. That would be fine if we were releasing the clients for the first time, but now it would break compatibility. We'll need to maintain a separate OpenAPI spec for now, and add new params here whenever they go into the server. Example Speakeasy churn: Unstructured-IO/unstructured-python-client#42
1 parent 561b9d8 commit db7780c

File tree

1 file changed

+280
-0
lines changed

1 file changed

+280
-0
lines changed

openapi.json

Lines changed: 280 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,280 @@
1+
{
2+
"openapi": "3.1.0",
3+
"info": {
4+
"title": "Unstructured Pipeline API",
5+
"version": "0.0.1",
6+
"summary": "Partition documents with the Unstructured library"
7+
},
8+
"servers": [
9+
{
10+
"url": "https://api.unstructured.io",
11+
"description": "Hosted API",
12+
"x-speakeasy-server-id": "prod"
13+
},
14+
{
15+
"url": "http://localhost:8000",
16+
"description": "Development server",
17+
"x-speakeasy-server-id": "local"
18+
}
19+
],
20+
"x-speakeasy-retries": {
21+
"strategy": "backoff",
22+
"backoff": {
23+
"initialInterval": 500,
24+
"maxInterval": 60000,
25+
"maxElapsedTime": 900000,
26+
"exponent": 1.5
27+
},
28+
"statusCodes": ["5xx"],
29+
"retryConnectionErrors": true
30+
},
31+
"security":[
32+
{
33+
"ApiKeyAuth":[]
34+
}
35+
],
36+
"tags": [
37+
{
38+
"name": "general"
39+
}
40+
],
41+
"paths": {
42+
"/general/v0/general": {
43+
"post": {
44+
"tags": ["general"],
45+
"summary": "Pipeline 1",
46+
"operationId": "partition",
47+
"x-speakeasy-name-override": "partition",
48+
"requestBody": {
49+
"content": {
50+
"multipart/form-data": {
51+
"schema": {
52+
"$ref": "#/components/schemas/partition_parameters"
53+
}
54+
}
55+
}
56+
},
57+
"responses": {
58+
"200": {
59+
"description": "Successful Response",
60+
"content": {
61+
"application/json": {
62+
"schema": {
63+
"$ref": "#/components/schemas/Elements"
64+
}
65+
}
66+
}
67+
},
68+
"422": {
69+
"description": "Validation Error",
70+
"content": {
71+
"application/json": {
72+
"schema": {
73+
"$ref": "#/components/schemas/HTTPValidationError"
74+
}
75+
}
76+
}
77+
}
78+
}
79+
}
80+
}
81+
},
82+
"components": {
83+
"securitySchemes":{
84+
"ApiKeyAuth":{
85+
"type":"apiKey",
86+
"name":"unstructured-api-key",
87+
"in":"header",
88+
"x-speakeasy-example": "YOUR_API_KEY"
89+
}
90+
},
91+
"schemas": {
92+
"Elements":{
93+
"type": "array",
94+
"items":{
95+
"Element":{
96+
"type":"object",
97+
"properties": {
98+
"type": {},
99+
"element_id": {},
100+
"metadata": {},
101+
"text": {}
102+
}
103+
}
104+
}
105+
},
106+
"partition_parameters": {
107+
"properties": {
108+
"files": {
109+
"type": "string",
110+
"format": "binary",
111+
"description": "The file to extract",
112+
"required": "true",
113+
"example": {
114+
"summary": "File to be partitioned",
115+
"externalValue": "https://github.com/Unstructured-IO/unstructured/blob/98d3541909f64290b5efb65a226fc3ee8a7cc5ee/example-docs/layout-parser-paper.pdf"
116+
}
117+
},
118+
"strategy": {
119+
"type": "string",
120+
"title": "Strategy",
121+
"description": "The strategy to use for partitioning PDF/image. Options are fast, hi_res, auto. Default: auto",
122+
"example": "hi_res"
123+
},
124+
"gz_uncompressed_content_type": {
125+
"type": "string",
126+
"title": "Uncompressed Content Type",
127+
"description": "If file is gzipped, use this content type after unzipping",
128+
"example": "application/pdf"
129+
},
130+
"output_format": {
131+
"type": "string",
132+
"title": "Output Format",
133+
"description": "The format of the response. Supported formats are application/json and text/csv. Default: application/json.",
134+
"example": "application/json"
135+
},
136+
"coordinates": {
137+
"type": "boolean",
138+
"title": "Coordinates",
139+
"description": "If true, return coordinates for each element. Default: false"
140+
},
141+
"encoding": {
142+
"type": "string",
143+
"title": "Encoding",
144+
"description": "The encoding method used to decode the text input. Default: utf-8",
145+
"example": "utf-8"
146+
},
147+
"hi_res_model_name": {
148+
"type": "string",
149+
"title": "Hi Res Model Name",
150+
"description": "The name of the inference model used when strategy is hi_res",
151+
"example": "yolox"
152+
},
153+
"include_page_breaks": {
154+
"type": "boolean",
155+
"title": "Include Page Breaks",
156+
"description": "If True, the output will include page breaks if the filetype supports it. Default: false"
157+
},
158+
"languages": {
159+
"items": {
160+
"type": "string",
161+
"example": "eng"
162+
},
163+
"type": "array",
164+
"title": "OCR Languages",
165+
"default": [],
166+
"description": "The languages present in the document, for use in partitioning and/or OCR",
167+
"example": ["eng"]
168+
},
169+
"pdf_infer_table_structure": {
170+
"type": "boolean",
171+
"title": "Pdf Infer Table Structure",
172+
"description": "If True and strategy=hi_res, any Table Elements extracted from a PDF will include an additional metadata field, 'text_as_html', where the value (string) is just a transformation of the data into an HTML <table>."
173+
},
174+
"skip_infer_table_types": {
175+
"items": {
176+
"type": "string",
177+
"example": "pdf"
178+
},
179+
"type": "array",
180+
"title": "Skip Infer Table Types",
181+
"description": "The document types that you want to skip table extraction with. Default: ['pdf', 'jpg', 'png']"
182+
},
183+
"xml_keep_tags": {
184+
"type": "boolean",
185+
"title": "Xml Keep Tags",
186+
"description": "If True, will retain the XML tags in the output. Otherwise it will simply extract the text from within the tags. Only applies to partition_xml."
187+
},
188+
"chunking_strategy": {
189+
"type": "string",
190+
"title": "Chunking Strategy",
191+
"description": "Use one of the supported strategies to chunk the returned elements. Currently supports: by_title",
192+
"example": "by_title"
193+
},
194+
"multipage_sections": {
195+
"type": "boolean",
196+
"title": "Multipage Sections",
197+
"description": "If chunking strategy is set, determines if sections can span multiple pages. Default: true"
198+
},
199+
"combine_under_n_chars": {
200+
"type": "integer",
201+
"title": "Combine Under N Chars",
202+
"description": "If chunking strategy is set, combine elements until a section reaches a length of n chars. Default: 500",
203+
"example": 500
204+
},
205+
"new_after_n_chars": {
206+
"type": "integer",
207+
"title": "New after n chars",
208+
"description": "If chunking strategy is set, cut off new sections after reaching a length of n chars (soft max). Default: 1500",
209+
"example": 1500
210+
},
211+
"max_characters": {
212+
"type": "integer",
213+
"title": "Max Characters",
214+
"description": "If chunking strategy is set, cut off new sections after reaching a length of n chars (hard max). Default: 1500",
215+
"example": 1500
216+
},
217+
"extract_image_block_types": {
218+
"items": {
219+
"type": "string",
220+
"example": "image"
221+
},
222+
"type": "array",
223+
"title": "Image block types to extract",
224+
"default": [],
225+
"description": "The types of elements to extract, for use in extracting image blocks as base64 encoded data stored in metadata fields",
226+
"example": ["image", "table"]
227+
}
228+
},
229+
"type": "object",
230+
"title": "Partition Parameters"
231+
},
232+
"HTTPValidationError": {
233+
"properties": {
234+
"detail": {
235+
"items": {
236+
"$ref": "#/components/schemas/ValidationError"
237+
},
238+
"type": "array",
239+
"title": "Detail"
240+
}
241+
},
242+
"type": "object",
243+
"title": "HTTPValidationError"
244+
},
245+
"ValidationError": {
246+
"properties": {
247+
"loc": {
248+
"items": {
249+
"oneOf": [
250+
{
251+
"type": "string"
252+
},
253+
{
254+
"type": "integer"
255+
}
256+
]
257+
},
258+
"type": "array",
259+
"title": "Location"
260+
},
261+
"msg": {
262+
"type": "string",
263+
"title": "Message"
264+
},
265+
"type": {
266+
"type": "string",
267+
"title": "Error Type"
268+
}
269+
},
270+
"type": "object",
271+
"required": [
272+
"loc",
273+
"msg",
274+
"type"
275+
],
276+
"title": "ValidationError"
277+
}
278+
}
279+
}
280+
}

0 commit comments

Comments
 (0)