fix: revert the removal of the custom openapi doc (#369)

awalker4 · web-flow · commit db7780c19530 · 2024-02-19T10:26:37.000-05:00
We're now generating the proper openapi spec via FastAPI. The problem is, if we point the Speakeasy clients to the new spec, the generated code has too much churn. All good stuff if we were releasing the clients for the first time, but now it will break compatibility. We'll need to maintain a separate openapi spec for now, and add new params here whenever they go into the server. Example speakeasy churn: Unstructured-IO/unstructured-python-client#42
diff --git a/openapi.json b/openapi.json
@@ -0,0 +1,280 @@
+{
+    "openapi": "3.1.0",
+    "info": {
+        "title": "Unstructured Pipeline API",
+        "version": "0.0.1",
+        "summary": "Partition documents with the Unstructured library"
+    },
+    "servers": [
+        {
+            "url": "https://api.unstructured.io",
+            "description": "Hosted API",
+            "x-speakeasy-server-id": "prod"
+        },
+        {
+            "url": "http://localhost:8000",
+            "description": "Development server",
+            "x-speakeasy-server-id": "local"
+        }
+    ],
+    "x-speakeasy-retries": {
+        "strategy": "backoff",
+        "backoff": {
+            "initialInterval": 500,
+            "maxInterval": 60000,
+            "maxElapsedTime": 900000,
+            "exponent": 1.5
+        },
+        "statusCodes": ["5xx"],
+        "retryConnectionErrors": true
+    },
+    "security":[
+        {
+            "ApiKeyAuth":[]
+        }
+    ],
+    "tags": [
+        {
+            "name": "general"
+        }
+    ],
+    "paths": {
+        "/general/v0/general": {
+            "post": {
+                "tags": ["general"],
+                "summary": "Pipeline 1",
+                "operationId": "partition",
+                "x-speakeasy-name-override": "partition",
+                "requestBody": {
+                    "content": {
+                        "multipart/form-data": {
+                            "schema": {
+                                "$ref": "#/components/schemas/partition_parameters"
+                            }
+                        }
+                    }
+                },
+                "responses": {
+                    "200": {
+                        "description": "Successful Response",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/Elements"
+                                }
+                            }
+                        }
+                    },
+                    "422": {
+                        "description": "Validation Error",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/HTTPValidationError"
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    },
+    "components": {
+        "securitySchemes":{
+            "ApiKeyAuth":{
+                "type":"apiKey",
+                "name":"unstructured-api-key",
+                "in":"header",
+                "x-speakeasy-example": "YOUR_API_KEY"
+            }
+        },
+        "schemas": {
+            "Elements":{
+                "type": "array",
+                "items":{
+                    "Element":{
+                        "type":"object",
+                        "properties": {
+                            "type": {},
+                            "element_id": {},
+                            "metadata": {},
+                            "text": {}
+                        }
+                    }
+                }
+            },
+            "partition_parameters": {
+                "properties": {
+                    "files": {
+                        "type": "string",
+                        "format": "binary",
+                        "description": "The file to extract",
+                        "required": "true",
+                        "example": {
+                            "summary": "File to be partitioned",
+                            "externalValue": "https://github.com/Unstructured-IO/unstructured/blob/98d3541909f64290b5efb65a226fc3ee8a7cc5ee/example-docs/layout-parser-paper.pdf"
+                        }
+                    },
+                    "strategy": {
+                        "type": "string",
+                        "title": "Strategy",
+                        "description": "The strategy to use for partitioning PDF/image. Options are fast, hi_res, auto. Default: auto",
+                        "example": "hi_res"
+                    },
+                    "gz_uncompressed_content_type": {
+                        "type": "string",
+                        "title": "Uncompressed Content Type",
+                        "description": "If file is gzipped, use this content type after unzipping",
+                        "example": "application/pdf"
+                    },
+                    "output_format": {
+                        "type": "string",
+                        "title": "Output Format",
+                        "description": "The format of the response. Supported formats are application/json and text/csv. Default: application/json.",
+                        "example": "application/json"
+                    },
+                    "coordinates": {
+                        "type": "boolean",
+                        "title": "Coordinates",
+                        "description": "If true, return coordinates for each element. Default: false"
+                    },
+                    "encoding": {
+                        "type": "string",
+                        "title": "Encoding",
+                        "description": "The encoding method used to decode the text input. Default: utf-8",
+                        "example": "utf-8"
+                    },
+                    "hi_res_model_name": {
+                        "type": "string",
+                        "title": "Hi Res Model Name",
+                        "description": "The name of the inference model used when strategy is hi_res",
+                        "example": "yolox"
+                    },
+                    "include_page_breaks": {
+                        "type": "boolean",
+                        "title": "Include Page Breaks",
+                        "description": "If True, the output will include page breaks if the filetype supports it. Default: false"
+                    },
+                    "languages": {
+                        "items": {
+                            "type": "string",
+                            "example": "eng"
+                        },
+                        "type": "array",
+                        "title": "OCR Languages",
+                        "default": [],
+                        "description": "The languages present in the document, for use in partitioning and/or OCR",
+                        "example": "[eng]"
+                    },
+                    "pdf_infer_table_structure": {
+                        "type": "boolean",
+                        "title": "Pdf Infer Table Structure",
+                        "description": "If True and strategy=hi_res, any Table Elements extracted from a PDF will include an additional metadata field, 'text_as_html', where the value (string) is a just a transformation of the data into an HTML <table>."
+                    },
+                    "skip_infer_table_types": {
+                        "items": {
+                            "type": "string",
+                            "example": "pdf"
+                        },
+                        "type": "array",
+                        "title": "Skip Infer Table Types",
+                        "description": "The document types that you want to skip table extraction with. Default: ['pdf', 'jpg', 'png']"
+                    },
+                    "xml_keep_tags": {
+                        "type": "boolean",
+                        "title": "Xml Keep Tags",
+                        "description": "If True, will retain the XML tags in the output. Otherwise it will simply extract the text from within the tags. Only applies to partition_xml."
+                    },
+                    "chunking_strategy": {
+                        "type": "string",
+                        "title": "Chunking Strategy",
+                        "description": "Use one of the supported strategies to chunk the returned elements. Currently supports: by_title",
+                        "example": "by_title"
+                    },
+                    "multipage_sections": {
+                        "type": "boolean",
+                        "title": "Multipage Sections",
+                        "description": "If chunking strategy is set, determines if sections can span multiple sections. Default: true"
+                    },
+                    "combine_under_n_chars": {
+                        "type": "integer",
+                        "title": "Combine Under N Chars",
+                        "description": "If chunking strategy is set, combine elements until a section reaches a length of n chars. Default: 500",
+                        "example": 500
+                    },
+                    "new_after_n_chars": {
+                        "type": "integer",
+                        "title": "New after n chars",
+                        "description": "If chunking strategy is set, cut off new sections after reaching a length of n chars (soft max). Default: 1500",
+                        "example": 1500
+                    },
+                    "max_characters": {
+                        "type": "integer",
+                        "title": "Max Characters",
+                        "description": "If chunking strategy is set, cut off new sections after reaching a length of n chars (hard max). Default: 1500",
+                        "example": 1500
+                    },
+                    "extract_image_block_types": {
+                        "items": {
+                            "type": "string",
+                            "example": "image"
+                        },
+                        "type": "array",
+                        "title": "Image block types to extract",
+                        "default": [],
+                        "description": "The types of elements to extract, for use in extracting image blocks as base64 encoded data stored in metadata fields",
+                        "example": ["image", "table"]
+                    }
+                },
+                "type": "object",
+                "title": "Partition Parameters"
+            },
+            "HTTPValidationError": {
+                "properties": {
+                    "detail": {
+                        "items": {
+                            "$ref": "#/components/schemas/ValidationError"
+                        },
+                        "type": "array",
+                        "title": "Detail"
+                    }
+                },
+                "type": "object",
+                "title": "HTTPValidationError"
+            },
+            "ValidationError": {
+                "properties": {
+                    "loc": {
+                        "items": {
+                            "oneOf": [
+                                {
+                                    "type": "string"
+                                },
+                                {
+                                    "type": "integer"
+                                }
+                            ]
+                        },
+                        "type": "array",
+                        "title": "Location"
+                    },
+                    "msg": {
+                        "type": "string",
+                        "title": "Message"
+                    },
+                    "type": {
+                        "type": "string",
+                        "title": "Error Type"
+                    }
+                },
+                "type": "object",
+                "required": [
+                    "loc",
+                    "msg",
+                    "type"
+                ],
+                "title": "ValidationError"
+            }
+        }
+    }
+}