|
4387 | 4387 | ] |
4388 | 4388 | } |
4389 | 4389 | }, |
| 4390 | + "/api/file/html_page": { |
| 4391 | + "post": { |
| 4392 | + "tags": [ |
| 4393 | + "File" |
| 4394 | + ], |
| 4395 | + "summary": "Upload HTML Page", |
| 4396 | + "description": "Chunk HTML by headings and queue for indexing into the specified dataset.", |
| 4397 | + "operationId": "upload_html_page", |
| 4398 | + "requestBody": { |
| 4399 | + "description": "JSON request payload to upload an HTML page", |
| 4400 | + "content": { |
| 4401 | + "application/json": { |
| 4402 | + "schema": { |
| 4403 | + "$ref": "#/components/schemas/UploadHtmlPageReqPayload" |
| 4404 | + } |
| 4405 | + } |
| 4406 | + }, |
| 4407 | + "required": true |
| 4408 | + }, |
| 4409 | + "responses": { |
| 4410 | + "204": { |
| 4411 | + "description": "Confirmation that the HTML is being processed" |
| 4412 | + }, |
| 4413 | + "400": { |
| 4414 | + "description": "Service error relating to processing the HTML page", |
| 4415 | + "content": { |
| 4416 | + "application/json": { |
| 4417 | + "schema": { |
| 4418 | + "$ref": "#/components/schemas/ErrorResponseBody" |
| 4419 | + } |
| 4420 | + } |
| 4421 | + } |
| 4422 | + } |
| 4423 | + } |
| 4424 | + } |
| 4425 | + }, |
4390 | 4426 | "/api/file/{file_id}": { |
4391 | 4427 | "get": { |
4392 | 4428 | "tags": [ |
|
8460 | 8496 | "type": "string", |
8461 | 8497 | "description": "The URL to crawl", |
8462 | 8498 | "nullable": true |
| 8499 | + }, |
| 8500 | + "webhook_metadata": { |
| 8501 | + "description": "Metadata to send back with the webhook call for each successful page scrape", |
| 8502 | + "nullable": true |
| 8503 | + }, |
| 8504 | + "webhook_url": { |
| 8505 | + "type": "string", |
| 8506 | + "description": "URL to call back via webhook for each successful page scrape", |
| 8507 | + "nullable": true |
8463 | 8508 | } |
8464 | 8509 | }, |
8465 | 8510 | "example": { |
|
9642 | 9687 | "dot" |
9643 | 9688 | ] |
9644 | 9689 | }, |
| 9690 | + "Document": { |
| 9691 | + "type": "object", |
| 9692 | + "required": [ |
| 9693 | + "metadata" |
| 9694 | + ], |
| 9695 | + "properties": { |
| 9696 | + "extract": { |
| 9697 | + "type": "string", |
| 9698 | + "nullable": true |
| 9699 | + }, |
| 9700 | + "html": { |
| 9701 | + "type": "string", |
| 9702 | + "nullable": true |
| 9703 | + }, |
| 9704 | + "links": { |
| 9705 | + "type": "array", |
| 9706 | + "items": { |
| 9707 | + "type": "string" |
| 9708 | + }, |
| 9709 | + "nullable": true |
| 9710 | + }, |
| 9711 | + "markdown": { |
| 9712 | + "type": "string", |
| 9713 | + "nullable": true |
| 9714 | + }, |
| 9715 | + "metadata": { |
| 9716 | + "$ref": "#/components/schemas/Metadata" |
| 9717 | + }, |
| 9718 | + "rawHtml": { |
| 9719 | + "type": "string", |
| 9720 | + "nullable": true |
| 9721 | + }, |
| 9722 | + "screenshot": { |
| 9723 | + "type": "string", |
| 9724 | + "nullable": true |
| 9725 | + } |
| 9726 | + } |
| 9727 | + }, |
9645 | 9728 | "EditMessageReqPayload": { |
9646 | 9729 | "type": "object", |
9647 | 9730 | "required": [ |
@@ -11680,6 +11763,152 @@ |
11680 | 11763 | "updated_at": "2021-01-01 00:00:00.000" |
11681 | 11764 | } |
11682 | 11765 | }, |
| 11766 | + "Metadata": { |
| 11767 | + "type": "object", |
| 11768 | + "properties": { |
| 11769 | + "articleSection": { |
| 11770 | + "type": "string", |
| 11771 | + "nullable": true |
| 11772 | + }, |
| 11773 | + "articleTag": { |
| 11774 | + "type": "string", |
| 11775 | + "nullable": true |
| 11776 | + }, |
| 11777 | + "dcDate": { |
| 11778 | + "type": "string", |
| 11779 | + "nullable": true |
| 11780 | + }, |
| 11781 | + "dcDateCreated": { |
| 11782 | + "type": "string", |
| 11783 | + "nullable": true |
| 11784 | + }, |
| 11785 | + "dcDescription": { |
| 11786 | + "type": "string", |
| 11787 | + "nullable": true |
| 11788 | + }, |
| 11789 | + "dcSubject": { |
| 11790 | + "type": "string", |
| 11791 | + "nullable": true |
| 11792 | + }, |
| 11793 | + "dcTermsAudience": { |
| 11794 | + "type": "string", |
| 11795 | + "nullable": true |
| 11796 | + }, |
| 11797 | + "dcTermsCreated": { |
| 11798 | + "type": "string", |
| 11799 | + "nullable": true |
| 11800 | + }, |
| 11801 | + "dcTermsKeywords": { |
| 11802 | + "type": "string", |
| 11803 | + "nullable": true |
| 11804 | + }, |
| 11805 | + "dcTermsSubject": { |
| 11806 | + "type": "string", |
| 11807 | + "nullable": true |
| 11808 | + }, |
| 11809 | + "dcTermsType": { |
| 11810 | + "type": "string", |
| 11811 | + "nullable": true |
| 11812 | + }, |
| 11813 | + "dcType": { |
| 11814 | + "type": "string", |
| 11815 | + "nullable": true |
| 11816 | + }, |
| 11817 | + "description": { |
| 11818 | + "type": "string", |
| 11819 | + "nullable": true |
| 11820 | + }, |
| 11821 | + "error": { |
| 11822 | + "type": "string", |
| 11823 | + "nullable": true |
| 11824 | + }, |
| 11825 | + "keywords": { |
| 11826 | + "type": "string", |
| 11827 | + "nullable": true |
| 11828 | + }, |
| 11829 | + "language": { |
| 11830 | + "type": "string", |
| 11831 | + "nullable": true |
| 11832 | + }, |
| 11833 | + "modifiedTime": { |
| 11834 | + "type": "string", |
| 11835 | + "nullable": true |
| 11836 | + }, |
| 11837 | + "ogAudio": { |
| 11838 | + "type": "string", |
| 11839 | + "nullable": true |
| 11840 | + }, |
| 11841 | + "ogDescription": { |
| 11842 | + "type": "string", |
| 11843 | + "nullable": true |
| 11844 | + }, |
| 11845 | + "ogDeterminer": { |
| 11846 | + "type": "string", |
| 11847 | + "nullable": true |
| 11848 | + }, |
| 11849 | + "ogImage": { |
| 11850 | + "type": "string", |
| 11851 | + "nullable": true |
| 11852 | + }, |
| 11853 | + "ogLocale": { |
| 11854 | + "type": "string", |
| 11855 | + "nullable": true |
| 11856 | + }, |
| 11857 | + "ogLocaleAlternate": { |
| 11858 | + "type": "array", |
| 11859 | + "items": { |
| 11860 | + "type": "string" |
| 11861 | + }, |
| 11862 | + "nullable": true |
| 11863 | + }, |
| 11864 | + "ogSiteName": { |
| 11865 | + "type": "string", |
| 11866 | + "nullable": true |
| 11867 | + }, |
| 11868 | + "ogTitle": { |
| 11869 | + "type": "string", |
| 11870 | + "nullable": true |
| 11871 | + }, |
| 11872 | + "ogUrl": { |
| 11873 | + "type": "string", |
| 11874 | + "nullable": true |
| 11875 | + }, |
| 11876 | + "ogVideo": { |
| 11877 | + "type": "string", |
| 11878 | + "nullable": true |
| 11879 | + }, |
| 11880 | + "publishedTime": { |
| 11881 | + "type": "string", |
| 11882 | + "nullable": true |
| 11883 | + }, |
| 11884 | + "robots": { |
| 11885 | + "type": "string", |
| 11886 | + "nullable": true |
| 11887 | + }, |
| 11888 | + "site_map": { |
| 11889 | + "allOf": [ |
| 11890 | + { |
| 11891 | + "$ref": "#/components/schemas/Sitemap" |
| 11892 | + } |
| 11893 | + ], |
| 11894 | + "nullable": true |
| 11895 | + }, |
| 11896 | + "sourceURL": { |
| 11897 | + "type": "string", |
| 11898 | + "nullable": true |
| 11899 | + }, |
| 11900 | + "statusCode": { |
| 11901 | + "type": "integer", |
| 11902 | + "format": "int32", |
| 11903 | + "nullable": true, |
| 11904 | + "minimum": 0 |
| 11905 | + }, |
| 11906 | + "title": { |
| 11907 | + "type": "string", |
| 11908 | + "nullable": true |
| 11909 | + } |
| 11910 | + } |
| 11911 | + }, |
11683 | 11912 | "MmrOptions": { |
11684 | 11913 | "type": "object", |
11685 | 11914 | "description": "MMR Options lets you specify different methods to rerank the chunks in the result set using Maximal Marginal Relevance. If not specified, this defaults to the score of the chunks.", |
|
15025 | 15254 | "pos_in_queue": 1 |
15026 | 15255 | } |
15027 | 15256 | }, |
| 15257 | + "Sitemap": { |
| 15258 | + "type": "object", |
| 15259 | + "required": [ |
| 15260 | + "changefreq" |
| 15261 | + ], |
| 15262 | + "properties": { |
| 15263 | + "changefreq": { |
| 15264 | + "type": "string" |
| 15265 | + } |
| 15266 | + } |
| 15267 | + }, |
15028 | 15268 | "SlimChunkMetadata": { |
15029 | 15269 | "type": "object", |
15030 | 15270 | "required": [ |
|
16378 | 16618 | } |
16379 | 16619 | } |
16380 | 16620 | }, |
| 16621 | + "UploadHtmlPageReqPayload": { |
| 16622 | + "type": "object", |
| 16623 | + "required": [ |
| 16624 | + "data", |
| 16625 | + "metadata", |
| 16626 | + "scrapeId" |
| 16627 | + ], |
| 16628 | + "properties": { |
| 16629 | + "data": { |
| 16630 | + "$ref": "#/components/schemas/Document" |
| 16631 | + }, |
| 16632 | + "metadata": {}, |
| 16633 | + "scrapeId": { |
| 16634 | + "type": "string", |
| 16635 | + "format": "uuid" |
| 16636 | + } |
| 16637 | + } |
| 16638 | + }, |
16381 | 16639 | "UsageGraphPoint": { |
16382 | 16640 | "type": "object", |
16383 | 16641 | "required": [ |
|
0 commit comments