diff --git a/public/__redirects b/public/__redirects index c31af846df32dd7..92d5f88c26c89ab 100644 --- a/public/__redirects +++ b/public/__redirects @@ -1606,10 +1606,14 @@ # Guides section /workers-ai/demos/ /workers-ai/guides/demos-architectures/ 301 +# Features section +/workers-ai/markdown-conversion/ /workers-ai/features/markdown-conversion/ 301 + # workflows /workflows/reference/storage-options/ /workers/platform/storage-options/ 301 + # workers KV /kv/platform/environments/ /kv/reference/environments/ 301 /kv/platform/kv-commands/ /kv/reference/kv-commands/ 301 diff --git a/src/content/changelog/workers-ai/2025-03-20-markdown-conversion.mdx b/src/content/changelog/workers-ai/2025-03-20-markdown-conversion.mdx index fdb6200d1111e33..301b4380497664b 100644 --- a/src/content/changelog/workers-ai/2025-03-20-markdown-conversion.mdx +++ b/src/content/changelog/workers-ai/2025-03-20-markdown-conversion.mdx @@ -14,27 +14,30 @@ In this example, we fetch a PDF document and an image from R2 and feed them both import { Env } from "./env"; export default { - async fetch(request: Request, env: Env, ctx: ExecutionContext) { - - // https://pub-979cb28270cc461d94bc8a169d8f389d.r2.dev/somatosensory.pdf - const pdf = await env.R2.get('somatosensory.pdf'); - - // https://pub-979cb28270cc461d94bc8a169d8f389d.r2.dev/cat.jpeg - const cat = await env.R2.get('cat.jpeg'); - - return Response.json( - await env.AI.toMarkdown([ - { - name: "somatosensory.pdf", - blob: new Blob([await pdf.arrayBuffer()], { type: "application/octet-stream" }), - }, - { - name: "cat.jpeg", - blob: new Blob([await cat.arrayBuffer()], { type: "application/octet-stream" }), - }, - ]), - ); - }, + async fetch(request: Request, env: Env, ctx: ExecutionContext) { + // https://pub-979cb28270cc461d94bc8a169d8f389d.r2.dev/somatosensory.pdf + const pdf = await env.R2.get("somatosensory.pdf"); + + // https://pub-979cb28270cc461d94bc8a169d8f389d.r2.dev/cat.jpeg + const cat = await env.R2.get("cat.jpeg"); + + return 
Response.json( + await env.AI.toMarkdown([ + { + name: "somatosensory.pdf", + blob: new Blob([await pdf.arrayBuffer()], { + type: "application/octet-stream", + }), + }, + { + name: "cat.jpeg", + blob: new Blob([await cat.arrayBuffer()], { + type: "application/octet-stream", + }), + }, + ]), + ); + }, }; ``` @@ -59,4 +62,4 @@ This is the result: ] ``` -See [Markdown Conversion](/workers-ai/markdown-conversion/) for more information on supported formats, REST API and pricing. \ No newline at end of file +See [Markdown Conversion](/workers-ai/features/markdown-conversion/) for more information on supported formats, REST API and pricing. diff --git a/src/content/docs/autorag/concepts/how-autorag-works.mdx b/src/content/docs/autorag/concepts/how-autorag-works.mdx index ccc7bf7c361d28e..769aa30c62496c2 100644 --- a/src/content/docs/autorag/concepts/how-autorag-works.mdx +++ b/src/content/docs/autorag/concepts/how-autorag-works.mdx @@ -19,7 +19,7 @@ Indexing begins automatically when you create an AutoRAG instance and connect a Here is what happens during indexing: 1. **Data ingestion:** AutoRAG reads from your connected data source. -2. **Markdown conversion:** AutoRAG uses [Workers AI’s Markdown Conversion](/workers-ai/markdown-conversion/) to convert [supported data types](/autorag/configuration/data-source/) into structured Markdown. This ensures consistency across diverse file types. For images, Workers AI is used to perform object detection followed by vision-to-language transformation to convert images into Markdown text. +2. **Markdown conversion:** AutoRAG uses [Workers AI’s Markdown Conversion](/workers-ai/features/markdown-conversion/) to convert [supported data types](/autorag/configuration/data-source/) into structured Markdown. This ensures consistency across diverse file types. For images, Workers AI is used to perform object detection followed by vision-to-language transformation to convert images into Markdown text. 3. 
**Chunking:** The extracted text is [chunked](/autorag/configuration/chunking/) into smaller pieces to improve retrieval granularity. 4. **Embedding:** Each chunk is embedded using Workers AI’s embedding model to transform the content into vectors. 5. **Vector storage:** The resulting vectors, along with metadata like file name, are stored in the [Vectorize](/vectorize/) database created on your Cloudflare account. diff --git a/src/content/docs/autorag/configuration/data-source.mdx b/src/content/docs/autorag/configuration/data-source.mdx index ce9c45d7ec7c2de..c17b807f034d0fc 100644 --- a/src/content/docs/autorag/configuration/data-source.mdx +++ b/src/content/docs/autorag/configuration/data-source.mdx @@ -55,6 +55,6 @@ AutoRAG supports the following plain text file types: ### Rich format file types -AutoRAG uses [Markdown Conversion](/workers-ai/markdown-conversion/) to convert rich format files to markdown. The following table lists the supported formats that will be converted to Markdown: +AutoRAG uses [Markdown Conversion](/workers-ai/features/markdown-conversion/) to convert rich format files to markdown. The following table lists the supported formats that will be converted to Markdown: diff --git a/src/content/docs/workers-ai/markdown-conversion.mdx b/src/content/docs/workers-ai/features/markdown-conversion.mdx similarity index 100% rename from src/content/docs/workers-ai/markdown-conversion.mdx rename to src/content/docs/workers-ai/features/markdown-conversion.mdx diff --git a/src/content/partials/workers-ai/markdown-conversion-support.mdx b/src/content/partials/workers-ai/markdown-conversion-support.mdx index c9e35e43c2b6282..7ea5c77ba962243 100644 --- a/src/content/partials/workers-ai/markdown-conversion-support.mdx +++ b/src/content/partials/workers-ai/markdown-conversion-support.mdx @@ -109,4 +109,4 @@ 1 Image conversion uses two Workers AI models for object detection and summarization.
See [Workers AI -pricing](/workers-ai/markdown-conversion/#pricing) for more details. +pricing](/workers-ai/features/markdown-conversion/#pricing) for more details.