Merged
37 changes: 37 additions & 0 deletions public/__redirects
@@ -1528,6 +1528,38 @@
/workers/observability/baselime-integration/ /workers/observability/integrations/baselime-integration/ 301
/workers-ai/tutorials/image-generator-flux/ /workers-ai/tutorials/image-generation-playground/ 301

# Workers AI reorganization redirects
# Function calling
/workers-ai/function-calling/ /workers-ai/features/function-calling/ 301
/workers-ai/function-calling/embedded/ /workers-ai/features/function-calling/embedded/ 301
/workers-ai/function-calling/embedded/get-started/ /workers-ai/features/function-calling/embedded/get-started/ 301
/workers-ai/function-calling/embedded/api-reference/ /workers-ai/features/function-calling/embedded/api-reference/ 301
/workers-ai/function-calling/embedded/examples/ /workers-ai/features/function-calling/embedded/examples/ 301
/workers-ai/function-calling/embedded/examples/kv/ /workers-ai/features/function-calling/embedded/examples/kv/ 301
/workers-ai/function-calling/embedded/examples/openapi/ /workers-ai/features/function-calling/embedded/examples/openapi/ 301
/workers-ai/function-calling/embedded/examples/fetch/ /workers-ai/features/function-calling/embedded/examples/fetch/ 301
/workers-ai/function-calling/embedded/examples/troubleshooting/ /workers-ai/features/function-calling/embedded/examples/troubleshooting/ 301
/workers-ai/function-calling/traditional/ /workers-ai/features/function-calling/traditional/ 301

# JSON Mode
/workers-ai/json-mode/ /workers-ai/features/json-mode/ 301

# Fine-tunes
/workers-ai/fine-tunes/ /workers-ai/features/fine-tunes/ 301
/workers-ai/fine-tunes/public-loras/ /workers-ai/features/fine-tunes/public-loras/ 301
/workers-ai/fine-tunes/loras/ /workers-ai/features/fine-tunes/loras/ 301

# Prompting
/workers-ai/guides/prompting/ /workers-ai/features/prompting/ 301

# Platform section
/workers-ai/privacy/ /workers-ai/platform/privacy/ 301
/workers-ai/glossary/ /workers-ai/platform/glossary/ 301
/workers-ai/workers-ai-errors/ /workers-ai/platform/errors/ 301

# Guides section
/workers-ai/demos/ /workers-ai/guides/demos-architectures/ 301

# workflows

/workflows/reference/storage-options/ /workers/platform/storage-options/ 301
@@ -1867,6 +1899,11 @@
/workers/templates/pages/* /workers/examples/:splat 301
/workers/observability/logging/* /workers/observability/logs/:splat 301

# Workers AI
/workers-ai/demos/* /workers-ai/guides/demos-architectures/:splat 301
/workers-ai/tutorials/* /workers-ai/guides/tutorials/:splat 301


# Others
/logs/analytics-integrations/* /fundamentals/data-products/analytics-integrations/:splat 301
/fundamentals/notifications/* /notifications/:splat 301
49 changes: 26 additions & 23 deletions src/content/changelog/workers-ai/2025-02-25-json-mode.mdx
@@ -6,7 +6,7 @@ date: 2025-02-25T15:00:00Z

import { TypeScriptExample } from "~/components";

Workers AI now supports structured JSON outputs with [JSON mode](/workers-ai/json-mode/), which allows you to request a structured output response when interacting with AI models.
Workers AI now supports structured JSON outputs with [JSON mode](/workers-ai/features/json-mode/), which allows you to request a structured output response when interacting with AI models.

This makes it much easier to retrieve structured data from your AI models, and avoids the (error-prone!) need to parse large unstructured text responses to extract your data.

@@ -23,13 +23,13 @@ interface Env {

// Define your JSON schema for a calendar event
const CalendarEventSchema = {
type: 'object',
properties: {
name: { type: 'string' },
date: { type: 'string' },
participants: { type: 'array', items: { type: 'string' } },
},
required: ['name', 'date', 'participants']
type: "object",
properties: {
name: { type: "string" },
date: { type: "string" },
participants: { type: "array", items: { type: "string" } },
},
required: ["name", "date", "participants"],
};

export default {
@@ -42,29 +42,32 @@ export default {
});

const response = await client.chat.completions.create({
model: 'gpt-4o-2024-08-06',
messages: [
{ role: 'system', content: 'Extract the event information.' },
{ role: 'user', content: 'Alice and Bob are going to a science fair on Friday.' },
],
model: "gpt-4o-2024-08-06",
messages: [
{ role: "system", content: "Extract the event information." },
{
role: "user",
content: "Alice and Bob are going to a science fair on Friday.",
},
],
// Use the `response_format` option to request a structured JSON output
response_format: {
response_format: {
// Set json_schema and provide a schema, or json_object and parse it yourself
type: 'json_schema',
schema: CalendarEventSchema, // provide a schema
},
});
type: "json_schema",
schema: CalendarEventSchema, // provide a schema
},
});

// This will be of type CalendarEventSchema
const event = response.choices[0].message.parsed;

return Response.json({
"calendar_event": event,
})
}
}
calendar_event: event,
});
},
};
```

</TypeScriptExample>

To learn more about JSON mode and structured outputs, visit the [Workers AI documentation](/workers-ai/json-mode/).
To learn more about JSON mode and structured outputs, visit the [Workers AI documentation](/workers-ai/features/json-mode/).
@@ -49,6 +49,6 @@ In the context of Retrieval-Augmented Generation (RAG), knowledge seeding involv

## Related resources

- [Tutorial: Build a RAG AI](/workers-ai/tutorials/build-a-retrieval-augmented-generation-ai/)
- [Tutorial: Build a RAG AI](/workers-ai/guides/tutorials/build-a-retrieval-augmented-generation-ai/)
- [Workers AI: Text embedding models](/workers-ai/models/)
- [Workers AI: Text generation models](/workers-ai/models/)
@@ -28,7 +28,7 @@ This version of the integration is aimed at workflows that require interaction w

1. A user makes a request to a [Worker](https://workers.cloudflare.com/) endpoint. (Which can optionally incorporate [Access](/cloudflare-one/policies/access/) in front of it to authenticate users).
2. Worker fetches [securely stored](/workers/configuration/secrets/) Google Cloud Platform service account information such as service key and generates a JSON Web Token to issue an authenticated API request to BigQuery.
3. Worker receives the data from BigQuery and [transforms it into a format](/workers-ai/tutorials/using-bigquery-with-workers-ai/#6-format-results-from-the-query) that will make it easier to iterate when interacting with Workers AI.
3. Worker receives the data from BigQuery and [transforms it into a format](/workers-ai/guides/tutorials/using-bigquery-with-workers-ai/#6-format-results-from-the-query) that is easier to iterate over when interacting with Workers AI.
4. Using its [native integration](/workers-ai/configuration/bindings/) with Workers AI, the Worker forwards the data from BigQuery which is then run against one of Cloudflare's hosted AI models.
5. The original data retrieved from BigQuery alongside the AI-generated information is returned to the user as a response to the request initiated in step 1.
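
Step 3 above can be sketched as a small pure helper. BigQuery's `jobs.query` REST response returns rows positionally, as `{ f: [{ v: … }] }` arrays that must be matched against the schema's field list; the shape and field names below are illustrative, not taken from the tutorial's exact code:

```typescript
// Minimal sketch: turn BigQuery's positional row format into plain objects
// that are easy to iterate over before handing data to Workers AI.
// The { schema, rows } shape follows BigQuery's jobs.query REST response.
interface BigQueryResponse {
	schema: { fields: { name: string }[] };
	rows: { f: { v: string }[] }[];
}

function transformRows(resp: BigQueryResponse): Record<string, string>[] {
	return resp.rows.map((row) =>
		Object.fromEntries(
			row.f.map((cell, i) => [resp.schema.fields[i].name, cell.v]),
		),
	);
}
```

Each resulting object maps column names to values, so the Worker can loop over records rather than positional cells.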

@@ -40,20 +40,20 @@ For periodic or longer workflows, you may opt for a batch approach. This diagram

1. [A Cron Trigger](/workers/configuration/cron-triggers/) invokes the Worker without any user interaction.
2. Worker fetches [securely stored](/workers/configuration/secrets/) Google Cloud Platform service account information such as service key and generates a JSON Web Token to issue an authenticated API request to BigQuery.
3. Worker receives the data from BigQuery and [transforms it into a format](/workers-ai/tutorials/using-bigquery-with-workers-ai/#6-format-results-from-the-query) that will make it easier to iterate when interacting with Workers AI.
3. Worker receives the data from BigQuery and [transforms it into a format](/workers-ai/guides/tutorials/using-bigquery-with-workers-ai/#6-format-results-from-the-query) that is easier to iterate over when interacting with Workers AI.
4. Using its [native integration](/workers-ai/configuration/bindings/) with Workers AI, the Worker forwards the data from BigQuery to generate some content related to it.
5. Optionally, you can store the BigQuery data and the AI-generated data in a variety of different Cloudflare services.
* Into [D1](/d1/), a SQL database.
* If in step four you used Workers AI to generate embeddings, you can store them in [Vectorize](/vectorize/). To learn more about this type of solution, please consider reviewing the reference architecture diagram on [Retrieval Augmented Generation](/reference-architecture/diagrams/ai/ai-rag/).
* To [Workers KV](/kv/) if the output of your data will be stored and consumed in a key/value fashion.
* If you prefer to save the data fetched from BigQuery and Workers AI into objects (such as images, files, JSONs), you can use [R2](/r2/), our egress-free object storage to do so.
- Into [D1](/d1/), a SQL database.
- If in step four you used Workers AI to generate embeddings, you can store them in [Vectorize](/vectorize/). To learn more about this type of solution, please consider reviewing the reference architecture diagram on [Retrieval Augmented Generation](/reference-architecture/diagrams/ai/ai-rag/).
- To [Workers KV](/kv/) if the output of your data will be stored and consumed in a key/value fashion.
- If you prefer to save the data fetched from BigQuery and Workers AI into objects (such as images, files, JSONs), you can use [R2](/r2/), our egress-free object storage to do so.
6. You can set up an integration so a system or a user gets notified whenever a new result is available or if an error occurs. It's also worth mentioning that Workers by themselves can already provide additional [observability](/workers/observability/).
* Sending an email with all the data retrieved and generated in the previous step is possible using [Email Routing](/email-routing/email-workers/send-email-workers/).
* Since Workers allows you to issue HTTP requests, you can notify a webhook or API endpoint once the process finishes or if there's an error.
- Sending an email with all the data retrieved and generated in the previous step is possible using [Email Routing](/email-routing/email-workers/send-email-workers/).
- Since Workers allows you to issue HTTP requests, you can notify a webhook or API endpoint once the process finishes or if there's an error.

## Related resources

- [Tutorial: Using BigQuery with Workers AI](/workers-ai/tutorials/using-bigquery-with-workers-ai/)
- [Tutorial: Using BigQuery with Workers AI](/workers-ai/guides/tutorials/using-bigquery-with-workers-ai/)
- [Workers AI: Get Started](/workers-ai/get-started/workers-wrangler/)
- [Workers: Secrets](/workers/configuration/secrets/)
- [Workers: Cron Triggers](/workers/runtime-apis/handlers/scheduled/)
@@ -65,4 +65,3 @@
- [Workers KV](/kv/)
- [R2](/r2/)
- [D1](/d1/)

2 changes: 1 addition & 1 deletion src/content/docs/vectorize/get-started/embeddings.mdx
@@ -279,6 +279,6 @@ By finishing this tutorial, you have successfully created a Vectorize index, use

## Next steps

- Build a [generative AI chatbot](/workers-ai/tutorials/build-a-retrieval-augmented-generation-ai/) using Workers AI and Vectorize.
- Build a [generative AI chatbot](/workers-ai/guides/tutorials/build-a-retrieval-augmented-generation-ai/) using Workers AI and Vectorize.
- Learn more about [how vector databases work](/vectorize/reference/what-is-a-vector-database/).
- Read [examples](/vectorize/reference/client-api/) on how to use the Vectorize API from Cloudflare Workers.
@@ -3,7 +3,6 @@ title: Vector databases
pcx_content_type: concept
sidebar:
order: 2

---

Vector databases are a key part of building scalable AI-powered applications. Vector databases provide long term memory, on top of an existing machine learning model.
@@ -14,10 +13,10 @@ Without a vector database, you would need to train your model (or models) or re-

A vector database determines what other data (represented as vectors) is near your input query. This allows you to build different use-cases on top of a vector database, including:

* Semantic search, used to return results similar to the input of the query.
* Classification, used to return the grouping (or groupings) closest to the input query.
* Recommendation engines, used to return content similar to the input based on different criteria (for example previous product sales, or user history).
* Anomaly detection, used to identify whether specific data points are similar to existing data, or different.
- Semantic search, used to return results similar to the input of the query.
- Classification, used to return the grouping (or groupings) closest to the input query.
- Recommendation engines, used to return content similar to the input based on different criteria (for example previous product sales, or user history).
- Anomaly detection, used to identify whether specific data points are similar to existing data, or different.

Vector databases can also power [Retrieval Augmented Generation](https://arxiv.org/abs/2005.11401) (RAG) tasks, which allow you to bring additional context to LLMs (Large Language Models) by using the context from a vector search to augment the user prompt.

@@ -44,16 +43,17 @@ Instead of passing the prompt directly to the LLM, in the RAG approach you:
1. Generate vector embeddings from an existing dataset or corpus (for example, the dataset you want to use to add additional context to the LLMs response). An existing dataset or corpus could be a product documentation, research data, technical specifications, or your product catalog and descriptions.
2. Store the output embeddings in a Vectorize database index.

When a user initiates a prompt, instead of passing it (without additional context) to the LLM, you *augment* it with additional context:
When a user initiates a prompt, instead of passing it (without additional context) to the LLM, you _augment_ it with additional context:

1. The user prompt is passed into the same ML model used for your dataset, returning a vector embedding representation of the query.
2. This embedding is used as the query (semantic search) against the vector database, which returns similar vectors.
3. These vectors are used to look up the content they relate to (if not embedded directly alongside the vectors as metadata).
4. This content is provided as context alongside the original user prompt, providing additional context to the LLM and allowing it to return an answer that is likely to be far more contextual than the standalone prompt.
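
Step 4 can be sketched as a pure helper that folds retrieved passages into the chat messages. The embedding (step 1) and vector query (step 2) would come from bindings such as `env.AI.run(...)` and `env.VECTORIZE.query(...)` in a Worker; the snippet below shows only the augmentation, and the match shape (an id plus a text snippet carried as metadata) is an assumption for illustration:

```typescript
// Sketch of prompt augmentation: retrieved context becomes part of the
// system message, and the original user prompt is passed through unchanged.
interface Match {
	id: string;
	text: string;
}

function buildAugmentedMessages(matches: Match[], userPrompt: string) {
	const context = matches.map((m) => `- ${m.text}`).join("\n");
	return [
		{
			role: "system",
			content: `Answer using only the context below.\nContext:\n${context}`,
		},
		{ role: "user", content: userPrompt },
	];
}
```

The LLM then answers from the supplied context rather than from its training data alone.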

Refer to the [RAG using Workers AI tutorial](/workers-ai/tutorials/build-a-retrieval-augmented-generation-ai/) to learn how to combine Workers AI and Vectorize for generative AI use-cases.
Refer to the [RAG using Workers AI tutorial](/workers-ai/guides/tutorials/build-a-retrieval-augmented-generation-ai/) to learn how to combine Workers AI and Vectorize for generative AI use-cases.

<sup>1</sup> You can learn more about the theory behind RAG by reading the [RAG paper](https://arxiv.org/abs/2005.11401).
<sup>1</sup> You can learn more about the theory behind RAG by reading the [RAG
paper](https://arxiv.org/abs/2005.11401).

## Terminology

@@ -85,9 +85,9 @@

The distance metric is a property of a vector search index: it defines how the index determines how close your query vector is to other vectors within it.

* Distance metrics determine how the vector search engine assesses similarity between vectors.
* Cosine, Euclidean (L2), and Dot Product are the most commonly used distance metrics in vector search.
* The machine learning model and type of embedding you use will determine which distance metric is best suited for your use-case.
* Different metrics determine different scoring characteristics. For example, the `cosine` distance metric is well suited to text, sentence similarity and/or document search use-cases. `euclidean` can be better suited for image or speech recognition use-cases.
- Distance metrics determine how the vector search engine assesses similarity between vectors.
- Cosine, Euclidean (L2), and Dot Product are the most commonly used distance metrics in vector search.
- The machine learning model and type of embedding you use will determine which distance metric is best suited for your use-case.
- Different metrics determine different scoring characteristics. For example, the `cosine` distance metric is well suited to text, sentence similarity and/or document search use-cases. `euclidean` can be better suited for image or speech recognition use-cases.
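
As a rough illustration of how these metrics score differently, here is a sketch of the three computations over plain number arrays (not how Vectorize implements them internally):

```typescript
// Sketches of the three common vector similarity/distance computations.
function dot(a: number[], b: number[]): number {
	return a.reduce((sum, x, i) => sum + x * b[i], 0);
}

function euclidean(a: number[], b: number[]): number {
	return Math.sqrt(a.reduce((sum, x, i) => sum + (x - b[i]) ** 2, 0));
}

function cosineSimilarity(a: number[], b: number[]): number {
	const norm = (v: number[]) => Math.sqrt(dot(v, v));
	return dot(a, b) / (norm(a) * norm(b));
}
```

Note that cosine ignores vector magnitude and compares only direction, which is one reason it pairs well with text embeddings.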

Refer to the [distance metrics](/vectorize/best-practices/create-indexes/#distance-metrics) documentation to learn how to configure a distance metric when creating a Vectorize index.
14 changes: 14 additions & 0 deletions src/content/docs/workers-ai/agents.mdx
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
---
title: Agents
pcx_content_type: navigation
external_link: /agents/
sidebar:
order: 7
---

import { LinkButton } from "~/components"

<div style={{ textAlign: 'center', marginBottom: '2rem' }}>
<p>Build AI assistants that can perform complex tasks on behalf of your users using Cloudflare Workers AI and Agents.</p>
<LinkButton href="/agents/">Go to Agents documentation</LinkButton>
</div>
2 changes: 1 addition & 1 deletion src/content/docs/workers-ai/changelog.mdx
@@ -4,7 +4,7 @@ title: Changelog
release_notes_file_name:
- workers-ai
sidebar:
order: 8
order: 9
head: []
description: Review recent changes to Cloudflare Workers AI.
---
@@ -2,25 +2,23 @@
pcx_content_type: navigation
title: Fine-tunes
sidebar:
order: 5

order: 3
---

import { Feature } from "~/components"
import { Feature } from "~/components";

Learn how to use Workers AI to get fine-tuned inference.

<Feature header="Fine-tuned inference with LoRAs" href="/workers-ai/fine-tunes/loras/" cta="Run inference with LoRAs">
<Feature header="Fine-tuned inference with LoRAs" href="/workers-ai/features/fine-tunes/loras/" cta="Run inference with LoRAs">

Upload a LoRA adapter and run fine-tuned inference with one of our base models.


</Feature>

***
---

## What is fine-tuning?

Fine-tuning is a general term for modifying an AI model by continuing to train it with additional data. The goal of fine-tuning is to increase the probability that a generation is similar to your dataset. Training a model from scratch is not practical for many use cases given how expensive and time-consuming it can be. By fine-tuning an existing pre-trained model, you benefit from its capabilities while also accomplishing your desired task.

[Low-Rank Adaptation](https://arxiv.org/abs/2106.09685) (LoRA) is a specific fine-tuning method that can be applied to various model architectures, not just LLMs. It is common that the pre-trained model weights are directly modified or fused with additional fine-tune weights in traditional fine-tuning methods. LoRA, on the other hand, allows for the fine-tune weights and pre-trained model to remain separate, and for the pre-trained model to remain unchanged. The end result is that you can train models to be more accurate at specific tasks, such as generating code, having a specific personality, or generating images in a specific style.
[Low-Rank Adaptation](https://arxiv.org/abs/2106.09685) (LoRA) is a specific fine-tuning method that can be applied to various model architectures, not just LLMs. It is common that the pre-trained model weights are directly modified or fused with additional fine-tune weights in traditional fine-tuning methods. LoRA, on the other hand, allows for the fine-tune weights and pre-trained model to remain separate, and for the pre-trained model to remain unchanged. The end result is that you can train models to be more accurate at specific tasks, such as generating code, having a specific personality, or generating images in a specific style.
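
The "weights remain separate" idea can be shown numerically: LoRA keeps a low-rank update `B × A` next to the frozen weight matrix `W`, and inference applies `W + B·A` on the fly. A toy sketch with tiny matrices (illustrative only, not how inference engines actually store weights):

```typescript
// Toy illustration of LoRA: the base weights W stay frozen; the fine-tune
// lives entirely in two small matrices B (d x r) and A (r x d), whose
// product is the low-rank update applied at inference time.
type Matrix = number[][];

function matMul(x: Matrix, y: Matrix): Matrix {
	return x.map((row) =>
		y[0].map((_, j) => row.reduce((sum, v, k) => sum + v * y[k][j], 0)),
	);
}

function matAdd(x: Matrix, y: Matrix): Matrix {
	return x.map((row, i) => row.map((v, j) => v + y[i][j]));
}

// Effective weights seen at inference: W' = W + B·A.
function applyLora(W: Matrix, B: Matrix, A: Matrix): Matrix {
	return matAdd(W, matMul(B, A));
}
```

Because `B` and `A` are small (rank `r` is much less than `d`), a LoRA adapter is cheap to store and swap, while `W` is shared across every fine-tune.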