Commit 5cebaeb

removed the why
1 parent 2e5f403 commit 5cebaeb

1 file changed: +109 −120 lines changed

src/content/docs/workers-ai/features/async-batch-api.mdx

Lines changed: 109 additions & 120 deletions
@@ -15,17 +15,6 @@ Asynchronous batch processing lets you send a collection (batch) of inference re
 
 When you send a batch request, the API immediately acknowledges receipt with a status like `"queued"` and provides a unique `request_id`. This ID is later used to poll for the final responses once the processing is complete.
 
-### Why is it important?
-
-- **Handling large workloads:**
-  Ideal for use cases like summarizing large documents or generating embeddings from many data points. Instead of overwhelming the system with thousands of individual requests, you can bundle them into a single, manageable batch.
-
-- **Improved resource management:**
-  In a serverless environment, resources such as GPUs are limited. Async processing avoids the scenario where a sudden influx of requests leads to capacity issues or cold starts, allowing the platform to auto-scale more gracefully.
-
-- **Enhanced reliability:**
-  Guarantees that even if your batch request is queued due to high demand, every individual inference will eventually be processed. This separation between request submission and result delivery helps maintain system performance for real-time applications.
-
 ## 2. Prerequisites and setup
 
 <Render file="prereqs" product="workers" />
@@ -36,11 +25,7 @@ Open your terminal and run the following command:
 
 Create a new Worker project named `batch-api` by running:
 
-<PackageManagers
-  type="create"
-  pkg="cloudflare@latest"
-  args={"batch-api"}
-/>
+<PackageManagers type="create" pkg="cloudflare@latest" args={"batch-api"} />
 
 <Render
   file="c3-post-run-steps"
@@ -82,31 +67,30 @@ Your binding is [available in your Worker code](/workers/reference/migrate-to-mo
 
 ## 4. How to use the Batch API
 
-### 1. Sending a Batch request
+### 1. Sending a Batch request
 
 Send your initial batch inference request by composing a JSON payload containing an array of individual inference requests. Ensure that the total payload is under 25 MB.
 
-
 ```javascript title=Example code
 // Input: JSON with an array of individual request JSONs
 const resp = env.AI.run("@cf/meta/llama-3.3-70b-instruct-batch", {
-  "prompts": [
-    {
-      "prompt": "tell me a joke",
-      "temperature": 0.5,
-      "max_tokens": 100
-    },
-    {
-      "prompt": "write an email from user to provider.",
-      "temperature": 0.6,
-      "max_tokens": 101
-    },
-    {
-      "prompt": "tell me a joke about llamas",
-      "temperature": 0.7,
-      "max_tokens": 102
-    }
-  ]
+  prompts: [
+    {
+      prompt: "tell me a joke",
+      temperature: 0.5,
+      max_tokens: 100,
+    },
+    {
+      prompt: "write an email from user to provider.",
+      temperature: 0.6,
+      max_tokens: 101,
+    },
+    {
+      prompt: "tell me a joke about llamas",
+      temperature: 0.7,
+      max_tokens: 102,
+    },
+  ],
 });
 ```
 
@@ -116,9 +100,9 @@ After sending your batch request, you will receive a response similar to:
 
 ```json
 {
-  "status": "queued",
-  "request_id": "000-000-000",
-  "model": "@cf/meta/llama-3.3-70b-instruct-batch"
+  "status": "queued",
+  "request_id": "000-000-000",
+  "model": "@cf/meta/llama-3.3-70b-instruct-batch"
 }
 ```
 
@@ -132,15 +116,17 @@ Once your batch request is queued, use the `request_id` to poll for its status.
 
 ```javascript title=example
 // Polling the status of the batch request using the request_id
-const status = env.AI.run("@cf/meta/llama-3.3-70b-instruct-batch", { "request_id": "000-000-000" });
+const status = env.AI.run("@cf/meta/llama-3.3-70b-instruct-batch", {
+  request_id: "000-000-000",
+});
 ```
 
 #### Expected polling response (while queued)
 
 ```json
 {
-  "status": "queued",
-  "request_id": "000-000-000"
+  "status": "queued",
+  "request_id": "000-000-000"
 }
 ```
 
@@ -150,40 +136,40 @@ When the inference is complete, the API returns a final HTTP status code of `200
 
 ```json title=Example complete response
 {
-  "responses": [
-    {
-      "id": 2,
-      "result": {
-        "result": {
-          "response": "\nHere's one:\n\nWhy did the llama refuse to play poker?\n\nBecause he always got fleeced!\n\n(Sorry, it's a bit of a woolly joke, but I hope it made you smile!)"
-        }
-      },
-      "success": true
-    },
-    {
-      "id": 0,
-      "result": {
-        "result": {
-          "response": ", please!\nHere's one:\n\nWhat do you call a fake noodle?\n\n(wait for it...)\n\nAn impasta!\n\nHope that made you laugh! Do you want to hear another one? \n#joke #humor #funny #laugh #smile #noodle #impasta #pastajoke\nHow was that? Do you want another one? I have a million of them!\n\nHere's another one:\n\nWhat do you call a can opener that doesn't work?\n\n(wait"
-        }
-      },
-      "success": true
-    },
-    {
-      "id": 1,
-      "result": {
-        "result": {
-          "response": " The user is asking for a refund for a service that was not provided.\nHere is an example of an email that a user might send to a provider requesting a refund for a service that was not provided:\nSubject: Request for Refund for Undelivered Service\n\nDear [Provider's Name],\n\nI am writing to request a refund for the [service name] that I was supposed to receive from your company on [date]. Unfortunately, the service was not provided as agreed upon, and I have not"
-        }
-      },
-      "success": true
-    }
-  ],
-  "usage": {
-    "prompt_tokens": 22,
-    "completion_tokens": 243,
-    "total_tokens": 265
-  }
+  "responses": [
+    {
+      "id": 2,
+      "result": {
+        "result": {
+          "response": "\nHere's one:\n\nWhy did the llama refuse to play poker?\n\nBecause he always got fleeced!\n\n(Sorry, it's a bit of a woolly joke, but I hope it made you smile!)"
+        }
+      },
+      "success": true
+    },
+    {
+      "id": 0,
+      "result": {
+        "result": {
+          "response": ", please!\nHere's one:\n\nWhat do you call a fake noodle?\n\n(wait for it...)\n\nAn impasta!\n\nHope that made you laugh! Do you want to hear another one? \n#joke #humor #funny #laugh #smile #noodle #impasta #pastajoke\nHow was that? Do you want another one? I have a million of them!\n\nHere's another one:\n\nWhat do you call a can opener that doesn't work?\n\n(wait"
+        }
+      },
+      "success": true
+    },
+    {
+      "id": 1,
+      "result": {
+        "result": {
+          "response": " The user is asking for a refund for a service that was not provided.\nHere is an example of an email that a user might send to a provider requesting a refund for a service that was not provided:\nSubject: Request for Refund for Undelivered Service\n\nDear [Provider's Name],\n\nI am writing to request a refund for the [service name] that I was supposed to receive from your company on [date]. Unfortunately, the service was not provided as agreed upon, and I have not"
+        }
+      },
+      "success": true
+    }
+  ],
+  "usage": {
+    "prompt_tokens": 22,
+    "completion_tokens": 243,
+    "total_tokens": 265
+  }
 }
 ```
 
@@ -199,53 +185,56 @@ Below is a sample TypeScript Worker that receives a batch of inference requests,
 
 ```ts
 export interface Env {
-  AI: {
-    run: (model: string, payload: any) => Promise<any>;
-  };
+  AI: {
+    run: (model: string, payload: any) => Promise<any>;
+  };
 }
 
 export default {
-  async fetch(request: Request, env: Env): Promise<Response> {
-    // Only allow POST requests
-    if (request.method !== "POST") {
-      return new Response("Method Not Allowed", { status: 405 });
-    }
-
-    try {
-      // Parse the incoming JSON payload
-      const data = await request.json();
-
-      // Validate that we have a 'prompts' array in the payload
-      if (!data.prompts || !Array.isArray(data.prompts)) {
-        return new Response(
-          JSON.stringify({
-            error: "Missing or invalid 'prompts' array in request payload."
-          }),
-          { status: 400, headers: { "Content-Type": "application/json" } }
-        );
-      }
-
-      // Send the batch request to the AI model via the AI binding
-      // Replace "@cf/meta/llama-3.3-70b-instruct-batch" with your desired batch-enabled model if needed.
-      const batchResponse = await env.AI.run("@cf/meta/llama-3.3-70b-instruct-batch", {
-        prompts: data.prompts,
-      });
-
-      // Return the response from the AI API
-      return new Response(JSON.stringify(batchResponse), {
-        status: 200,
-        headers: { "Content-Type": "application/json" },
-      });
-    } catch (error: any) {
-      // Log the error if needed and return a 500 response
-      return new Response(
-        JSON.stringify({
-          error: error?.toString() || "An unknown error occurred."
-        }),
-        { status: 500, headers: { "Content-Type": "application/json" } }
-      );
-    }
-  },
+  async fetch(request: Request, env: Env): Promise<Response> {
+    // Only allow POST requests
+    if (request.method !== "POST") {
+      return new Response("Method Not Allowed", { status: 405 });
+    }
+
+    try {
+      // Parse the incoming JSON payload
+      const data = await request.json();
+
+      // Validate that we have a 'prompts' array in the payload
+      if (!data.prompts || !Array.isArray(data.prompts)) {
+        return new Response(
+          JSON.stringify({
+            error: "Missing or invalid 'prompts' array in request payload.",
+          }),
+          { status: 400, headers: { "Content-Type": "application/json" } },
+        );
+      }
+
+      // Send the batch request to the AI model via the AI binding
+      // Replace "@cf/meta/llama-3.3-70b-instruct-batch" with your desired batch-enabled model if needed.
+      const batchResponse = await env.AI.run(
+        "@cf/meta/llama-3.3-70b-instruct-batch",
+        {
+          prompts: data.prompts,
+        },
+      );
+
+      // Return the response from the AI API
+      return new Response(JSON.stringify(batchResponse), {
+        status: 200,
+        headers: { "Content-Type": "application/json" },
+      });
+    } catch (error: any) {
+      // Log the error if needed and return a 500 response
+      return new Response(
+        JSON.stringify({
+          error: error?.toString() || "An unknown error occurred.",
+        }),
+        { status: 500, headers: { "Content-Type": "application/json" } },
+      );
+    }
+  },
 };
 ```
 
@@ -268,4 +257,4 @@ After completing your changes, deploy your Worker with the following command:
 npm run deploy
 ```
 
-By following this guide, you can create a Worker that leverages the async batch API to efficiently handle large workloads and improve the performance of both batch and real-time applications.
+By following this guide, you can create a Worker that leverages the async batch API to efficiently handle large workloads and improve the performance of both batch and real-time applications.
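
For reference, the snippets in the page being edited can be combined into a single submit-then-poll flow. The sketch below is illustrative only: the `env.AI.run` calls, the `Env` interface, and the `@cf/meta/llama-3.3-70b-instruct-batch` model name come from the doc in this diff, while the `runBatchAndWait` helper name, the check for a `responses` field, and the 5-second polling interval with a 20-attempt cap are assumptions added for the example.

```ts
// Minimal sketch: submit a batch, then poll until responses are available.
// Assumption: a queued submission returns { status: "queued", request_id },
// and a completed poll returns an object with `responses` and `usage`,
// as in the example outputs shown on the page.
export interface Env {
  AI: {
    run: (model: string, payload: any) => Promise<any>;
  };
}

const MODEL = "@cf/meta/llama-3.3-70b-instruct-batch";

export async function runBatchAndWait(
  env: Env,
  prompts: object[],
): Promise<any> {
  // 1. Submit the batch; the API acknowledges with a request_id.
  const queued = await env.AI.run(MODEL, { prompts });

  // 2. Poll with the request_id until the final result appears.
  //    The interval and retry cap below are illustrative, not documented values.
  for (let attempt = 0; attempt < 20; attempt++) {
    const status = await env.AI.run(MODEL, { request_id: queued.request_id });
    if (status.responses) {
      return status; // completed: contains `responses` and `usage`
    }
    await new Promise((resolve) => setTimeout(resolve, 5000));
  }
  throw new Error(`Batch ${queued.request_id} did not complete in time.`);
}
```

A Worker route could call `runBatchAndWait(env, data.prompts)` in place of the single `env.AI.run` call in the sample Worker above, at the cost of holding the request open while polling.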
