From b2fc8e22f932bfbf3ca881b1c02fc645cdcfac08 Mon Sep 17 00:00:00 2001
From: Thomas Gauvin <35609369+thomasgauvin@users.noreply.github.com>
Date: Sat, 18 Jan 2025 15:27:53 -0500
Subject: [PATCH 1/3] Update bindings.mdx
---
.../workers-ai/configuration/bindings.mdx | 52 ++++++++++++++++++-
1 file changed, 50 insertions(+), 2 deletions(-)
diff --git a/src/content/docs/workers-ai/configuration/bindings.mdx b/src/content/docs/workers-ai/configuration/bindings.mdx
index bcc052d223d465..57dbb2be6add9d 100644
--- a/src/content/docs/workers-ai/configuration/bindings.mdx
+++ b/src/content/docs/workers-ai/configuration/bindings.mdx
@@ -55,9 +55,57 @@ const answer = await env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
**Supported options**
+ * `prompt`
+ * Text prompt for the text-generation (maxLength: 131072, minLength: 1).
+ * `raw`
+ * If true, a chat template is not applied and you must adhere to the specific model's expected formatting.
* `stream`
- * Returns a stream of results as they are available.
-
+ * If true, the response will be streamed back incrementally using SSE, Server Sent Events.
+ * `max_tokens`
+ * The maximum number of tokens to generate in the response.
+ * `temperature`
+ * Controls the randomness of the output; higher values produce more random results (maximum: 5, minimum: 0).
+ * `top_p`
+ * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses (maximum: 2, minimum: 0).
+ * `top_k`
+ * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises (maximum: 50, minimum: 1).
+ * `seed`
+ * Random seed for reproducibility of the generation (maximum: 9999999999, minimum: 1).
+ * `repetition_penalty`
+ * Penalty for repeated tokens; higher values discourage repetition (maximum: 2, minimum: 0).
+ * `frequency_penalty`
+ * Decreases the likelihood of the model repeating the same lines verbatim (maximum: 2, minimum: 0).
+ * `presence_penalty`
+ * Increases the likelihood of the model introducing new topics (maximum: 2, minimum: 0).
+ * `messages` );
+ content: string;
+ name?: string;
+}[]" />
+ * An array of message objects representing the conversation history.
+ * `tools` );
+ function: {
+ name: string;
+ description: string;
+ parameters?: {
+ type: "object" | (string & NonNullable);
+ properties: {
+ [key: string]: {
+ type: string;
+ description?: string;
+ };
+ };
+ required: string[];
+ };
+ };
+}[]" />
+ * A list of tools available for the assistant to use.
+ * `functions`
+ * A list of functions available for the assistant to use.
```javascript
From ceb28bd4bc1a7cb279d67be90681c1399db766c3 Mon Sep 17 00:00:00 2001
From: Thomas Gauvin <35609369+thomasgauvin@users.noreply.github.com>
Date: Sat, 18 Jan 2025 15:36:04 -0500
Subject: [PATCH 2/3] Update
src/content/docs/workers-ai/configuration/bindings.mdx
---
src/content/docs/workers-ai/configuration/bindings.mdx | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/src/content/docs/workers-ai/configuration/bindings.mdx b/src/content/docs/workers-ai/configuration/bindings.mdx
index 57dbb2be6add9d..0d80c4917a4695 100644
--- a/src/content/docs/workers-ai/configuration/bindings.mdx
+++ b/src/content/docs/workers-ai/configuration/bindings.mdx
@@ -78,18 +78,18 @@ const answer = await env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
* `presence_penalty`
* Increases the likelihood of the model introducing new topics (maximum: 2, minimum: 0).
* `messages` );
+ role: \"user\" | \"assistant\" | \"system\" | \"tool\" | (string & NonNullable);
content: string;
name?: string;
}[]" />
* An array of message objects representing the conversation history.
* `tools` );
+ type: \"function\" | (string & NonNullable);
function: {
name: string;
description: string;
parameters?: {
- type: "object" | (string & NonNullable);
+ type: \"object\" | (string & NonNullable);
properties: {
[key: string]: {
type: string;
From 4f0890e8e697d1ef70ffad7a63966ebc4dc53342 Mon Sep 17 00:00:00 2001
From: Thomas Gauvin <35609369+thomasgauvin@users.noreply.github.com>
Date: Sat, 18 Jan 2025 15:43:27 -0500
Subject: [PATCH 3/3] thomasgauvin: add html characters because escaping double
 quotes doesn't seem to work
---
.../workers-ai/configuration/bindings.mdx | 127 +++++++++---------
1 file changed, 60 insertions(+), 67 deletions(-)
diff --git a/src/content/docs/workers-ai/configuration/bindings.mdx b/src/content/docs/workers-ai/configuration/bindings.mdx
index 0d80c4917a4695..efdd520e12bcb0 100644
--- a/src/content/docs/workers-ai/configuration/bindings.mdx
+++ b/src/content/docs/workers-ai/configuration/bindings.mdx
@@ -3,7 +3,6 @@ pcx_content_type: configuration
title: Workers Bindings
sidebar:
order: 1
-
---
import { Type, MetaInfo } from "~/components";
@@ -40,81 +39,75 @@ To configure a Workers AI binding in your Pages Function, you must use the Cloud
`async env.AI.run()` runs a model. Takes a model as the first parameter, and an object as the second parameter.
```javascript
-const answer = await env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
- prompt: "What is the origin of the phrase 'Hello, World'"
+const answer = await env.AI.run("@cf/meta/llama-3.1-8b-instruct", {
+ prompt: "What is the origin of the phrase 'Hello, World'",
});
```
-**Parameters**
+```javascript
+const answer = await env.AI.run("@cf/meta/llama-3.1-8b-instruct", {
+ prompt: "What is the origin of the phrase 'Hello, World'",
+ stream: true,
+});
+return new Response(answer, {
+ headers: { "content-type": "text/event-stream" },
+});
+```
+**Parameters**
-* `model`
+- `model`
- * The model to run.
+ - The model to run.
**Supported options**
- * `prompt`
- * Text prompt for the text-generation (maxLength: 131072, minLength: 1).
- * `raw`
- * If true, a chat template is not applied and you must adhere to the specific model's expected formatting.
- * `stream`
- * If true, the response will be streamed back incrementally using SSE, Server Sent Events.
- * `max_tokens`
- * The maximum number of tokens to generate in the response.
- * `temperature`
- * Controls the randomness of the output; higher values produce more random results (maximum: 5, minimum: 0).
- * `top_p`
- * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses (maximum: 2, minimum: 0).
- * `top_k`
- * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises (maximum: 50, minimum: 1).
- * `seed`
- * Random seed for reproducibility of the generation (maximum: 9999999999, minimum: 1).
- * `repetition_penalty`
- * Penalty for repeated tokens; higher values discourage repetition (maximum: 2, minimum: 0).
- * `frequency_penalty`
- * Decreases the likelihood of the model repeating the same lines verbatim (maximum: 2, minimum: 0).
- * `presence_penalty`
- * Increases the likelihood of the model introducing new topics (maximum: 2, minimum: 0).
- * `messages` );
- content: string;
- name?: string;
-}[]" />
- * An array of message objects representing the conversation history.
- * `tools` );
- function: {
- name: string;
- description: string;
- parameters?: {
- type: \"object\" | (string & NonNullable);
- properties: {
- [key: string]: {
- type: string;
- description?: string;
+ - `prompt`
+ - Text prompt for the text-generation (maxLength: 131072, minLength: 1).
+ - `raw`
+ - If true, a chat template is not applied and you must adhere to the specific model's expected formatting.
+ - `stream`
+ - If true, the response will be streamed back incrementally using SSE (Server-Sent Events).
+ - `max_tokens`
+ - The maximum number of tokens to generate in the response.
+ - `temperature`
+ - Controls the randomness of the output; higher values produce more random results (maximum: 5, minimum: 0).
+ - `top_p`
+ - Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses (maximum: 2, minimum: 0).
+ - `top_k`
+ - Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises (maximum: 50, minimum: 1).
+ - `seed`
+ - Random seed for reproducibility of the generation (maximum: 9999999999, minimum: 1).
+ - `repetition_penalty`
+ - Penalty for repeated tokens; higher values discourage repetition (maximum: 2, minimum: 0).
+ - `frequency_penalty`
+ - Decreases the likelihood of the model repeating the same lines verbatim (maximum: 2, minimum: 0).
+ - `presence_penalty`
+ - Increases the likelihood of the model introducing new topics (maximum: 2, minimum: 0).
+ - `messages`: An array of message objects representing the conversation history.
+ - `tools`
- * A list of tools available for the assistant to use.
- * `functions`
- * A list of functions available for the assistant to use.
-
-
-```javascript
-const answer = await env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
- prompt: "What is the origin of the phrase 'Hello, World'",
- stream: true
-});
-
-return new Response(answer, {
- headers: { "content-type": "text/event-stream" }
-});
-```
+ - A list of tools available for the assistant to use.
+ - `functions`: A list of functions available for the assistant to use.