From b2fc8e22f932bfbf3ca881b1c02fc645cdcfac08 Mon Sep 17 00:00:00 2001 From: Thomas Gauvin <35609369+thomasgauvin@users.noreply.github.com> Date: Sat, 18 Jan 2025 15:27:53 -0500 Subject: [PATCH 1/3] Update bindings.mdx --- .../workers-ai/configuration/bindings.mdx | 52 ++++++++++++++++++- 1 file changed, 50 insertions(+), 2 deletions(-) diff --git a/src/content/docs/workers-ai/configuration/bindings.mdx b/src/content/docs/workers-ai/configuration/bindings.mdx index bcc052d223d465..57dbb2be6add9d 100644 --- a/src/content/docs/workers-ai/configuration/bindings.mdx +++ b/src/content/docs/workers-ai/configuration/bindings.mdx @@ -55,9 +55,57 @@ const answer = await env.AI.run('@cf/meta/llama-3.1-8b-instruct', { **Supported options** + * `prompt` + * Text prompt for the text-generation (maxLength: 131072, minLength: 1). + * `raw` + * If true, a chat template is not applied and you must adhere to the specific model's expected formatting. * `stream` - * Returns a stream of results as they are available. - + * If true, the response will be streamed back incrementally using SSE, Server Sent Events. + * `max_tokens` + * The maximum number of tokens to generate in the response. + * `temperature` + * Controls the randomness of the output; higher values produce more random results (maximum: 5, minimum: 0). + * `top_p` + * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses (maximum: 2, minimum: 0). + * `top_k` + * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises (maximum: 50, minimum: 1). + * `seed` + * Random seed for reproducibility of the generation (maximum: 9999999999, minimum: 1). + * `repetition_penalty` + * Penalty for repeated tokens; higher values discourage repetition (maximum: 2, minimum: 0). 
+ * `frequency_penalty` + * Decreases the likelihood of the model repeating the same lines verbatim (maximum: 2, minimum: 0). + * `presence_penalty` + * Increases the likelihood of the model introducing new topics (maximum: 2, minimum: 0). + * `messages` ); + content: string; + name?: string; +}[]" /> + * An array of message objects representing the conversation history. + * `tools` ); + function: { + name: string; + description: string; + parameters?: { + type: "object" | (string & NonNullable); + properties: { + [key: string]: { + type: string; + description?: string; + }; + }; + required: string[]; + }; + }; +}[]" /> + * A list of tools available for the assistant to use. + * `functions` + * A list of functions available for the assistant to use. ```javascript From ceb28bd4bc1a7cb279d67be90681c1399db766c3 Mon Sep 17 00:00:00 2001 From: Thomas Gauvin <35609369+thomasgauvin@users.noreply.github.com> Date: Sat, 18 Jan 2025 15:36:04 -0500 Subject: [PATCH 2/3] Update src/content/docs/workers-ai/configuration/bindings.mdx --- src/content/docs/workers-ai/configuration/bindings.mdx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/content/docs/workers-ai/configuration/bindings.mdx b/src/content/docs/workers-ai/configuration/bindings.mdx index 57dbb2be6add9d..0d80c4917a4695 100644 --- a/src/content/docs/workers-ai/configuration/bindings.mdx +++ b/src/content/docs/workers-ai/configuration/bindings.mdx @@ -78,18 +78,18 @@ const answer = await env.AI.run('@cf/meta/llama-3.1-8b-instruct', { * `presence_penalty` * Increases the likelihood of the model introducing new topics (maximum: 2, minimum: 0). * `messages` ); + role: \"user\" | \"assistant\" | \"system\" | \"tool\" | (string & NonNullable); content: string; name?: string; }[]" /> * An array of message objects representing the conversation history. 
* `tools` ); + type: \"function\" | (string & NonNullable); function: { name: string; description: string; parameters?: { - type: "object" | (string & NonNullable); + type: \"object\" | (string & NonNullable); properties: { [key: string]: { type: string; From 4f0890e8e697d1ef70ffad7a63966ebc4dc53342 Mon Sep 17 00:00:00 2001 From: Thomas Gauvin <35609369+thomasgauvin@users.noreply.github.com> Date: Sat, 18 Jan 2025 15:43:27 -0500 Subject: [PATCH 3/3] thomasgauvin: add html characters because escaping double quotes doesnt seem to work --- .../workers-ai/configuration/bindings.mdx | 127 +++++++++--------- 1 file changed, 60 insertions(+), 67 deletions(-) diff --git a/src/content/docs/workers-ai/configuration/bindings.mdx b/src/content/docs/workers-ai/configuration/bindings.mdx index 0d80c4917a4695..efdd520e12bcb0 100644 --- a/src/content/docs/workers-ai/configuration/bindings.mdx +++ b/src/content/docs/workers-ai/configuration/bindings.mdx @@ -3,7 +3,6 @@ pcx_content_type: configuration title: Workers Bindings sidebar: order: 1 - --- import { Type, MetaInfo } from "~/components"; @@ -40,81 +39,75 @@ To configure a Workers AI binding in your Pages Function, you must use the Cloud `async env.AI.run()` runs a model. Takes a model as the first parameter, and an object as the second parameter. ```javascript -const answer = await env.AI.run('@cf/meta/llama-3.1-8b-instruct', { - prompt: "What is the origin of the phrase 'Hello, World'" +const answer = await env.AI.run("@cf/meta/llama-3.1-8b-instruct", { + prompt: "What is the origin of the phrase 'Hello, World'", }); ``` -**Parameters** +```javascript +const answer = await env.AI.run("@cf/meta/llama-3.1-8b-instruct", { + prompt: "What is the origin of the phrase 'Hello, World'", + stream: true, +}); +return new Response(answer, { + headers: { "content-type": "text/event-stream" }, +}); +``` +**Parameters** -* `model` +- `model` - * The model to run. + - The model to run. 
**Supported options** - * `prompt` - * Text prompt for the text-generation (maxLength: 131072, minLength: 1). - * `raw` - * If true, a chat template is not applied and you must adhere to the specific model's expected formatting. - * `stream` - * If true, the response will be streamed back incrementally using SSE, Server Sent Events. - * `max_tokens` - * The maximum number of tokens to generate in the response. - * `temperature` - * Controls the randomness of the output; higher values produce more random results (maximum: 5, minimum: 0). - * `top_p` - * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses (maximum: 2, minimum: 0). - * `top_k` - * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises (maximum: 50, minimum: 1). - * `seed` - * Random seed for reproducibility of the generation (maximum: 9999999999, minimum: 1). - * `repetition_penalty` - * Penalty for repeated tokens; higher values discourage repetition (maximum: 2, minimum: 0). - * `frequency_penalty` - * Decreases the likelihood of the model repeating the same lines verbatim (maximum: 2, minimum: 0). - * `presence_penalty` - * Increases the likelihood of the model introducing new topics (maximum: 2, minimum: 0). - * `messages` ); - content: string; - name?: string; -}[]" /> - * An array of message objects representing the conversation history. - * `tools` ); - function: { - name: string; - description: string; - parameters?: { - type: \"object\" | (string & NonNullable); - properties: { - [key: string]: { - type: string; - description?: string; + - `prompt` + - Text prompt for text generation (maxLength: 131072, minLength: 1). + - `raw` + - If true, a chat template is not applied and you must adhere to the specific model's expected formatting. 
+ - `stream` + - If true, the response will be streamed back incrementally using Server-Sent Events (SSE). + - `max_tokens` + - The maximum number of tokens to generate in the response. + - `temperature` + - Controls the randomness of the output; higher values produce more random results (maximum: 5, minimum: 0). + - `top_p` + - Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses (maximum: 2, minimum: 0). + - `top_k` + - Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises (maximum: 50, minimum: 1). + - `seed` + - Random seed for reproducibility of the generation (maximum: 9999999999, minimum: 1). + - `repetition_penalty` + - Penalty for repeated tokens; higher values discourage repetition (maximum: 2, minimum: 0). + - `frequency_penalty` + - Decreases the likelihood of the model repeating the same lines verbatim (maximum: 2, minimum: 0). + - `presence_penalty` + - Increases the likelihood of the model introducing new topics (maximum: 2, minimum: 0). + - `messages` + - An array of message objects representing the conversation history. + - `tools` - * A list of tools available for the assistant to use. - * `functions` - * A list of functions available for the assistant to use. - - -```javascript -const answer = await env.AI.run('@cf/meta/llama-3.1-8b-instruct', { - prompt: "What is the origin of the phrase 'Hello, World'", - stream: true -}); - -return new Response(answer, { - headers: { "content-type": "text/event-stream" } -}); -``` + }[]" /> + - A list of tools available for the assistant to use. + - `functions` + - A list of functions available for the assistant to use.