From 289f7f2ab9320a98dbcdbb2900d5333de0333dde Mon Sep 17 00:00:00 2001 From: kodster28 Date: Tue, 14 Oct 2025 13:36:24 -0500 Subject: [PATCH] [Workers AI] Model updates --- src/content/workers-ai-models/aura-1.json | 20 +- src/content/workers-ai-models/bge-m3.json | 4 + .../embeddinggemma-300m.json | 56 +- src/content/workers-ai-models/flux.json | 232 +-- .../gemma-sea-lion-v4-27b-it.json | 1369 ++++++++++++----- .../workers-ai-models/gpt-oss-120b.json | 128 +- .../workers-ai-models/gpt-oss-20b.json | 132 +- .../workers-ai-models/plamo-embedding-1b.json | 75 +- .../workers-ai-models/smart-turn-v2.json | 4 +- 9 files changed, 1365 insertions(+), 655 deletions(-) diff --git a/src/content/workers-ai-models/aura-1.json b/src/content/workers-ai-models/aura-1.json index a83c566ed831be7..af007866cbbb395 100644 --- a/src/content/workers-ai-models/aura-1.json +++ b/src/content/workers-ai-models/aura-1.json @@ -15,23 +15,23 @@ "property_id": "async_queue", "value": "true" }, - { - "property_id": "partner", - "value": "true" - }, - { - "property_id": "realtime", - "value": "true" - }, { "property_id": "price", "value": [ { - "unit": "per 1k characters", - "price": 0.0150, + "unit": "per audio minute", + "price": 0, "currency": "USD" } ] + }, + { + "property_id": "partner", + "value": "true" + }, + { + "property_id": "realtime", + "value": "true" } ], "schema": { diff --git a/src/content/workers-ai-models/bge-m3.json b/src/content/workers-ai-models/bge-m3.json index 02420e31c02c727..7807ba7b7e1ee6b 100644 --- a/src/content/workers-ai-models/bge-m3.json +++ b/src/content/workers-ai-models/bge-m3.json @@ -15,6 +15,10 @@ "property_id": "async_queue", "value": "true" }, + { + "property_id": "context_window", + "value": "60000" + }, { "property_id": "price", "value": [ diff --git a/src/content/workers-ai-models/embeddinggemma-300m.json b/src/content/workers-ai-models/embeddinggemma-300m.json index 608e7315b7192c6..49cccd925b327cb 100644 --- a/src/content/workers-ai-models/embeddinggemma-300m.json +++ b/src/content/workers-ai-models/embeddinggemma-300m.json @@ -1,5 +1,5 @@ { - "id": "d2f07a41-c152-4061-8083-ec655cbf91a1", + "id": "15631501-2742-4346-a469-22fe202188a2", "source": 1, "name": "@cf/google/embeddinggemma-300m", "description": "EmbeddingGemma is a 300M parameter, state-of-the-art for its size, open embedding model from Google, built from Gemma 3 (with T5Gemma initialization) and the same research and technology used to create Gemini models. EmbeddingGemma produces vector representations of text, making it well-suited for search and retrieval tasks, including classification, clustering, and semantic similarity search. This model was trained with data in 100+ spoken languages.", @@ -8,7 +8,7 @@ "name": "Text Embeddings", "description": "Feature extraction models transform raw data into numerical features that can be processed while preserving the information in the original dataset. These models are ideal as part of building vector search applications or Retrieval Augmented Generation workflows with Large Language Models (LLM)." }, - "created_at": "2025-09-03 20:48:38.784", + "created_at": "2025-09-04 16:38:44.980", "tags": [], "properties": [], "schema": { @@ -16,43 +16,51 @@ "type": "object", "properties": { "text": { - "oneOf": [{ - "type": "string" - }, { - "type": "array", - "items": { - "type": "string" + "oneOf": [ + { + "type": "string", + "description": "The text to embed", + "minLength": 1 + }, + { + "type": "array", + "description": "Batch of text values to embed", + "items": { + "type": "string", + "description": "The text to embed", + "minLength": 1 + }, + "maxItems": 100 } - }], - "description": "Input text to embed. Can be a single string or a list of strings." + ] } }, - "required": ["text"] + "required": [ + "text" + ] }, "output": { "type": "object", + "contentType": "application/json", "properties": { + "shape": { + "type": "array", + "items": { + "type": "number" + } + }, "data": { "type": "array", + "description": "Embeddings of the requested text values", "items": { "type": "array", + "description": "Floating point embedding representation shaped by the embedding model", "items": { "type": "number" } - }, - "description": "Embedding vectors, where each vector is a list of floats." - }, - "shape": { - "type": "array", - "items": { - "type": "integer" - }, - "minItems": 2, - "maxItems": 2, - "description": "Shape of the embedding data as [number_of_embeddings, embedding_dimension]." + } } - }, - "required": ["data", "shape"] + } } } } \ No newline at end of file diff --git a/src/content/workers-ai-models/flux.json b/src/content/workers-ai-models/flux.json index 59ef105c2935fa4..abbfb0507c67cf8 100644 --- a/src/content/workers-ai-models/flux.json +++ b/src/content/workers-ai-models/flux.json @@ -21,126 +21,126 @@ } ], "schema": { - "input": { - "type": "object", - "properties": { - "encoding": { - "type": "string", - "description": "Encoding of the audio stream. Currently only supports raw signed little-endian 16-bit PCM.", - "enum": [ - "linear16" - ] - }, - "sample_rate": { - "type": "string", - "description": "Sample rate of the audio stream in Hz.", - "pattern": "^[0-9]+$" - }, - "eager_eot_threshold": { - "type": "string", - "description": "End-of-turn confidence required to fire an eager end-of-turn event. When set, enables EagerEndOfTurn and TurnResumed events. Valid Values 0.3 - 0.9." - }, - "eot_threshold": { - "type": "string", - "description": "End-of-turn confidence required to finish a turn. Valid Values 0.5 - 0.9.", - "default": "0.7" - }, - "eot_timeout_ms": { - "type": "string", - "description": "A turn will be finished when this much time has passed after speech, regardless of EOT confidence.", - "default": "5000", - "pattern": "^[0-9]+$" - }, - "keyterm": { - "type": "string", - "description": "Keyterm prompting can improve recognition of specialized terminology. Pass multiple keyterm query parameters to boost multiple keyterms." - }, - "mip_opt_out": { - "type": "string", - "description": "Opts out requests from the Deepgram Model Improvement Program. Refer to Deepgram Docs for pricing impacts before setting this to true. https://dpgr.am/deepgram-mip", - "enum": [ - "true", - "false" - ], - "default": "false" - }, - "tag": { - "type": "string", - "description": "Label your requests for the purpose of identification during usage reporting" - }, - "required": [ - "sample_rate", - "encoding" - ] - } - }, - "output": { - "type": "object", - "description": "Output will be returned as websocket messages.", - "properties": { - "request_id": { - "type": "string", - "description": "The unique identifier of the request (uuid)" - }, - "sequence_id": { - "type": "integer", - "description": "Starts at 0 and increments for each message the server sends to the client.", - "minimum": 0 - }, - "event": { - "type": "string", - "description": "The type of event being reported.", - "enum": [ - "Update", - "StartOfTurn", - "EagerEndOfTurn", - "TurnResumed", - "EndOfTurn" + "input": { + "type": "object", + "properties": { + "encoding": { + "type": "string", + "description": "Encoding of the audio stream. Currently only supports raw signed little-endian 16-bit PCM.", + "enum": [ + "linear16" + ] + }, + "sample_rate": { + "type": "string", + "description": "Sample rate of the audio stream in Hz.", + "pattern": "^[0-9]+$" + }, + "eager_eot_threshold": { + "type": "string", + "description": "End-of-turn confidence required to fire an eager end-of-turn event. When set, enables EagerEndOfTurn and TurnResumed events. Valid Values 0.3 - 0.9." + }, + "eot_threshold": { + "type": "string", + "description": "End-of-turn confidence required to finish a turn. Valid Values 0.5 - 0.9.", + "default": "0.7" + }, + "eot_timeout_ms": { + "type": "string", + "description": "A turn will be finished when this much time has passed after speech, regardless of EOT confidence.", + "default": "5000", + "pattern": "^[0-9]+$" + }, + "keyterm": { + "type": "string", + "description": "Keyterm prompting can improve recognition of specialized terminology. Pass multiple keyterm query parameters to boost multiple keyterms." + }, + "mip_opt_out": { + "type": "string", + "description": "Opts out requests from the Deepgram Model Improvement Program. Refer to Deepgram Docs for pricing impacts before setting this to true. https://dpgr.am/deepgram-mip", + "enum": [ + "true", + "false" + ], + "default": "false" + }, + "tag": { + "type": "string", + "description": "Label your requests for the purpose of identification during usage reporting" + } + }, + "required": [ + "sample_rate", + "encoding" ] - }, - "turn_index": { - "type": "integer", - "description": "The index of the current turn", - "minimum": 0 - }, - "audio_window_start": { - "type": "number", - "description": "Start time in seconds of the audio range that was transcribed" - }, - "audio_window_end": { - "type": "number", - "description": "End time in seconds of the audio range that was transcribed" - }, - "transcript": { - "type": "string", - "description": "Text that was said over the course of the current turn" - }, - "words": { - "type": "array", - "description": "The words in the transcript", - "items": { - "type": "object", - "required": [ - "word", - "confidence" - ], - "properties": { - "word": { - "type": "string", - "description": "The individual punctuated, properly-cased word from the transcript" + }, + "output": { + "type": "object", + "description": "Output will be returned as websocket messages.", + "properties": { + "request_id": { + "type": "string", + "description": "The unique identifier of the request (uuid)" + }, + "sequence_id": { + "type": "integer", + "description": "Starts at 0 and increments for each message the server sends to the client.", + "minimum": 0 + }, + "event": { + "type": "string", + "description": "The type of event being reported.", + "enum": [ + "Update", + "StartOfTurn", + "EagerEndOfTurn", + "TurnResumed", + "EndOfTurn" + ] + }, + "turn_index": { + "type": "integer", + "description": "The index of the current turn", + "minimum": 0 + }, + "audio_window_start": { + "type": "number", + "description": "Start time in seconds of the audio range that was transcribed" + }, + "audio_window_end": { + "type": "number", + "description": "End time in seconds of the audio range that was transcribed" + }, + "transcript": { + "type": "string", + "description": "Text that was said over the course of the current turn" + }, + "words": { + "type": "array", + "description": "The words in the transcript", + "items": { + "type": "object", + "required": [ + "word", + "confidence" + ], + "properties": { + "word": { + "type": "string", + "description": "The individual punctuated, properly-cased word from the transcript" + }, + "confidence": { + "type": "number", + "description": "Confidence that this word was transcribed correctly" + } + } + } }, - "confidence": { - "type": "number", - "description": "Confidence that this word was transcribed correctly" + "end_of_turn_confidence": { + "type": "number", + "description": "Confidence that no more speech is coming in this turn" } - } } - }, - "end_of_turn_confidence": { - "type": "number", - "description": "Confidence that no more speech is coming in this turn" - } } - } } } \ No newline at end of file diff --git a/src/content/workers-ai-models/gemma-sea-lion-v4-27b-it.json b/src/content/workers-ai-models/gemma-sea-lion-v4-27b-it.json index a56e39e7feaa9fc..dcede72266d904b 100644 --- a/src/content/workers-ai-models/gemma-sea-lion-v4-27b-it.json +++ b/src/content/workers-ai-models/gemma-sea-lion-v4-27b-it.json @@ -32,423 +32,988 @@ } ], "schema": { - "input": { - "$id": "http://ai.cloudflare.com/schemas/textGenerationInput", - "type": "object", - "oneOf": [ - { - "title": "Prompt", - "properties": { - "$merge": { - "source": { - "prompt": { - "$ref": "textGenerationPrompts#/prompt" - }, - "lora": { - "$ref": "textGenerationFinetune#/lora" - }, - "response_format": { - "$ref": "jsonMode#/response_format" - } - }, - "with": { - "raw": { - "type": "boolean", - "default": false, - "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting." - }, - "stream": { - "type": "boolean", - "default": false, - "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events." - }, - "max_tokens": { - "type": "integer", - "default": 2000, - "description": "The maximum number of tokens to generate in the response." - }, - "temperature": { - "type": "number", - "default": 0.6, - "minimum": 0, - "maximum": 5, - "description": "Controls the randomness of the output; higher values produce more random results." - }, - "top_p": { - "type": "number", - "minimum": 0.001, - "maximum": 1, - "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." - }, - "top_k": { - "type": "integer", - "minimum": 1, - "maximum": 50, - "description": "Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises." - }, - "seed": { - "type": "integer", - "minimum": 1, - "maximum": 9999999999, - "description": "Random seed for reproducibility of the generation." - }, - "repetition_penalty": { - "type": "number", - "minimum": 0, - "maximum": 2, - "description": "Penalty for repeated tokens; higher values discourage repetition." - }, - "frequency_penalty": { - "type": "number", - "minimum": -2, - "maximum": 2, - "description": "Decreases the likelihood of the model repeating the same lines verbatim." - }, - "presence_penalty": { - "type": "number", - "minimum": -2, - "maximum": 2, - "description": "Increases the likelihood of the model introducing new topics." - } - } - } - }, - "required": ["prompt"] - }, - { - "title": "Messages", - "properties": { - "$merge": { - "source": { - "messages": { - "$ref": "textGenerationPrompts#/messages" - }, - "functions": { - "$ref": "textGenerationTools#/functions" - }, - "tools": { - "$ref": "textGenerationTools#/tools" - }, - "response_format": { - "$ref": "jsonMode#/response_format" - } - }, - "with": { - "raw": { - "type": "boolean", - "default": false, - "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting." - }, - "stream": { - "type": "boolean", - "default": false, - "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events." - }, - "max_tokens": { - "type": "integer", - "default": 2000, - "description": "The maximum number of tokens to generate in the response." - }, - "temperature": { - "type": "number", - "default": 0.6, - "minimum": 0, - "maximum": 5, - "description": "Controls the randomness of the output; higher values produce more random results." - }, - "top_p": { - "type": "number", - "minimum": 0.001, - "maximum": 1, - "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." - }, - "top_k": { - "type": "integer", - "minimum": 1, - "maximum": 50, - "description": "Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises." - }, - "seed": { - "type": "integer", - "minimum": 1, - "maximum": 9999999999, - "description": "Random seed for reproducibility of the generation." - }, - "repetition_penalty": { - "type": "number", - "minimum": 0, - "maximum": 2, - "description": "Penalty for repeated tokens; higher values discourage repetition." - }, - "frequency_penalty": { - "type": "number", - "minimum": -2, - "maximum": 2, - "description": "Decreases the likelihood of the model repeating the same lines verbatim." - }, - "presence_penalty": { - "type": "number", - "minimum": -2, - "maximum": 2, - "description": "Increases the likelihood of the model introducing new topics." - } - } - } - }, - "required": ["messages"] - }, - { - "title": "Async Batch", + "input": { "type": "object", - "properties": { - "requests": { - "type": "array", - "items": { - "type": "object", - "oneOf": [ - { - "title": "Prompt", - "properties": { - "$merge": { - "source": { - "prompt": { - "$ref": "textGenerationPrompts#/prompt" - }, - "lora": { - "$ref": "textGenerationFinetune#/lora" - }, - "response_format": { - "$ref": "jsonMode#/response_format" + "oneOf": [ + { + "title": "Prompt", + "properties": { + "prompt": { + "type": "string", + "minLength": 1, + "description": "The input text prompt for the model to generate a response." + }, + "lora": { + "type": "string", + "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." + }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} } - }, - "with": { - "$ref": "textGenerationOptions#/common" - } + }, + "raw": { + "type": "boolean", + "default": false, + "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting." + }, + "stream": { + "type": "boolean", + "default": false, + "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events." + }, + "max_tokens": { + "type": "integer", + "default": 2000, + "description": "The maximum number of tokens to generate in the response." + }, + "temperature": { + "type": "number", + "default": 0.6, + "minimum": 0, + "maximum": 5, + "description": "Controls the randomness of the output; higher values produce more random results." + }, + "top_p": { + "type": "number", + "minimum": 0.001, + "maximum": 1, + "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." + }, + "top_k": { + "type": "integer", + "minimum": 1, + "maximum": 50, + "description": "Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises." + }, + "seed": { + "type": "integer", + "minimum": 1, + "maximum": 9999999999, + "description": "Random seed for reproducibility of the generation." + }, + "repetition_penalty": { + "type": "number", + "minimum": 0, + "maximum": 2, + "description": "Penalty for repeated tokens; higher values discourage repetition." + }, + "frequency_penalty": { + "type": "number", + "minimum": -2, + "maximum": 2, + "description": "Decreases the likelihood of the model repeating the same lines verbatim." + }, + "presence_penalty": { + "type": "number", + "minimum": -2, + "maximum": 2, + "description": "Increases the likelihood of the model introducing new topics." } - }, - "required": ["prompt"] }, - { - "title": "Messages", - "properties": { - "$merge": { - "source": { - "messages": { - "$ref": "textGenerationPrompts#/messages" - }, - "functions": { - "$ref": "textGenerationTools#/functions" - }, - "tools": { - "$ref": "textGenerationTools#/tools" - }, - "response_format": { - "$ref": "jsonMode#/response_format" + "required": [ + "prompt" + ] + }, + { + "title": "Messages", + "properties": { + "messages": { + "type": "array", + "description": "An array of message objects representing the conversation history.", + "items": { + "type": "object", + "properties": { + "role": { + "type": "string", + "description": "The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool')." + }, + "content": { + "type": "string", + "description": "The content of the message as a string." + } + }, + "required": [ + "role", + "content" + ] + } + }, + "functions": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "code": { + "type": "string" + } + }, + "required": [ + "name", + "code" + ] + } + }, + "tools": { + "type": "array", + "description": "A list of tools available for the assistant to use.", + "items": { + "type": "object", + "oneOf": [ + { + "properties": { + "name": { + "type": "string", + "description": "The name of the tool. More descriptive the better." + }, + "description": { + "type": "string", + "description": "A brief description of what the tool does." + }, + "parameters": { + "type": "object", + "description": "Schema defining the parameters accepted by the tool.", + "properties": { + "type": { + "type": "string", + "description": "The type of the parameters object (usually 'object')." + }, + "required": { + "type": "array", + "description": "List of required parameter names.", + "items": { + "type": "string" + } + }, + "properties": { + "type": "object", + "description": "Definitions of each parameter.", + "additionalProperties": { + "type": "object", + "properties": { + "type": { + "type": "string", + "description": "The data type of the parameter." + }, + "description": { + "type": "string", + "description": "A description of the expected parameter." + } + }, + "required": [ + "type", + "description" + ] + } + } + }, + "required": [ + "type", + "properties" + ] + } + }, + "required": [ + "name", + "description", + "parameters" + ] + }, + { + "properties": { + "type": { + "type": "string", + "description": "Specifies the type of tool (e.g., 'function')." + }, + "function": { + "type": "object", + "description": "Details of the function tool.", + "properties": { + "name": { + "type": "string", + "description": "The name of the function." + }, + "description": { + "type": "string", + "description": "A brief description of what the function does." + }, + "parameters": { + "type": "object", + "description": "Schema defining the parameters accepted by the function.", + "properties": { + "type": { + "type": "string", + "description": "The type of the parameters object (usually 'object')." + }, + "required": { + "type": "array", + "description": "List of required parameter names.", + "items": { + "type": "string" + } + }, + "properties": { + "type": "object", + "description": "Definitions of each parameter.", + "additionalProperties": { + "type": "object", + "properties": { + "type": { + "type": "string", + "description": "The data type of the parameter." + }, + "description": { + "type": "string", + "description": "A description of the expected parameter." + } + }, + "required": [ + "type", + "description" + ] + } + } + }, + "required": [ + "type", + "properties" + ] + } + }, + "required": [ + "name", + "description", + "parameters" + ] + } + }, + "required": [ + "type", + "function" + ] + } + ] + } + }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, + "raw": { + "type": "boolean", + "default": false, + "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting." + }, + "stream": { + "type": "boolean", + "default": false, + "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events." + }, + "max_tokens": { + "type": "integer", + "default": 2000, + "description": "The maximum number of tokens to generate in the response." + }, + "temperature": { + "type": "number", + "default": 0.6, + "minimum": 0, + "maximum": 5, + "description": "Controls the randomness of the output; higher values produce more random results." + }, + "top_p": { + "type": "number", + "minimum": 0.001, + "maximum": 1, + "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." + }, + "top_k": { + "type": "integer", + "minimum": 1, + "maximum": 50, + "description": "Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises." + }, + "seed": { + "type": "integer", + "minimum": 1, + "maximum": 9999999999, + "description": "Random seed for reproducibility of the generation." + }, + "repetition_penalty": { + "type": "number", + "minimum": 0, + "maximum": 2, + "description": "Penalty for repeated tokens; higher values discourage repetition." + }, + "frequency_penalty": { + "type": "number", + "minimum": -2, + "maximum": 2, + "description": "Decreases the likelihood of the model repeating the same lines verbatim." + }, + "presence_penalty": { + "type": "number", + "minimum": -2, + "maximum": 2, + "description": "Increases the likelihood of the model introducing new topics." + } + }, + "required": [ + "messages" + ] + }, + { + "title": "Async Batch", + "type": "object", + "properties": { + "requests": { + "type": "array", + "items": { + "type": "object", + "oneOf": [ + { + "title": "Prompt", + "properties": { + "prompt": { + "type": "string", + "minLength": 1, + "description": "The input text prompt for the model to generate a response." + }, + "lora": { + "type": "string", + "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." + }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, + "raw": { + "type": "boolean", + "default": false, + "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting." + }, + "stream": { + "type": "boolean", + "default": false, + "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events." + }, + "max_tokens": { + "type": "integer", + "default": 256, + "description": "The maximum number of tokens to generate in the response." + }, + "temperature": { + "type": "number", + "default": 0.6, + "minimum": 0, + "maximum": 5, + "description": "Controls the randomness of the output; higher values produce more random results." + }, + "top_p": { + "type": "number", + "minimum": 0.001, + "maximum": 1, + "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." + }, + "top_k": { + "type": "integer", + "minimum": 1, + "maximum": 50, + "description": "Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises." + }, + "seed": { + "type": "integer", + "minimum": 1, + "maximum": 9999999999, + "description": "Random seed for reproducibility of the generation." + }, + "repetition_penalty": { + "type": "number", + "minimum": 0, + "maximum": 2, + "description": "Penalty for repeated tokens; higher values discourage repetition." + }, + "frequency_penalty": { + "type": "number", + "minimum": -2, + "maximum": 2, + "description": "Decreases the likelihood of the model repeating the same lines verbatim." + }, + "presence_penalty": { + "type": "number", + "minimum": -2, + "maximum": 2, + "description": "Increases the likelihood of the model introducing new topics." + } + }, + "required": [ + "prompt" + ] + }, + { + "title": "Messages", + "properties": { + "messages": { + "type": "array", + "description": "An array of message objects representing the conversation history.", + "items": { + "type": "object", + "properties": { + "role": { + "type": "string", + "description": "The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool')." + }, + "content": { + "type": "string", + "description": "The content of the message as a string." + } + }, + "required": [ + "role", + "content" + ] + } + }, + "functions": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "code": { + "type": "string" + } + }, + "required": [ + "name", + "code" + ] + } + }, + "tools": { + "type": "array", + "description": "A list of tools available for the assistant to use.", + "items": { + "type": "object", + "oneOf": [ + { + "properties": { + "name": { + "type": "string", + "description": "The name of the tool. More descriptive the better." + }, + "description": { + "type": "string", + "description": "A brief description of what the tool does." + }, + "parameters": { + "type": "object", + "description": "Schema defining the parameters accepted by the tool.", + "properties": { + "type": { + "type": "string", + "description": "The type of the parameters object (usually 'object')." + }, + "required": { + "type": "array", + "description": "List of required parameter names.", + "items": { + "type": "string" + } + }, + "properties": { + "type": "object", + "description": "Definitions of each parameter.", + "additionalProperties": { + "type": "object", + "properties": { + "type": { + "type": "string", + "description": "The data type of the parameter." + }, + "description": { + "type": "string", + "description": "A description of the expected parameter." + } + }, + "required": [ + "type", + "description" + ] + } + } + }, + "required": [ + "type", + "properties" + ] + } + }, + "required": [ + "name", + "description", + "parameters" + ] + }, + { + "properties": { + "type": { + "type": "string", + "description": "Specifies the type of tool (e.g., 'function')." + }, + "function": { + "type": "object", + "description": "Details of the function tool.", + "properties": { + "name": { + "type": "string", + "description": "The name of the function." + }, + "description": { + "type": "string", + "description": "A brief description of what the function does." + }, + "parameters": { + "type": "object", + "description": "Schema defining the parameters accepted by the function.", + "properties": { + "type": { + "type": "string", + "description": "The type of the parameters object (usually 'object')." + }, + "required": { + "type": "array", + "description": "List of required parameter names.", + "items": { + "type": "string" + } + }, + "properties": { + "type": "object", + "description": "Definitions of each parameter.", + "additionalProperties": { + "type": "object", + "properties": { + "type": { + "type": "string", + "description": "The data type of the parameter." + }, + "description": { + "type": "string", + "description": "A description of the expected parameter." + } + }, + "required": [ + "type", + "description" + ] + } + } + }, + "required": [ + "type", + "properties" + ] + } + }, + "required": [ + "name", + "description", + "parameters" + ] + } + }, + "required": [ + "type", + "function" + ] + } + ] + } + }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, + "raw": { + "type": "boolean", + "default": false, + "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting." + }, + "stream": { + "type": "boolean", + "default": false, + "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events." + }, + "max_tokens": { + "type": "integer", + "default": 256, + "description": "The maximum number of tokens to generate in the response." + }, + "temperature": { + "type": "number", + "default": 0.6, + "minimum": 0, + "maximum": 5, + "description": "Controls the randomness of the output; higher values produce more random results." + }, + "top_p": { + "type": "number", + "minimum": 0.001, + "maximum": 1, + "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." + }, + "top_k": { + "type": "integer", + "minimum": 1, + "maximum": 50, + "description": "Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises." + }, + "seed": { + "type": "integer", + "minimum": 1, + "maximum": 9999999999, + "description": "Random seed for reproducibility of the generation." + }, + "repetition_penalty": { + "type": "number", + "minimum": 0, + "maximum": 2, + "description": "Penalty for repeated tokens; higher values discourage repetition." + }, + "frequency_penalty": { + "type": "number", + "minimum": -2, + "maximum": 2, + "description": "Decreases the likelihood of the model repeating the same lines verbatim." + }, + "presence_penalty": { + "type": "number", + "minimum": -2, + "maximum": 2, + "description": "Increases the likelihood of the model introducing new topics." + } + }, + "required": [ + "messages" + ] + } + ] } - }, - "with": { - "$ref": "textGenerationOptions#/common" - } } - }, - "required": ["messages"] - } - ] - } - } - }, - "required": ["requests"] - } - ] - }, - "output": { - "oneOf": [ - { - "type": "object", - "contentType": "application/json", - "title": "Chat Completion Response", - "properties": { - "id": { - "type": "string", - "description": "Unique identifier for the completion" - }, - "object": { - "type": "string", - "enum": ["chat.completion"], - "description": "Object type identifier" - }, - "created": { - "type": "number", - "description": "Unix timestamp of when the completion was created" - }, - "model": { - "type": "string", - "description": "Model used for the completion" - }, - "choices": { - "type": "array", - "description": "List of completion choices", - "items": { - "type": "object", - "properties": { - "index": { - "type": "number", - "description": "Index of the choice in the list" }, - "message": { - "type": "object", - "description": "The message generated by the model", - "properties": { - "role": { - "type": "string", - "description": "Role of the message author" - }, - "content": { - "type": "string", - "description": "The content of the message" - }, - "reasoning_content": { - "type": "string", - "description": "Internal reasoning content (if available)" - }, - "tool_calls": { - "type": "array", - "description": "Tool calls made by the assistant", - "items": { + "required": [ + "requests" + ] + } + ] + }, + "output": { + "oneOf": [ + { + "type": "object", + "contentType": "application/json", + "title": "Chat Completion Response", + "properties": { + "id": { + "type": "string", + "description": "Unique identifier for the completion" + }, + "object": { + "type": "string", + "enum": [ + "chat.completion" + ], + "description": "Object type identifier" + }, + "created": { + "type": "number", + "description": "Unix timestamp of when the completion was created" + }, + "model": { + "type": "string", + "description": "Model used for the completion" + }, + "choices": { + "type": "array", + "description": "List of completion choices", + "items": { + "type": "object", + "properties": { + "index": { + "type": "number", + "description": "Index of the choice in the list" + }, + "message": { + "type": "object", + "description": "The message generated by the model", + "properties": { + "role": { + "type": "string", + "description": "Role of the message author" + }, + "content": { + "type": "string", + "description": "The content of the message" + }, + "reasoning_content": { + "type": "string", + "description": "Internal reasoning content (if available)" + }, + "tool_calls": { + "type": "array", + "description": "Tool calls made by the assistant", + "items": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "Unique identifier for the tool call" + }, + "type": { + "type": "string", + "enum": [ + "function" + ], + "description": "Type of tool call" + }, + "function": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "Name of the function to call" + }, + "arguments": { + "type": "string", + "description": "JSON string of arguments for the function" + } + }, + "required": [ + "name", + "arguments" + ] + } + }, + "required": [ + "id", + "type", + "function" + ] + } + } + }, + "required": [ + "role", + "content" + ] + }, + "finish_reason": { + "type": "string", + "description": "Reason why the model stopped generating" + }, + "stop_reason": { + "type": [ + "string", + "null" + ], + "description": "Stop reason (may be null)" + }, + "logprobs": { + "type": [ + "object", + "null" + ], + "description": "Log probabilities (if requested)" + } + } + } + }, + "usage": { "type": "object", + "description": "Usage statistics for the inference request", "properties": { - "id": { - "type": "string", - "description": "Unique identifier for the tool call" - }, - "type": { - "type": "string", - "enum": ["function"], - "description": "Type of tool call" - }, - "function": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, + "prompt_logprobs": { + "type": [ + "object", + "null" + ], + "description": "Log probabilities for the prompt (if requested)" + } + } + }, + { + "type": "object", + "contentType": "application/json", + "title": "Text Completion Response", + "properties": { + "id": { + "type": "string", + "description": "Unique identifier for the completion" + }, + "object": { + "type": "string", + "enum": [ + "text_completion" + ], + "description": "Object type identifier" + }, + "created": { + "type": "number", + "description": "Unix timestamp of when the completion was created" + }, + "model": { + "type": "string", + "description": "Model used for the completion" + }, + "choices": { + "type": "array", + "description": "List of completion choices", + "items": { "type": "object", "properties": { - "name": { - "type": "string", - "description": "Name of the function to call" - }, - "arguments": { - "type": "string", - "description": "JSON string of arguments for the function" - } + "index": { + "type": "number", + "description": "Index of the choice in the list" + }, + "text": { + "type": "string", + "description": "The generated text completion" + }, + "finish_reason": { + "type": "string", + "description": "Reason why the model stopped generating" + }, + "stop_reason": { + "type": [ + "string", + "null" + ], + "description": "Stop reason (may be null)" + }, + "logprobs": { + "type": [ + "object", + "null" + ], + "description": "Log probabilities (if requested)" + }, + "prompt_logprobs": { + "type": [ + "object", + "null" + ], + "description": "Log probabilities for the prompt (if requested)" + } + }, + "required": [ + "index", + "text", + "finish_reason" + ] + } + }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 }, - "required": ["name", "arguments"] - } - }, - "required": ["id", "type", "function"] - } + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } } - }, - "required": ["role", "content"] - }, - "finish_reason": { - "type": "string", - "description": "Reason why the model stopped generating" - }, - "stop_reason": { - "type": ["string", "null"], - "description": "Stop reason (may be null)" - }, - "logprobs": { - "type": ["object", "null"], - "description": "Log probabilities (if requested)" } - } - } - }, - "usage": { - "$ref": "usage#/usage" - }, - "prompt_logprobs": { - "type": ["object", "null"], - "description": "Log probabilities for the prompt (if requested)" - } - } - }, - { - "type": "object", - "contentType": "application/json", - "title": "Text Completion Response", - "properties": { - "id": { - "type": "string", - "description": "Unique identifier for the completion" - }, - "object": { - "type": "string", - "enum": ["text_completion"], - "description": "Object type identifier" - }, - "created": { - "type": "number", - "description": "Unix timestamp of when the completion was created" - }, - "model": { - "type": "string", - "description": "Model used for the completion" - }, - "choices": { - "type": "array", - "description": "List of completion choices", - "items": { - "type": "object", - "properties": { - "index": { - "type": "number", - "description": "Index of the choice in the list" - }, - "text": { - "type": "string", - "description": "The generated text completion" - }, - "finish_reason": { - "type": "string", - "description": "Reason why the model stopped generating" - }, - "stop_reason": { - "type": ["string", "null"], - "description": "Stop reason (may be null)" - }, - "logprobs": { - "type": ["object", "null"], - "description": "Log probabilities (if requested)" - }, - "prompt_logprobs": { - "type": ["object", "null"], - "description": "Log probabilities for the prompt (if requested)" + }, + { + "type": "string", + "contentType": "text/event-stream", + "format": "binary" + }, + { + "type": "object", + "contentType": "application/json", + "title": "Async response", + "properties": { + "request_id": { + "type": "string", + "description": "The async request id that can be used to obtain the results." + } } - }, - "required": ["index", "text", "finish_reason"] } - }, - "usage": { - "$ref": "usage#/usage" - } - } - }, - { - "type": "string", - "contentType": "text/event-stream", - "format": "binary" - }, - { - "type": "object", - "contentType": "application/json", - "title": "Async response", - "properties": { - "request_id": { - "type": "string", - "description": "The async request id that can be used to obtain the results." - } - } - } - ] - } + ] + } } } \ No newline at end of file diff --git a/src/content/workers-ai-models/gpt-oss-120b.json b/src/content/workers-ai-models/gpt-oss-120b.json index 0c83fec96ad7d6f..9c995e2ce8f8324 100644 --- a/src/content/workers-ai-models/gpt-oss-120b.json +++ b/src/content/workers-ai-models/gpt-oss-120b.json @@ -37,48 +37,106 @@ ], "schema": { "input": { - "type": "object", - "title": "GPT_OSS_Responses", - "properties": { - "input": { - "anyOf": [ - { - "type": "string" + "oneOf": [ + { + "type": "object", + "title": "GPT_OSS_120B_Responses", + "properties": { + "input": { + "anyOf": [ + { + "type": "string" + }, + { + "items": {}, + "type": "array" + } + ], + "description": "Responses API Input messages. Refer to OpenAI Responses API docs to learn more about supported content types" }, - { - "items": {}, - "type": "array" + "reasoning": { + "type": "object", + "properties": { + "effort": { + "type": "string", + "description": "Constrains effort on reasoning for reasoning models. Currently supported values are low, medium, and high. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response.", + "enum": [ + "low", + "medium", + "high" + ] + }, + "summary": { + "type": "string", + "description": "A summary of the reasoning performed by the model. This can be useful for debugging and understanding the model's reasoning process. One of auto, concise, or detailed.", + "enum": [ + "auto", + "concise", + "detailed" + ] + } + } } - ], - "description": "Responses API Input messages. Refer to OpenAI Responses API docs to learn more about supported content types", - "title": "Input" + }, + "required": [ + "input" + ] }, - "reasoning": { + { "type": "object", + "title": "GPT_OSS_120B_Responses_Async", "properties": { - "effort": { - "type": "string", - "description": "Constrains effort on reasoning for reasoning models. Currently supported values are low, medium, and high. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response.", - "enum": [ - "low", - "medium", - "high" - ] - }, - "summary": { - "type": "string", - "description": "A summary of the reasoning performed by the model. This can be useful for debugging and understanding the model's reasoning process. One of auto, concise, or detailed.", - "enum": [ - "auto", - "concise", - "detailed" - ] + "requests": { + "type": "array", + "items": { + "type": "object", + "properties": { + "input": { + "anyOf": [ + { + "type": "string" + }, + { + "items": {}, + "type": "array" + } + ], + "description": "Responses API Input messages. Refer to OpenAI Responses API docs to learn more about supported content types" + }, + "reasoning": { + "type": "object", + "properties": { + "effort": { + "type": "string", + "description": "Constrains effort on reasoning for reasoning models. Currently supported values are low, medium, and high. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response.", + "enum": [ + "low", + "medium", + "high" + ] + }, + "summary": { + "type": "string", + "description": "A summary of the reasoning performed by the model. This can be useful for debugging and understanding the model's reasoning process. One of auto, concise, or detailed.", + "enum": [ + "auto", + "concise", + "detailed" + ] + } + } + } + }, + "required": [ + "input" + ] + } } - } + }, + "required": [ + "requests" + ] } - }, - "required": [ - "input" ] }, "output": { diff --git a/src/content/workers-ai-models/gpt-oss-20b.json b/src/content/workers-ai-models/gpt-oss-20b.json index 91c1387ea69ae1a..d961bb58f321881 100644 --- a/src/content/workers-ai-models/gpt-oss-20b.json +++ b/src/content/workers-ai-models/gpt-oss-20b.json @@ -11,6 +11,10 @@ "created_at": "2025-08-05 10:49:53.265", "tags": [], "properties": [ + { + "property_id": "async_queue", + "value": "true" + }, { "property_id": "context_window", "value": "128000" @@ -33,48 +37,106 @@ ], "schema": { "input": { - "type": "object", - "title": "GPT_OSS_Responses", - "properties": { - "input": { - "anyOf": [ - { - "type": "string" + "oneOf": [ + { + "type": "object", + "title": "GPT_OSS_20B_Responses", + "properties": { + "input": { + "anyOf": [ + { + "type": "string" + }, + { + "items": {}, + "type": "array" + } + ], + "description": "Responses API Input messages. Refer to OpenAI Responses API docs to learn more about supported content types" }, - { - "items": {}, - "type": "array" + "reasoning": { + "type": "object", + "properties": { + "effort": { + "type": "string", + "description": "Constrains effort on reasoning for reasoning models. Currently supported values are low, medium, and high. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response.", + "enum": [ + "low", + "medium", + "high" + ] + }, + "summary": { + "type": "string", + "description": "A summary of the reasoning performed by the model. This can be useful for debugging and understanding the model's reasoning process. One of auto, concise, or detailed.", + "enum": [ + "auto", + "concise", + "detailed" + ] + } + } } - ], - "description": "Responses API Input messages. Refer to OpenAI Responses API docs to learn more about supported content types", - "title": "Input" + }, + "required": [ + "input" + ] }, - "reasoning": { + { "type": "object", + "title": "GPT_OSS_20B_Responses_Async", "properties": { - "effort": { - "type": "string", - "description": "Constrains effort on reasoning for reasoning models. Currently supported values are low, medium, and high. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response.", - "enum": [ - "low", - "medium", - "high" - ] - }, - "summary": { - "type": "string", - "description": "A summary of the reasoning performed by the model. This can be useful for debugging and understanding the model's reasoning process. One of auto, concise, or detailed.", - "enum": [ - "auto", - "concise", - "detailed" - ] + "requests": { + "type": "array", + "items": { + "type": "object", + "properties": { + "input": { + "anyOf": [ + { + "type": "string" + }, + { + "items": {}, + "type": "array" + } + ], + "description": "Responses API Input messages. Refer to OpenAI Responses API docs to learn more about supported content types" + }, + "reasoning": { + "type": "object", + "properties": { + "effort": { + "type": "string", + "description": "Constrains effort on reasoning for reasoning models. Currently supported values are low, medium, and high. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response.", + "enum": [ + "low", + "medium", + "high" + ] + }, + "summary": { + "type": "string", + "description": "A summary of the reasoning performed by the model. This can be useful for debugging and understanding the model's reasoning process. One of auto, concise, or detailed.", + "enum": [ + "auto", + "concise", + "detailed" + ] + } + } + } + }, + "required": [ + "input" + ] + } } - } + }, + "required": [ + "requests" + ] } - }, - "required": [ - "input" ] }, "output": { diff --git a/src/content/workers-ai-models/plamo-embedding-1b.json b/src/content/workers-ai-models/plamo-embedding-1b.json index 8d253c77b86327a..a3526b1d1e8d247 100644 --- a/src/content/workers-ai-models/plamo-embedding-1b.json +++ b/src/content/workers-ai-models/plamo-embedding-1b.json @@ -24,41 +24,54 @@ ], "schema": { "input": { - "type": "object", - "properties": { - "text": { - "oneOf": [ - { "type": "string" }, - { - "type": "array", - "items": { "type": "string" } + "type": "object", + "properties": { + "text": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ], + "description": "Input text to embed. Can be a single string or a list of strings." } - ], - "description": "Input text to embed. Can be a single string or a list of strings." - } - }, - "required": ["text"] + }, + "required": [ + "text" + ] }, "output": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "type": "array", - "items": { "type": "number" } - }, - "description": "Embedding vectors, where each vector is a list of floats." + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "type": "array", + "items": { + "type": "number" + } + }, + "description": "Embedding vectors, where each vector is a list of floats." + }, + "shape": { + "type": "array", + "items": { + "type": "integer" + }, + "minItems": 2, + "maxItems": 2, + "description": "Shape of the embedding data as [number_of_embeddings, embedding_dimension]." + } }, - "shape": { - "type": "array", - "items": { "type": "integer" }, - "minItems": 2, - "maxItems": 2, - "description": "Shape of the embedding data as [number_of_embeddings, embedding_dimension]." - } - }, - "required": ["data", "shape"] + "required": [ + "data", + "shape" + ] } } } \ No newline at end of file diff --git a/src/content/workers-ai-models/smart-turn-v2.json b/src/content/workers-ai-models/smart-turn-v2.json index 279d415a1bcc7cc..d380f1f51202d0a 100644 --- a/src/content/workers-ai-models/smart-turn-v2.json +++ b/src/content/workers-ai-models/smart-turn-v2.json @@ -5,8 +5,8 @@ "description": "An open source, community-driven, native audio turn detection model in 2nd version", "task": { "id": "ccb1ca5a-043d-41a7-8a3b-61017b2796fd", - "name": "Voice Activity Detection", - "description": "Detecting the presence or absence of human speech, used in speech processing." + "name": "Dumb Pipe", + "description": "Internal - Dumb Pipe models don't use tensors" }, "created_at": "2025-08-04 10:08:04.219", "tags": [],