diff --git a/src/content/docs/workers-ai/platform/pricing.mdx b/src/content/docs/workers-ai/platform/pricing.mdx
index 87a8411b674551..13c1d2fc4d3604 100644
--- a/src/content/docs/workers-ai/platform/pricing.mdx
+++ b/src/content/docs/workers-ai/platform/pricing.mdx
@@ -57,6 +57,7 @@ The Price in Tokens column is equivalent to the Price in Neurons column - the di
 | @cf/openai/gpt-oss-120b | $0.350 per M input tokens <br/> $0.750 per M output tokens | 31818 neurons per M input tokens <br/> 68182 neurons per M output tokens |
| @cf/openai/gpt-oss-20b | $0.200 per M input tokens
$0.300 per M output tokens | 18182 neurons per M input tokens
27273 neurons per M output tokens |
 | @cf/aisingapore/gemma-sea-lion-v4-27b-it | $0.351 per M input tokens <br/> $0.555 per M output tokens | 31876 neurons per M input tokens <br/> 50488 neurons per M output tokens |
+| @cf/ibm-granite/granite-4.0-h-micro | $0.017 per M input tokens <br/> $0.112 per M output tokens | 1542 neurons per M input tokens <br/> 10158 neurons per M output tokens |
 
 ## Embeddings model pricing
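
The token and neuron columns in the new row are two views of the same price: Workers AI bills Neurons at the published rate of $0.011 per 1,000, so the figures can be cross-checked directly. A quick sanity check in TypeScript (the helper name is ours; the constant is the published platform rate):

```ts
// Convert a "neurons per M tokens" figure to USD per M tokens,
// using the published Workers AI rate of $0.011 per 1,000 Neurons.
const USD_PER_1K_NEURONS = 0.011;

const neuronsToUsd = (neuronsPerMTokens: number): number =>
  (neuronsPerMTokens / 1_000) * USD_PER_1K_NEURONS;

console.log(neuronsToUsd(1542).toFixed(3));  // 0.017 -> $0.017 per M input tokens
console.log(neuronsToUsd(10158).toFixed(3)); // 0.112 -> $0.112 per M output tokens
```
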
diff --git a/src/content/workers-ai-models/granite-4.0-h-micro.json b/src/content/workers-ai-models/granite-4.0-h-micro.json
new file mode 100644
index 00000000000000..8f5cd92ce56145
--- /dev/null
+++ b/src/content/workers-ai-models/granite-4.0-h-micro.json
@@ -0,0 +1,1019 @@
+{
+ "id": "7952d0cc-cb00-4e10-be02-667565c2ee0f",
+ "source": 1,
+ "name": "@cf/ibm-granite/granite-4.0-h-micro",
+ "description": "Granite 4.0 instruct models deliver strong performance across benchmarks, achieving industry-leading results in key agentic tasks like instruction following and function calling. These efficiencies make the models well-suited for a wide range of use cases like retrieval-augmented generation (RAG), multi-agent workflows, and edge deployments.",
+ "task": {
+ "id": "c329a1f9-323d-4e91-b2aa-582dd4188d34",
+ "name": "Text Generation",
+ "description": "Family of generative text models, such as large language models (LLM), that can be adapted for a variety of natural language tasks."
+ },
+ "created_at": "2025-10-07 18:46:29.436",
+ "tags": [],
+ "properties": [
+ {
+ "property_id": "context_window",
+ "value": "131000"
+ },
+ {
+ "property_id": "price",
+ "value": [
+ {
+ "unit": "per M input tokens",
+ "price": 0.017,
+ "currency": "USD"
+ },
+ {
+ "unit": "per M output tokens",
+ "price": 0.11,
+ "currency": "USD"
+ }
+ ]
+ }
+ ],
+"schema": {
+ "input": {
+ "type": "object",
+ "oneOf": [
+ {
+ "title": "Prompt",
+ "properties": {
+ "prompt": {
+ "type": "string",
+ "minLength": 1,
+ "description": "The input text prompt for the model to generate a response."
+ },
+ "lora": {
+ "type": "string",
+ "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model."
+ },
+ "response_format": {
+ "title": "JSON Mode",
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "enum": [
+ "json_object",
+ "json_schema"
+ ]
+ },
+ "json_schema": {}
+ }
+ },
+ "raw": {
+ "type": "boolean",
+ "default": false,
+ "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting."
+ },
+ "stream": {
+ "type": "boolean",
+ "default": false,
+ "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events."
+ },
+ "max_tokens": {
+ "type": "integer",
+ "default": 2000,
+ "description": "The maximum number of tokens to generate in the response."
+ },
+ "temperature": {
+ "type": "number",
+ "default": 0.6,
+ "minimum": 0,
+ "maximum": 5,
+ "description": "Controls the randomness of the output; higher values produce more random results."
+ },
+ "top_p": {
+ "type": "number",
+ "minimum": 0.001,
+ "maximum": 1,
+ "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses."
+ },
+ "top_k": {
+ "type": "integer",
+ "minimum": 1,
+ "maximum": 50,
+ "description": "Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises."
+ },
+ "seed": {
+ "type": "integer",
+ "minimum": 1,
+ "maximum": 9999999999,
+ "description": "Random seed for reproducibility of the generation."
+ },
+ "repetition_penalty": {
+ "type": "number",
+ "minimum": 0,
+ "maximum": 2,
+ "description": "Penalty for repeated tokens; higher values discourage repetition."
+ },
+ "frequency_penalty": {
+ "type": "number",
+ "minimum": -2,
+ "maximum": 2,
+ "description": "Decreases the likelihood of the model repeating the same lines verbatim."
+ },
+ "presence_penalty": {
+ "type": "number",
+ "minimum": -2,
+ "maximum": 2,
+ "description": "Increases the likelihood of the model introducing new topics."
+ }
+ },
+ "required": [
+ "prompt"
+ ]
+ },
+ {
+ "title": "Messages",
+ "properties": {
+ "messages": {
+ "type": "array",
+ "description": "An array of message objects representing the conversation history.",
+ "items": {
+ "type": "object",
+ "properties": {
+ "role": {
+ "type": "string",
+ "description": "The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool')."
+ },
+ "content": {
+ "type": "string",
+ "description": "The content of the message as a string."
+ }
+ },
+ "required": [
+ "role",
+ "content"
+ ]
+ }
+ },
+ "functions": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "name": {
+ "type": "string"
+ },
+ "code": {
+ "type": "string"
+ }
+ },
+ "required": [
+ "name",
+ "code"
+ ]
+ }
+ },
+ "tools": {
+ "type": "array",
+ "description": "A list of tools available for the assistant to use.",
+ "items": {
+ "type": "object",
+ "oneOf": [
+ {
+ "properties": {
+ "name": {
+ "type": "string",
+ "description": "The name of the tool. More descriptive the better."
+ },
+ "description": {
+ "type": "string",
+ "description": "A brief description of what the tool does."
+ },
+ "parameters": {
+ "type": "object",
+ "description": "Schema defining the parameters accepted by the tool.",
+ "properties": {
+ "type": {
+ "type": "string",
+ "description": "The type of the parameters object (usually 'object')."
+ },
+ "required": {
+ "type": "array",
+ "description": "List of required parameter names.",
+ "items": {
+ "type": "string"
+ }
+ },
+ "properties": {
+ "type": "object",
+ "description": "Definitions of each parameter.",
+ "additionalProperties": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "description": "The data type of the parameter."
+ },
+ "description": {
+ "type": "string",
+ "description": "A description of the expected parameter."
+ }
+ },
+ "required": [
+ "type",
+ "description"
+ ]
+ }
+ }
+ },
+ "required": [
+ "type",
+ "properties"
+ ]
+ }
+ },
+ "required": [
+ "name",
+ "description",
+ "parameters"
+ ]
+ },
+ {
+ "properties": {
+ "type": {
+ "type": "string",
+ "description": "Specifies the type of tool (e.g., 'function')."
+ },
+ "function": {
+ "type": "object",
+ "description": "Details of the function tool.",
+ "properties": {
+ "name": {
+ "type": "string",
+ "description": "The name of the function."
+ },
+ "description": {
+ "type": "string",
+ "description": "A brief description of what the function does."
+ },
+ "parameters": {
+ "type": "object",
+ "description": "Schema defining the parameters accepted by the function.",
+ "properties": {
+ "type": {
+ "type": "string",
+ "description": "The type of the parameters object (usually 'object')."
+ },
+ "required": {
+ "type": "array",
+ "description": "List of required parameter names.",
+ "items": {
+ "type": "string"
+ }
+ },
+ "properties": {
+ "type": "object",
+ "description": "Definitions of each parameter.",
+ "additionalProperties": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "description": "The data type of the parameter."
+ },
+ "description": {
+ "type": "string",
+ "description": "A description of the expected parameter."
+ }
+ },
+ "required": [
+ "type",
+ "description"
+ ]
+ }
+ }
+ },
+ "required": [
+ "type",
+ "properties"
+ ]
+ }
+ },
+ "required": [
+ "name",
+ "description",
+ "parameters"
+ ]
+ }
+ },
+ "required": [
+ "type",
+ "function"
+ ]
+ }
+ ]
+ }
+ },
+ "response_format": {
+ "title": "JSON Mode",
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "enum": [
+ "json_object",
+ "json_schema"
+ ]
+ },
+ "json_schema": {}
+ }
+ },
+ "raw": {
+ "type": "boolean",
+ "default": false,
+ "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting."
+ },
+ "stream": {
+ "type": "boolean",
+ "default": false,
+ "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events."
+ },
+ "max_tokens": {
+ "type": "integer",
+ "default": 2000,
+ "description": "The maximum number of tokens to generate in the response."
+ },
+ "temperature": {
+ "type": "number",
+ "default": 0.6,
+ "minimum": 0,
+ "maximum": 5,
+ "description": "Controls the randomness of the output; higher values produce more random results."
+ },
+ "top_p": {
+ "type": "number",
+ "minimum": 0.001,
+ "maximum": 1,
+ "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses."
+ },
+ "top_k": {
+ "type": "integer",
+ "minimum": 1,
+ "maximum": 50,
+ "description": "Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises."
+ },
+ "seed": {
+ "type": "integer",
+ "minimum": 1,
+ "maximum": 9999999999,
+ "description": "Random seed for reproducibility of the generation."
+ },
+ "repetition_penalty": {
+ "type": "number",
+ "minimum": 0,
+ "maximum": 2,
+ "description": "Penalty for repeated tokens; higher values discourage repetition."
+ },
+ "frequency_penalty": {
+ "type": "number",
+ "minimum": -2,
+ "maximum": 2,
+ "description": "Decreases the likelihood of the model repeating the same lines verbatim."
+ },
+ "presence_penalty": {
+ "type": "number",
+ "minimum": -2,
+ "maximum": 2,
+ "description": "Increases the likelihood of the model introducing new topics."
+ }
+ },
+ "required": [
+ "messages"
+ ]
+ },
+ {
+ "title": "Async Batch",
+ "type": "object",
+ "properties": {
+ "requests": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "oneOf": [
+ {
+ "title": "Prompt",
+ "properties": {
+ "prompt": {
+ "type": "string",
+ "minLength": 1,
+ "description": "The input text prompt for the model to generate a response."
+ },
+ "lora": {
+ "type": "string",
+ "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model."
+ },
+ "response_format": {
+ "title": "JSON Mode",
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "enum": [
+ "json_object",
+ "json_schema"
+ ]
+ },
+ "json_schema": {}
+ }
+ },
+ "raw": {
+ "type": "boolean",
+ "default": false,
+ "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting."
+ },
+ "stream": {
+ "type": "boolean",
+ "default": false,
+ "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events."
+ },
+ "max_tokens": {
+ "type": "integer",
+ "default": 256,
+ "description": "The maximum number of tokens to generate in the response."
+ },
+ "temperature": {
+ "type": "number",
+ "default": 0.6,
+ "minimum": 0,
+ "maximum": 5,
+ "description": "Controls the randomness of the output; higher values produce more random results."
+ },
+ "top_p": {
+ "type": "number",
+ "minimum": 0.001,
+ "maximum": 1,
+ "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses."
+ },
+ "top_k": {
+ "type": "integer",
+ "minimum": 1,
+ "maximum": 50,
+ "description": "Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises."
+ },
+ "seed": {
+ "type": "integer",
+ "minimum": 1,
+ "maximum": 9999999999,
+ "description": "Random seed for reproducibility of the generation."
+ },
+ "repetition_penalty": {
+ "type": "number",
+ "minimum": 0,
+ "maximum": 2,
+ "description": "Penalty for repeated tokens; higher values discourage repetition."
+ },
+ "frequency_penalty": {
+ "type": "number",
+ "minimum": -2,
+ "maximum": 2,
+ "description": "Decreases the likelihood of the model repeating the same lines verbatim."
+ },
+ "presence_penalty": {
+ "type": "number",
+ "minimum": -2,
+ "maximum": 2,
+ "description": "Increases the likelihood of the model introducing new topics."
+ }
+ },
+ "required": [
+ "prompt"
+ ]
+ },
+ {
+ "title": "Messages",
+ "properties": {
+ "messages": {
+ "type": "array",
+ "description": "An array of message objects representing the conversation history.",
+ "items": {
+ "type": "object",
+ "properties": {
+ "role": {
+ "type": "string",
+ "description": "The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool')."
+ },
+ "content": {
+ "type": "string",
+ "description": "The content of the message as a string."
+ }
+ },
+ "required": [
+ "role",
+ "content"
+ ]
+ }
+ },
+ "functions": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "name": {
+ "type": "string"
+ },
+ "code": {
+ "type": "string"
+ }
+ },
+ "required": [
+ "name",
+ "code"
+ ]
+ }
+ },
+ "tools": {
+ "type": "array",
+ "description": "A list of tools available for the assistant to use.",
+ "items": {
+ "type": "object",
+ "oneOf": [
+ {
+ "properties": {
+ "name": {
+ "type": "string",
+ "description": "The name of the tool. More descriptive the better."
+ },
+ "description": {
+ "type": "string",
+ "description": "A brief description of what the tool does."
+ },
+ "parameters": {
+ "type": "object",
+ "description": "Schema defining the parameters accepted by the tool.",
+ "properties": {
+ "type": {
+ "type": "string",
+ "description": "The type of the parameters object (usually 'object')."
+ },
+ "required": {
+ "type": "array",
+ "description": "List of required parameter names.",
+ "items": {
+ "type": "string"
+ }
+ },
+ "properties": {
+ "type": "object",
+ "description": "Definitions of each parameter.",
+ "additionalProperties": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "description": "The data type of the parameter."
+ },
+ "description": {
+ "type": "string",
+ "description": "A description of the expected parameter."
+ }
+ },
+ "required": [
+ "type",
+ "description"
+ ]
+ }
+ }
+ },
+ "required": [
+ "type",
+ "properties"
+ ]
+ }
+ },
+ "required": [
+ "name",
+ "description",
+ "parameters"
+ ]
+ },
+ {
+ "properties": {
+ "type": {
+ "type": "string",
+ "description": "Specifies the type of tool (e.g., 'function')."
+ },
+ "function": {
+ "type": "object",
+ "description": "Details of the function tool.",
+ "properties": {
+ "name": {
+ "type": "string",
+ "description": "The name of the function."
+ },
+ "description": {
+ "type": "string",
+ "description": "A brief description of what the function does."
+ },
+ "parameters": {
+ "type": "object",
+ "description": "Schema defining the parameters accepted by the function.",
+ "properties": {
+ "type": {
+ "type": "string",
+ "description": "The type of the parameters object (usually 'object')."
+ },
+ "required": {
+ "type": "array",
+ "description": "List of required parameter names.",
+ "items": {
+ "type": "string"
+ }
+ },
+ "properties": {
+ "type": "object",
+ "description": "Definitions of each parameter.",
+ "additionalProperties": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "description": "The data type of the parameter."
+ },
+ "description": {
+ "type": "string",
+ "description": "A description of the expected parameter."
+ }
+ },
+ "required": [
+ "type",
+ "description"
+ ]
+ }
+ }
+ },
+ "required": [
+ "type",
+ "properties"
+ ]
+ }
+ },
+ "required": [
+ "name",
+ "description",
+ "parameters"
+ ]
+ }
+ },
+ "required": [
+ "type",
+ "function"
+ ]
+ }
+ ]
+ }
+ },
+ "response_format": {
+ "title": "JSON Mode",
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "enum": [
+ "json_object",
+ "json_schema"
+ ]
+ },
+ "json_schema": {}
+ }
+ },
+ "raw": {
+ "type": "boolean",
+ "default": false,
+ "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting."
+ },
+ "stream": {
+ "type": "boolean",
+ "default": false,
+ "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events."
+ },
+ "max_tokens": {
+ "type": "integer",
+ "default": 256,
+ "description": "The maximum number of tokens to generate in the response."
+ },
+ "temperature": {
+ "type": "number",
+ "default": 0.6,
+ "minimum": 0,
+ "maximum": 5,
+ "description": "Controls the randomness of the output; higher values produce more random results."
+ },
+ "top_p": {
+ "type": "number",
+ "minimum": 0.001,
+ "maximum": 1,
+ "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses."
+ },
+ "top_k": {
+ "type": "integer",
+ "minimum": 1,
+ "maximum": 50,
+ "description": "Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises."
+ },
+ "seed": {
+ "type": "integer",
+ "minimum": 1,
+ "maximum": 9999999999,
+ "description": "Random seed for reproducibility of the generation."
+ },
+ "repetition_penalty": {
+ "type": "number",
+ "minimum": 0,
+ "maximum": 2,
+ "description": "Penalty for repeated tokens; higher values discourage repetition."
+ },
+ "frequency_penalty": {
+ "type": "number",
+ "minimum": -2,
+ "maximum": 2,
+ "description": "Decreases the likelihood of the model repeating the same lines verbatim."
+ },
+ "presence_penalty": {
+ "type": "number",
+ "minimum": -2,
+ "maximum": 2,
+ "description": "Increases the likelihood of the model introducing new topics."
+ }
+ },
+ "required": [
+ "messages"
+ ]
+ }
+ ]
+ }
+ }
+ },
+ "required": [
+ "requests"
+ ]
+ }
+ ]
+ },
+ "output": {
+ "oneOf": [
+ {
+ "type": "object",
+ "contentType": "application/json",
+ "title": "Chat Completion Response",
+ "properties": {
+ "id": {
+ "type": "string",
+ "description": "Unique identifier for the completion"
+ },
+ "object": {
+ "type": "string",
+ "enum": [
+ "chat.completion"
+ ],
+ "description": "Object type identifier"
+ },
+ "created": {
+ "type": "number",
+ "description": "Unix timestamp of when the completion was created"
+ },
+ "model": {
+ "type": "string",
+ "description": "Model used for the completion"
+ },
+ "choices": {
+ "type": "array",
+ "description": "List of completion choices",
+ "items": {
+ "type": "object",
+ "properties": {
+ "index": {
+ "type": "number",
+ "description": "Index of the choice in the list"
+ },
+ "message": {
+ "type": "object",
+ "description": "The message generated by the model",
+ "properties": {
+ "role": {
+ "type": "string",
+ "description": "Role of the message author"
+ },
+ "content": {
+ "type": "string",
+ "description": "The content of the message"
+ },
+ "reasoning_content": {
+ "type": "string",
+ "description": "Internal reasoning content (if available)"
+ },
+ "tool_calls": {
+ "type": "array",
+ "description": "Tool calls made by the assistant",
+ "items": {
+ "type": "object",
+ "properties": {
+ "id": {
+ "type": "string",
+ "description": "Unique identifier for the tool call"
+ },
+ "type": {
+ "type": "string",
+ "enum": [
+ "function"
+ ],
+ "description": "Type of tool call"
+ },
+ "function": {
+ "type": "object",
+ "properties": {
+ "name": {
+ "type": "string",
+ "description": "Name of the function to call"
+ },
+ "arguments": {
+ "type": "string",
+ "description": "JSON string of arguments for the function"
+ }
+ },
+ "required": [
+ "name",
+ "arguments"
+ ]
+ }
+ },
+ "required": [
+ "id",
+ "type",
+ "function"
+ ]
+ }
+ }
+ },
+ "required": [
+ "role",
+ "content"
+ ]
+ },
+ "finish_reason": {
+ "type": "string",
+ "description": "Reason why the model stopped generating"
+ },
+ "stop_reason": {
+ "type": [
+ "string",
+ "null"
+ ],
+ "description": "Stop reason (may be null)"
+ },
+ "logprobs": {
+ "type": [
+ "object",
+ "null"
+ ],
+ "description": "Log probabilities (if requested)"
+ }
+ }
+ }
+ },
+ "usage": {
+ "type": "object",
+ "description": "Usage statistics for the inference request",
+ "properties": {
+ "prompt_tokens": {
+ "type": "number",
+ "description": "Total number of tokens in input",
+ "default": 0
+ },
+ "completion_tokens": {
+ "type": "number",
+ "description": "Total number of tokens in output",
+ "default": 0
+ },
+ "total_tokens": {
+ "type": "number",
+ "description": "Total number of input and output tokens",
+ "default": 0
+ }
+ }
+ },
+ "prompt_logprobs": {
+ "type": [
+ "object",
+ "null"
+ ],
+ "description": "Log probabilities for the prompt (if requested)"
+ }
+ }
+ },
+ {
+ "type": "object",
+ "contentType": "application/json",
+ "title": "Text Completion Response",
+ "properties": {
+ "id": {
+ "type": "string",
+ "description": "Unique identifier for the completion"
+ },
+ "object": {
+ "type": "string",
+ "enum": [
+ "text_completion"
+ ],
+ "description": "Object type identifier"
+ },
+ "created": {
+ "type": "number",
+ "description": "Unix timestamp of when the completion was created"
+ },
+ "model": {
+ "type": "string",
+ "description": "Model used for the completion"
+ },
+ "choices": {
+ "type": "array",
+ "description": "List of completion choices",
+ "items": {
+ "type": "object",
+ "properties": {
+ "index": {
+ "type": "number",
+ "description": "Index of the choice in the list"
+ },
+ "text": {
+ "type": "string",
+ "description": "The generated text completion"
+ },
+ "finish_reason": {
+ "type": "string",
+ "description": "Reason why the model stopped generating"
+ },
+ "stop_reason": {
+ "type": [
+ "string",
+ "null"
+ ],
+ "description": "Stop reason (may be null)"
+ },
+ "logprobs": {
+ "type": [
+ "object",
+ "null"
+ ],
+ "description": "Log probabilities (if requested)"
+ },
+ "prompt_logprobs": {
+ "type": [
+ "object",
+ "null"
+ ],
+ "description": "Log probabilities for the prompt (if requested)"
+ }
+ },
+ "required": [
+ "index",
+ "text",
+ "finish_reason"
+ ]
+ }
+ },
+ "usage": {
+ "type": "object",
+ "description": "Usage statistics for the inference request",
+ "properties": {
+ "prompt_tokens": {
+ "type": "number",
+ "description": "Total number of tokens in input",
+ "default": 0
+ },
+ "completion_tokens": {
+ "type": "number",
+ "description": "Total number of tokens in output",
+ "default": 0
+ },
+ "total_tokens": {
+ "type": "number",
+ "description": "Total number of input and output tokens",
+ "default": 0
+ }
+ }
+ }
+ }
+ },
+ {
+ "type": "string",
+ "contentType": "text/event-stream",
+ "format": "binary"
+ },
+ {
+ "type": "object",
+ "contentType": "application/json",
+ "title": "Async response",
+ "properties": {
+ "request_id": {
+ "type": "string",
+ "description": "The async request id that can be used to obtain the results."
+ }
+ }
+ }
+ ]
+ }
+ }
+}
\ No newline at end of file
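
A minimal sketch of calling the new model from a Worker against the Messages variant of the input schema above. The `AI` binding name and the prompts are illustrative; the parameters (`messages`, `max_tokens`, `temperature`) come from the schema:

```ts
// Minimal sketch, assuming an `AI` binding is configured in wrangler config
// and @cloudflare/workers-types is installed (which provides the `Ai` type).
export interface Env {
  AI: Ai;
}

export default {
  async fetch(request: Request, env: Env): Promise<Response> {
    const result = await env.AI.run("@cf/ibm-granite/granite-4.0-h-micro", {
      messages: [
        { role: "system", content: "You are a concise assistant." },
        { role: "user", content: "Summarize what RAG is in one sentence." },
      ],
      max_tokens: 256,  // schema default for batch requests; chat default is 2000
      temperature: 0.6, // schema default
    });
    return Response.json(result);
  },
};
```
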
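The `tools` branch of the input schema accepts the OpenAI-style `type`/`function` shape, and the Chat Completion output schema surfaces any calls under `choices[].message.tool_calls` with JSON-string arguments. A hedged sketch (the `get_weather` tool is hypothetical):

```ts
// Runs inside a Worker handler with an `AI` binding in scope; get_weather
// is a made-up tool that only illustrates the schema's function shape.
export default {
  async fetch(_req: Request, env: { AI: Ai }): Promise<Response> {
    const result: any = await env.AI.run("@cf/ibm-granite/granite-4.0-h-micro", {
      messages: [{ role: "user", content: "What's the weather in Lisbon?" }],
      tools: [
        {
          type: "function",
          function: {
            name: "get_weather",
            description: "Look up current weather for a city.",
            parameters: {
              type: "object",
              required: ["city"],
              properties: {
                city: { type: "string", description: "City to query." },
              },
            },
          },
        },
      ],
    });

    // Per the output schema, tool calls arrive as
    // choices[0].message.tool_calls with JSON-string arguments.
    const calls = result.choices?.[0]?.message?.tool_calls ?? [];
    for (const call of calls) {
      const args = JSON.parse(call.function.arguments);
      console.log(call.function.name, args);
    }
    return Response.json(result);
  },
};
```
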
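Likewise, `response_format` supports `json_object` and `json_schema` per the JSON Mode block in the input schema; a sketch constraining output to a small schema (the concrete schema here is only an example, run inside the same handler as above):

```ts
// JSON Mode sketch: response_format.type and json_schema come from the
// input schema above; this particular json_schema is illustrative.
const structured = await env.AI.run("@cf/ibm-granite/granite-4.0-h-micro", {
  messages: [
    {
      role: "user",
      content: "Extract city and country from: 'I live in Lisbon, Portugal.'",
    },
  ],
  response_format: {
    type: "json_schema",
    json_schema: {
      type: "object",
      required: ["city", "country"],
      properties: {
        city: { type: "string" },
        country: { type: "string" },
      },
    },
  },
});
```
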
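Finally, the Async Batch input variant wraps per-request payloads in a `requests` array and, per the "Async response" output schema, returns a `request_id` rather than a completion. A sketch, assuming batching is enabled via the `queueRequest` option described in the Workers AI Asynchronous Batch API docs:

```ts
// Async Batch sketch: `requests` mirrors the schema's batch variant; the
// queueRequest option is an assumption taken from the Workers AI batch docs.
const batch: any = await env.AI.run(
  "@cf/ibm-granite/granite-4.0-h-micro",
  {
    requests: [
      { messages: [{ role: "user", content: "Define RAG in one line." }] },
      { prompt: "List three uses of function calling." },
    ],
  },
  { queueRequest: true },
);
console.log(batch.request_id); // poll later to retrieve the batch results
```
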