diff --git a/src/content/workers-ai-models/deepseek-coder-6.7b-base-awq.json b/src/content/workers-ai-models/deepseek-coder-6.7b-base-awq.json index af983000fa2156..d4b18a3a43467b 100644 --- a/src/content/workers-ai-models/deepseek-coder-6.7b-base-awq.json +++ b/src/content/workers-ai-models/deepseek-coder-6.7b-base-awq.json @@ -353,6 +353,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/deepseek-coder-6.7b-instruct-awq.json b/src/content/workers-ai-models/deepseek-coder-6.7b-instruct-awq.json index fd7a49e290756b..e61356329b6eff 100644 --- a/src/content/workers-ai-models/deepseek-coder-6.7b-instruct-awq.json +++ b/src/content/workers-ai-models/deepseek-coder-6.7b-instruct-awq.json @@ -353,6 +353,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/deepseek-math-7b-instruct.json b/src/content/workers-ai-models/deepseek-math-7b-instruct.json index cfc32336b7b4e0..4327c94333424b 100644 --- a/src/content/workers-ai-models/deepseek-math-7b-instruct.json +++ b/src/content/workers-ai-models/deepseek-math-7b-instruct.json @@ -357,6 +357,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/discolm-german-7b-v1-awq.json b/src/content/workers-ai-models/discolm-german-7b-v1-awq.json index 95032d3f89b0e7..8e22a36a612d9a 100644 --- a/src/content/workers-ai-models/discolm-german-7b-v1-awq.json +++ b/src/content/workers-ai-models/discolm-german-7b-v1-awq.json @@ -353,6 +353,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for 
the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/falcon-7b-instruct.json b/src/content/workers-ai-models/falcon-7b-instruct.json index 6f3c951cbecc7f..1ca23a34b1a727 100644 --- a/src/content/workers-ai-models/falcon-7b-instruct.json +++ b/src/content/workers-ai-models/falcon-7b-instruct.json @@ -353,6 +353,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/gemma-2b-it-lora.json b/src/content/workers-ai-models/gemma-2b-it-lora.json index 1f1a17f9149853..c663a9308e5840 100644 --- a/src/content/workers-ai-models/gemma-2b-it-lora.json +++ b/src/content/workers-ai-models/gemma-2b-it-lora.json @@ -353,6 +353,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/gemma-7b-it-lora.json b/src/content/workers-ai-models/gemma-7b-it-lora.json index 29f34f2bd9f70f..9f2ed54a01c5a8 100644 --- a/src/content/workers-ai-models/gemma-7b-it-lora.json +++ b/src/content/workers-ai-models/gemma-7b-it-lora.json @@ -353,6 +353,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/gemma-7b-it.json 
b/src/content/workers-ai-models/gemma-7b-it.json index de214c0928ef94..a09614544e2537 100644 --- a/src/content/workers-ai-models/gemma-7b-it.json +++ b/src/content/workers-ai-models/gemma-7b-it.json @@ -373,6 +373,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/hermes-2-pro-mistral-7b.json b/src/content/workers-ai-models/hermes-2-pro-mistral-7b.json index 7aa4c4a691566a..7cc59a13b2561d 100644 --- a/src/content/workers-ai-models/hermes-2-pro-mistral-7b.json +++ b/src/content/workers-ai-models/hermes-2-pro-mistral-7b.json @@ -357,6 +357,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/llama-2-13b-chat-awq.json b/src/content/workers-ai-models/llama-2-13b-chat-awq.json index a27ba94d87ce5b..dff9c82a3c9e6f 100644 --- a/src/content/workers-ai-models/llama-2-13b-chat-awq.json +++ b/src/content/workers-ai-models/llama-2-13b-chat-awq.json @@ -353,6 +353,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/llama-2-7b-chat-fp16.json b/src/content/workers-ai-models/llama-2-7b-chat-fp16.json index d2aa36b2a48270..8bbf9fb2ed385a 100644 --- a/src/content/workers-ai-models/llama-2-7b-chat-fp16.json +++ b/src/content/workers-ai-models/llama-2-7b-chat-fp16.json @@ -357,6 +357,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + 
"description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/llama-2-7b-chat-hf-lora.json b/src/content/workers-ai-models/llama-2-7b-chat-hf-lora.json index 52d2f0f2d0fa15..6c2c7ef94963e8 100644 --- a/src/content/workers-ai-models/llama-2-7b-chat-hf-lora.json +++ b/src/content/workers-ai-models/llama-2-7b-chat-hf-lora.json @@ -353,6 +353,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/llama-2-7b-chat-int8.json b/src/content/workers-ai-models/llama-2-7b-chat-int8.json index b74311b3ed3c3e..f15f1fce9e25af 100644 --- a/src/content/workers-ai-models/llama-2-7b-chat-int8.json +++ b/src/content/workers-ai-models/llama-2-7b-chat-int8.json @@ -344,6 +344,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/llama-3-8b-instruct-awq.json b/src/content/workers-ai-models/llama-3-8b-instruct-awq.json index eaa38306420889..86d45f3a4d4d97 100644 --- a/src/content/workers-ai-models/llama-3-8b-instruct-awq.json +++ b/src/content/workers-ai-models/llama-3-8b-instruct-awq.json @@ -353,6 +353,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/llama-3-8b-instruct.json b/src/content/workers-ai-models/llama-3-8b-instruct.json index 4fcebca77ef913..55ec2a52efab54 100644 --- a/src/content/workers-ai-models/llama-3-8b-instruct.json +++ 
b/src/content/workers-ai-models/llama-3-8b-instruct.json @@ -353,6 +353,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/llama-3.1-8b-instruct-awq.json b/src/content/workers-ai-models/llama-3.1-8b-instruct-awq.json index c97e9958c22b6c..f2ff8fa6660131 100644 --- a/src/content/workers-ai-models/llama-3.1-8b-instruct-awq.json +++ b/src/content/workers-ai-models/llama-3.1-8b-instruct-awq.json @@ -349,6 +349,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/llama-3.1-8b-instruct-fp8.json b/src/content/workers-ai-models/llama-3.1-8b-instruct-fp8.json index b1733be14f578f..75c364934e8ac8 100644 --- a/src/content/workers-ai-models/llama-3.1-8b-instruct-fp8.json +++ b/src/content/workers-ai-models/llama-3.1-8b-instruct-fp8.json @@ -349,6 +349,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/llama-3.1-8b-instruct.json b/src/content/workers-ai-models/llama-3.1-8b-instruct.json index b4b5fd7c9d428d..6fd36b59bb577e 100644 --- a/src/content/workers-ai-models/llama-3.1-8b-instruct.json +++ b/src/content/workers-ai-models/llama-3.1-8b-instruct.json @@ -349,6 +349,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", 
+ "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/llama-3.2-1b-instruct.json b/src/content/workers-ai-models/llama-3.2-1b-instruct.json index 1d37740559ce4c..eb35aa6e24a884 100644 --- a/src/content/workers-ai-models/llama-3.2-1b-instruct.json +++ b/src/content/workers-ai-models/llama-3.2-1b-instruct.json @@ -349,6 +349,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/llama-3.2-3b-instruct.json b/src/content/workers-ai-models/llama-3.2-3b-instruct.json index e5fc5b67d49b6b..b215a7c281f165 100644 --- a/src/content/workers-ai-models/llama-3.2-3b-instruct.json +++ b/src/content/workers-ai-models/llama-3.2-3b-instruct.json @@ -349,6 +349,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/llama-3.3-70b-instruct-fp8-fast.json b/src/content/workers-ai-models/llama-3.3-70b-instruct-fp8-fast.json index c60b03f219c182..395207fa51f578 100644 --- a/src/content/workers-ai-models/llama-3.3-70b-instruct-fp8-fast.json +++ b/src/content/workers-ai-models/llama-3.3-70b-instruct-fp8-fast.json @@ -349,6 +349,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/llamaguard-7b-awq.json b/src/content/workers-ai-models/llamaguard-7b-awq.json index 5358509863c92e..b1545f9c397d10 100644 --- a/src/content/workers-ai-models/llamaguard-7b-awq.json +++ b/src/content/workers-ai-models/llamaguard-7b-awq.json @@ -349,6 +349,27 @@ "type": "string", 
"description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/llava-1.5-7b-hf.json b/src/content/workers-ai-models/llava-1.5-7b-hf.json index 9ede5fbeebc0b3..cdf206603fd6d6 100644 --- a/src/content/workers-ai-models/llava-1.5-7b-hf.json +++ b/src/content/workers-ai-models/llava-1.5-7b-hf.json @@ -39,6 +39,30 @@ "default": false, "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting." }, + "top_p": { + "type": "number", + "description": "Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." + }, + "top_k": { + "type": "number", + "description": "Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises." + }, + "seed": { + "type": "number", + "description": "Random seed for reproducibility of the generation." + }, + "repetition_penalty": { + "type": "number", + "description": "Penalty for repeated tokens; higher values discourage repetition." + }, + "frequency_penalty": { + "type": "number", + "description": "Decreases the likelihood of the model repeating the same lines verbatim." + }, + "presence_penalty": { + "type": "number", + "description": "Increases the likelihood of the model introducing new topics." 
+ }, "image": { "oneOf": [ { diff --git a/src/content/workers-ai-models/meta-llama-3-8b-instruct.json b/src/content/workers-ai-models/meta-llama-3-8b-instruct.json index 508f013bb789a7..e6ff5fab2a7928 100644 --- a/src/content/workers-ai-models/meta-llama-3-8b-instruct.json +++ b/src/content/workers-ai-models/meta-llama-3-8b-instruct.json @@ -344,6 +344,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/mistral-7b-instruct-v0.1-awq.json b/src/content/workers-ai-models/mistral-7b-instruct-v0.1-awq.json index 2493b2edfa182e..0ceab1d96b9230 100644 --- a/src/content/workers-ai-models/mistral-7b-instruct-v0.1-awq.json +++ b/src/content/workers-ai-models/mistral-7b-instruct-v0.1-awq.json @@ -353,6 +353,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/mistral-7b-instruct-v0.1.json b/src/content/workers-ai-models/mistral-7b-instruct-v0.1.json index 673f78101bfdcd..f8a2f3981c42f4 100644 --- a/src/content/workers-ai-models/mistral-7b-instruct-v0.1.json +++ b/src/content/workers-ai-models/mistral-7b-instruct-v0.1.json @@ -357,6 +357,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/mistral-7b-instruct-v0.2-lora.json b/src/content/workers-ai-models/mistral-7b-instruct-v0.2-lora.json index eced60504f5b11..61413c647baa61 100644 --- a/src/content/workers-ai-models/mistral-7b-instruct-v0.2-lora.json +++ b/src/content/workers-ai-models/mistral-7b-instruct-v0.2-lora.json @@ -353,6 +353,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage 
statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/mistral-7b-instruct-v0.2.json b/src/content/workers-ai-models/mistral-7b-instruct-v0.2.json index 9af988219d6796..42e65e43f92b78 100644 --- a/src/content/workers-ai-models/mistral-7b-instruct-v0.2.json +++ b/src/content/workers-ai-models/mistral-7b-instruct-v0.2.json @@ -369,6 +369,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/neural-chat-7b-v3-1-awq.json b/src/content/workers-ai-models/neural-chat-7b-v3-1-awq.json index a7c26f5d880acb..8234317b0b0f52 100644 --- a/src/content/workers-ai-models/neural-chat-7b-v3-1-awq.json +++ b/src/content/workers-ai-models/neural-chat-7b-v3-1-awq.json @@ -349,6 +349,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/openchat-3.5-0106.json b/src/content/workers-ai-models/openchat-3.5-0106.json index d06ae2f8a09b03..cbb7c0dae5a1ca 100644 --- a/src/content/workers-ai-models/openchat-3.5-0106.json +++ b/src/content/workers-ai-models/openchat-3.5-0106.json @@ -353,6 +353,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git 
a/src/content/workers-ai-models/openhermes-2.5-mistral-7b-awq.json b/src/content/workers-ai-models/openhermes-2.5-mistral-7b-awq.json index c20787e20cb26d..c35017e2b0cdf6 100644 --- a/src/content/workers-ai-models/openhermes-2.5-mistral-7b-awq.json +++ b/src/content/workers-ai-models/openhermes-2.5-mistral-7b-awq.json @@ -349,6 +349,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/phi-2.json b/src/content/workers-ai-models/phi-2.json index 4e3f994f6fa547..bc41a76631604a 100644 --- a/src/content/workers-ai-models/phi-2.json +++ b/src/content/workers-ai-models/phi-2.json @@ -353,6 +353,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/qwen1.5-0.5b-chat.json b/src/content/workers-ai-models/qwen1.5-0.5b-chat.json index dc774a96d5bd88..a1d6ceaa1fdaf9 100644 --- a/src/content/workers-ai-models/qwen1.5-0.5b-chat.json +++ b/src/content/workers-ai-models/qwen1.5-0.5b-chat.json @@ -353,6 +353,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/qwen1.5-1.8b-chat.json b/src/content/workers-ai-models/qwen1.5-1.8b-chat.json index 9fd715d1e71e5d..97977017e6fe36 100644 --- a/src/content/workers-ai-models/qwen1.5-1.8b-chat.json +++ b/src/content/workers-ai-models/qwen1.5-1.8b-chat.json @@ -353,6 +353,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": 
{ + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/qwen1.5-14b-chat-awq.json b/src/content/workers-ai-models/qwen1.5-14b-chat-awq.json index 711a1235596206..bff46b051e4f92 100644 --- a/src/content/workers-ai-models/qwen1.5-14b-chat-awq.json +++ b/src/content/workers-ai-models/qwen1.5-14b-chat-awq.json @@ -353,6 +353,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/qwen1.5-7b-chat-awq.json b/src/content/workers-ai-models/qwen1.5-7b-chat-awq.json index 812dc114b83f18..c17d98d917d4f5 100644 --- a/src/content/workers-ai-models/qwen1.5-7b-chat-awq.json +++ b/src/content/workers-ai-models/qwen1.5-7b-chat-awq.json @@ -353,6 +353,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/sqlcoder-7b-2.json b/src/content/workers-ai-models/sqlcoder-7b-2.json index af330a2f4255cb..66f87190c93775 100644 --- a/src/content/workers-ai-models/sqlcoder-7b-2.json +++ b/src/content/workers-ai-models/sqlcoder-7b-2.json @@ -357,6 +357,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/starling-lm-7b-beta.json b/src/content/workers-ai-models/starling-lm-7b-beta.json index 79c3e51a86611c..9ff393649d46f2 100644 --- a/src/content/workers-ai-models/starling-lm-7b-beta.json +++ 
b/src/content/workers-ai-models/starling-lm-7b-beta.json @@ -365,6 +365,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/tinyllama-1.1b-chat-v1.0.json b/src/content/workers-ai-models/tinyllama-1.1b-chat-v1.0.json index f96b8a2094b39a..8f99e0d348bc31 100644 --- a/src/content/workers-ai-models/tinyllama-1.1b-chat-v1.0.json +++ b/src/content/workers-ai-models/tinyllama-1.1b-chat-v1.0.json @@ -353,6 +353,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/uform-gen2-qwen-500m.json b/src/content/workers-ai-models/uform-gen2-qwen-500m.json index fa7f000601dcde..0400ed70b67489 100644 --- a/src/content/workers-ai-models/uform-gen2-qwen-500m.json +++ b/src/content/workers-ai-models/uform-gen2-qwen-500m.json @@ -43,6 +43,30 @@ "default": false, "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting." }, + "top_p": { + "type": "number", + "description": "Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." + }, + "top_k": { + "type": "number", + "description": "Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises." + }, + "seed": { + "type": "number", + "description": "Random seed for reproducibility of the generation." + }, + "repetition_penalty": { + "type": "number", + "description": "Penalty for repeated tokens; higher values discourage repetition." + }, + "frequency_penalty": { + "type": "number", + "description": "Decreases the likelihood of the model repeating the same lines verbatim." + }, + "presence_penalty": { + "type": "number", + "description": "Increases the likelihood of the model introducing new topics." 
+ }, "image": { "oneOf": [ { diff --git a/src/content/workers-ai-models/una-cybertron-7b-v2-bf16.json b/src/content/workers-ai-models/una-cybertron-7b-v2-bf16.json index ad3d3b40925888..88fbe3646b7936 100644 --- a/src/content/workers-ai-models/una-cybertron-7b-v2-bf16.json +++ b/src/content/workers-ai-models/una-cybertron-7b-v2-bf16.json @@ -349,6 +349,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/zephyr-7b-beta-awq.json b/src/content/workers-ai-models/zephyr-7b-beta-awq.json index 520e8d02a625fa..0685aa03b4d301 100644 --- a/src/content/workers-ai-models/zephyr-7b-beta-awq.json +++ b/src/content/workers-ai-models/zephyr-7b-beta-awq.json @@ -353,6 +353,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation",