diff --git a/src/content/workers-ai-models/deepseek-coder-6.7b-base-awq.json b/src/content/workers-ai-models/deepseek-coder-6.7b-base-awq.json index af983000fa2156..d4b18a3a43467b 100644 --- a/src/content/workers-ai-models/deepseek-coder-6.7b-base-awq.json +++ b/src/content/workers-ai-models/deepseek-coder-6.7b-base-awq.json @@ -353,6 +353,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/deepseek-coder-6.7b-instruct-awq.json b/src/content/workers-ai-models/deepseek-coder-6.7b-instruct-awq.json index fd7a49e290756b..e61356329b6eff 100644 --- a/src/content/workers-ai-models/deepseek-coder-6.7b-instruct-awq.json +++ b/src/content/workers-ai-models/deepseek-coder-6.7b-instruct-awq.json @@ -353,6 +353,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/deepseek-math-7b-instruct.json b/src/content/workers-ai-models/deepseek-math-7b-instruct.json index cfc32336b7b4e0..4327c94333424b 100644 --- a/src/content/workers-ai-models/deepseek-math-7b-instruct.json +++ b/src/content/workers-ai-models/deepseek-math-7b-instruct.json @@ -357,6 +357,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/discolm-german-7b-v1-awq.json b/src/content/workers-ai-models/discolm-german-7b-v1-awq.json index 95032d3f89b0e7..8e22a36a612d9a 100644 --- a/src/content/workers-ai-models/discolm-german-7b-v1-awq.json +++ b/src/content/workers-ai-models/discolm-german-7b-v1-awq.json @@ -353,6 +353,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for 
the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/falcon-7b-instruct.json b/src/content/workers-ai-models/falcon-7b-instruct.json index 6f3c951cbecc7f..1ca23a34b1a727 100644 --- a/src/content/workers-ai-models/falcon-7b-instruct.json +++ b/src/content/workers-ai-models/falcon-7b-instruct.json @@ -353,6 +353,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/gemma-2b-it-lora.json b/src/content/workers-ai-models/gemma-2b-it-lora.json index 1f1a17f9149853..c663a9308e5840 100644 --- a/src/content/workers-ai-models/gemma-2b-it-lora.json +++ b/src/content/workers-ai-models/gemma-2b-it-lora.json @@ -353,6 +353,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/gemma-7b-it-lora.json b/src/content/workers-ai-models/gemma-7b-it-lora.json index 29f34f2bd9f70f..9f2ed54a01c5a8 100644 --- a/src/content/workers-ai-models/gemma-7b-it-lora.json +++ b/src/content/workers-ai-models/gemma-7b-it-lora.json @@ -353,6 +353,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/gemma-7b-it.json 
b/src/content/workers-ai-models/gemma-7b-it.json index de214c0928ef94..a09614544e2537 100644 --- a/src/content/workers-ai-models/gemma-7b-it.json +++ b/src/content/workers-ai-models/gemma-7b-it.json @@ -373,6 +373,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/hermes-2-pro-mistral-7b.json b/src/content/workers-ai-models/hermes-2-pro-mistral-7b.json index 7aa4c4a691566a..7cc59a13b2561d 100644 --- a/src/content/workers-ai-models/hermes-2-pro-mistral-7b.json +++ b/src/content/workers-ai-models/hermes-2-pro-mistral-7b.json @@ -357,6 +357,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/llama-2-13b-chat-awq.json b/src/content/workers-ai-models/llama-2-13b-chat-awq.json index a27ba94d87ce5b..dff9c82a3c9e6f 100644 --- a/src/content/workers-ai-models/llama-2-13b-chat-awq.json +++ b/src/content/workers-ai-models/llama-2-13b-chat-awq.json @@ -353,6 +353,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/llama-2-7b-chat-fp16.json b/src/content/workers-ai-models/llama-2-7b-chat-fp16.json index d2aa36b2a48270..8bbf9fb2ed385a 100644 --- a/src/content/workers-ai-models/llama-2-7b-chat-fp16.json +++ b/src/content/workers-ai-models/llama-2-7b-chat-fp16.json @@ -357,6 +357,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + 
"description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/llama-2-7b-chat-hf-lora.json b/src/content/workers-ai-models/llama-2-7b-chat-hf-lora.json index 52d2f0f2d0fa15..6c2c7ef94963e8 100644 --- a/src/content/workers-ai-models/llama-2-7b-chat-hf-lora.json +++ b/src/content/workers-ai-models/llama-2-7b-chat-hf-lora.json @@ -353,6 +353,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/llama-2-7b-chat-int8.json b/src/content/workers-ai-models/llama-2-7b-chat-int8.json index b74311b3ed3c3e..f15f1fce9e25af 100644 --- a/src/content/workers-ai-models/llama-2-7b-chat-int8.json +++ b/src/content/workers-ai-models/llama-2-7b-chat-int8.json @@ -344,6 +344,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/llama-3-8b-instruct-awq.json b/src/content/workers-ai-models/llama-3-8b-instruct-awq.json index eaa38306420889..86d45f3a4d4d97 100644 --- a/src/content/workers-ai-models/llama-3-8b-instruct-awq.json +++ b/src/content/workers-ai-models/llama-3-8b-instruct-awq.json @@ -353,6 +353,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/llama-3-8b-instruct.json b/src/content/workers-ai-models/llama-3-8b-instruct.json index 4fcebca77ef913..55ec2a52efab54 100644 --- a/src/content/workers-ai-models/llama-3-8b-instruct.json +++ 
b/src/content/workers-ai-models/llama-3-8b-instruct.json @@ -353,6 +353,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/llama-3.1-8b-instruct-awq.json b/src/content/workers-ai-models/llama-3.1-8b-instruct-awq.json index c97e9958c22b6c..f2ff8fa6660131 100644 --- a/src/content/workers-ai-models/llama-3.1-8b-instruct-awq.json +++ b/src/content/workers-ai-models/llama-3.1-8b-instruct-awq.json @@ -349,6 +349,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/llama-3.1-8b-instruct-fp8.json b/src/content/workers-ai-models/llama-3.1-8b-instruct-fp8.json index b1733be14f578f..75c364934e8ac8 100644 --- a/src/content/workers-ai-models/llama-3.1-8b-instruct-fp8.json +++ b/src/content/workers-ai-models/llama-3.1-8b-instruct-fp8.json @@ -349,6 +349,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/llama-3.1-8b-instruct.json b/src/content/workers-ai-models/llama-3.1-8b-instruct.json index b4b5fd7c9d428d..6fd36b59bb577e 100644 --- a/src/content/workers-ai-models/llama-3.1-8b-instruct.json +++ b/src/content/workers-ai-models/llama-3.1-8b-instruct.json @@ -349,6 +349,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", 
+ "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/llama-3.2-1b-instruct.json b/src/content/workers-ai-models/llama-3.2-1b-instruct.json index 1d37740559ce4c..eb35aa6e24a884 100644 --- a/src/content/workers-ai-models/llama-3.2-1b-instruct.json +++ b/src/content/workers-ai-models/llama-3.2-1b-instruct.json @@ -349,6 +349,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/llama-3.2-3b-instruct.json b/src/content/workers-ai-models/llama-3.2-3b-instruct.json index e5fc5b67d49b6b..b215a7c281f165 100644 --- a/src/content/workers-ai-models/llama-3.2-3b-instruct.json +++ b/src/content/workers-ai-models/llama-3.2-3b-instruct.json @@ -349,6 +349,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/llama-3.3-70b-instruct-fp8-fast.json b/src/content/workers-ai-models/llama-3.3-70b-instruct-fp8-fast.json index c60b03f219c182..395207fa51f578 100644 --- a/src/content/workers-ai-models/llama-3.3-70b-instruct-fp8-fast.json +++ b/src/content/workers-ai-models/llama-3.3-70b-instruct-fp8-fast.json @@ -349,6 +349,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/llamaguard-7b-awq.json b/src/content/workers-ai-models/llamaguard-7b-awq.json index 5358509863c92e..b1545f9c397d10 100644 --- a/src/content/workers-ai-models/llamaguard-7b-awq.json +++ b/src/content/workers-ai-models/llamaguard-7b-awq.json @@ -349,6 +349,27 @@ "type": "string", 
"description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/llava-1.5-7b-hf.json b/src/content/workers-ai-models/llava-1.5-7b-hf.json index 9ede5fbeebc0b3..cdf206603fd6d6 100644 --- a/src/content/workers-ai-models/llava-1.5-7b-hf.json +++ b/src/content/workers-ai-models/llava-1.5-7b-hf.json @@ -39,6 +39,30 @@ "default": false, "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting." }, + "top_p": { + "type": "number", + "description": "Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." + }, + "top_k": { + "type": "number", + "description": "Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises." + }, + "seed": { + "type": "number", + "description": "Random seed for reproducibility of the generation." + }, + "repetition_penalty": { + "type": "number", + "description": "Penalty for repeated tokens; higher values discourage repetition." + }, + "frequency_penalty": { + "type": "number", + "description": "Decreases the likelihood of the model repeating the same lines verbatim." + }, + "presence_penalty": { + "type": "number", + "description": "Increases the likelihood of the model introducing new topics." 
+ }, "image": { "oneOf": [ { diff --git a/src/content/workers-ai-models/meta-llama-3-8b-instruct.json b/src/content/workers-ai-models/meta-llama-3-8b-instruct.json index 508f013bb789a7..e6ff5fab2a7928 100644 --- a/src/content/workers-ai-models/meta-llama-3-8b-instruct.json +++ b/src/content/workers-ai-models/meta-llama-3-8b-instruct.json @@ -344,6 +344,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/mistral-7b-instruct-v0.1-awq.json b/src/content/workers-ai-models/mistral-7b-instruct-v0.1-awq.json index 2493b2edfa182e..0ceab1d96b9230 100644 --- a/src/content/workers-ai-models/mistral-7b-instruct-v0.1-awq.json +++ b/src/content/workers-ai-models/mistral-7b-instruct-v0.1-awq.json @@ -353,6 +353,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/mistral-7b-instruct-v0.1.json b/src/content/workers-ai-models/mistral-7b-instruct-v0.1.json index 673f78101bfdcd..f8a2f3981c42f4 100644 --- a/src/content/workers-ai-models/mistral-7b-instruct-v0.1.json +++ b/src/content/workers-ai-models/mistral-7b-instruct-v0.1.json @@ -357,6 +357,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/mistral-7b-instruct-v0.2-lora.json b/src/content/workers-ai-models/mistral-7b-instruct-v0.2-lora.json index eced60504f5b11..61413c647baa61 100644 --- a/src/content/workers-ai-models/mistral-7b-instruct-v0.2-lora.json +++ b/src/content/workers-ai-models/mistral-7b-instruct-v0.2-lora.json @@ -353,6 +353,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage 
statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/mistral-7b-instruct-v0.2.json b/src/content/workers-ai-models/mistral-7b-instruct-v0.2.json index 9af988219d6796..42e65e43f92b78 100644 --- a/src/content/workers-ai-models/mistral-7b-instruct-v0.2.json +++ b/src/content/workers-ai-models/mistral-7b-instruct-v0.2.json @@ -369,6 +369,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/neural-chat-7b-v3-1-awq.json b/src/content/workers-ai-models/neural-chat-7b-v3-1-awq.json index a7c26f5d880acb..8234317b0b0f52 100644 --- a/src/content/workers-ai-models/neural-chat-7b-v3-1-awq.json +++ b/src/content/workers-ai-models/neural-chat-7b-v3-1-awq.json @@ -349,6 +349,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/openchat-3.5-0106.json b/src/content/workers-ai-models/openchat-3.5-0106.json index d06ae2f8a09b03..cbb7c0dae5a1ca 100644 --- a/src/content/workers-ai-models/openchat-3.5-0106.json +++ b/src/content/workers-ai-models/openchat-3.5-0106.json @@ -353,6 +353,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git 
a/src/content/workers-ai-models/openhermes-2.5-mistral-7b-awq.json b/src/content/workers-ai-models/openhermes-2.5-mistral-7b-awq.json index c20787e20cb26d..c35017e2b0cdf6 100644 --- a/src/content/workers-ai-models/openhermes-2.5-mistral-7b-awq.json +++ b/src/content/workers-ai-models/openhermes-2.5-mistral-7b-awq.json @@ -349,6 +349,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/phi-2.json b/src/content/workers-ai-models/phi-2.json index 4e3f994f6fa547..bc41a76631604a 100644 --- a/src/content/workers-ai-models/phi-2.json +++ b/src/content/workers-ai-models/phi-2.json @@ -353,6 +353,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/qwen1.5-0.5b-chat.json b/src/content/workers-ai-models/qwen1.5-0.5b-chat.json index dc774a96d5bd88..a1d6ceaa1fdaf9 100644 --- a/src/content/workers-ai-models/qwen1.5-0.5b-chat.json +++ b/src/content/workers-ai-models/qwen1.5-0.5b-chat.json @@ -353,6 +353,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/qwen1.5-1.8b-chat.json b/src/content/workers-ai-models/qwen1.5-1.8b-chat.json index 9fd715d1e71e5d..97977017e6fe36 100644 --- a/src/content/workers-ai-models/qwen1.5-1.8b-chat.json +++ b/src/content/workers-ai-models/qwen1.5-1.8b-chat.json @@ -353,6 +353,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": 
{ + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/qwen1.5-14b-chat-awq.json b/src/content/workers-ai-models/qwen1.5-14b-chat-awq.json index 711a1235596206..bff46b051e4f92 100644 --- a/src/content/workers-ai-models/qwen1.5-14b-chat-awq.json +++ b/src/content/workers-ai-models/qwen1.5-14b-chat-awq.json @@ -353,6 +353,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/qwen1.5-7b-chat-awq.json b/src/content/workers-ai-models/qwen1.5-7b-chat-awq.json index 812dc114b83f18..c17d98d917d4f5 100644 --- a/src/content/workers-ai-models/qwen1.5-7b-chat-awq.json +++ b/src/content/workers-ai-models/qwen1.5-7b-chat-awq.json @@ -353,6 +353,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/sqlcoder-7b-2.json b/src/content/workers-ai-models/sqlcoder-7b-2.json index af330a2f4255cb..66f87190c93775 100644 --- a/src/content/workers-ai-models/sqlcoder-7b-2.json +++ b/src/content/workers-ai-models/sqlcoder-7b-2.json @@ -357,6 +357,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/starling-lm-7b-beta.json b/src/content/workers-ai-models/starling-lm-7b-beta.json index 79c3e51a86611c..9ff393649d46f2 100644 --- a/src/content/workers-ai-models/starling-lm-7b-beta.json +++ 
b/src/content/workers-ai-models/starling-lm-7b-beta.json @@ -365,6 +365,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/tinyllama-1.1b-chat-v1.0.json b/src/content/workers-ai-models/tinyllama-1.1b-chat-v1.0.json index f96b8a2094b39a..8f99e0d348bc31 100644 --- a/src/content/workers-ai-models/tinyllama-1.1b-chat-v1.0.json +++ b/src/content/workers-ai-models/tinyllama-1.1b-chat-v1.0.json @@ -353,6 +353,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/uform-gen2-qwen-500m.json b/src/content/workers-ai-models/uform-gen2-qwen-500m.json index fa7f000601dcde..0400ed70b67489 100644 --- a/src/content/workers-ai-models/uform-gen2-qwen-500m.json +++ b/src/content/workers-ai-models/uform-gen2-qwen-500m.json @@ -43,6 +43,30 @@ "default": false, "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting." }, + "top_p": { + "type": "number", + "description": "Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." + }, + "top_k": { + "type": "number", + "description": "Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises." + }, + "seed": { + "type": "number", + "description": "Random seed for reproducibility of the generation." + }, + "repetition_penalty": { + "type": "number", + "description": "Penalty for repeated tokens; higher values discourage repetition." + }, + "frequency_penalty": { + "type": "number", + "description": "Decreases the likelihood of the model repeating the same lines verbatim." + }, + "presence_penalty": { + "type": "number", + "description": "Increases the likelihood of the model introducing new topics." 
+ }, "image": { "oneOf": [ { diff --git a/src/content/workers-ai-models/una-cybertron-7b-v2-bf16.json b/src/content/workers-ai-models/una-cybertron-7b-v2-bf16.json index ad3d3b40925888..88fbe3646b7936 100644 --- a/src/content/workers-ai-models/una-cybertron-7b-v2-bf16.json +++ b/src/content/workers-ai-models/una-cybertron-7b-v2-bf16.json @@ -349,6 +349,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation", diff --git a/src/content/workers-ai-models/zephyr-7b-beta-awq.json b/src/content/workers-ai-models/zephyr-7b-beta-awq.json index 520e8d02a625fa..0685aa03b4d301 100644 --- a/src/content/workers-ai-models/zephyr-7b-beta-awq.json +++ b/src/content/workers-ai-models/zephyr-7b-beta-awq.json @@ -353,6 +353,27 @@ "type": "string", "description": "The generated text response from the model" }, + "usage": { + "type": "object", + "description": "Usage statistics for the inference request", + "properties": { + "prompt_tokens": { + "type": "number", + "description": "Total number of tokens in input", + "default": 0 + }, + "completion_tokens": { + "type": "number", + "description": "Total number of tokens in output", + "default": 0 + }, + "total_tokens": { + "type": "number", + "description": "Total number of input and output tokens", + "default": 0 + } + } + }, "tool_calls": { "type": "array", "description": "An array of tool calls requests made during the response generation",