diff --git a/src/content/changelog/workers-ai/2025-03-20-markdown-conversion.mdx b/src/content/changelog/workers-ai/2025-03-20-markdown-conversion.mdx new file mode 100644 index 000000000000000..fdb6200d1111e33 --- /dev/null +++ b/src/content/changelog/workers-ai/2025-03-20-markdown-conversion.mdx @@ -0,0 +1,62 @@ +--- +title: Markdown conversion in Workers AI +description: You can now convert documents in multiple formats to Markdown using the toMarkdown utility method in Workers AI. +date: 2025-03-20T18:00:00Z +--- + +Document conversion plays an important role when designing and developing AI applications and agents. Workers AI now provides the `toMarkdown` utility method that developers can use for quick, easy, and convenient conversion and summary of documents in multiple formats to Markdown language. + +You can call this new tool using a binding by calling `env.AI.toMarkdown()` or using the [REST API](/api/resources/ai/) endpoint. + +In this example, we fetch a PDF document and an image from R2 and feed them both to `env.AI.toMarkdown()`. The result is a list of converted documents. Workers AI models are used automatically to detect and summarize the image. 
+ +```typescript +import { Env } from "./env"; + +export default { + async fetch(request: Request, env: Env, ctx: ExecutionContext) { + + // https://pub-979cb28270cc461d94bc8a169d8f389d.r2.dev/somatosensory.pdf + const pdf = await env.R2.get('somatosensory.pdf'); + + // https://pub-979cb28270cc461d94bc8a169d8f389d.r2.dev/cat.jpeg + const cat = await env.R2.get('cat.jpeg'); + + return Response.json( + await env.AI.toMarkdown([ + { + name: "somatosensory.pdf", + blob: new Blob([await pdf.arrayBuffer()], { type: "application/octet-stream" }), + }, + { + name: "cat.jpeg", + blob: new Blob([await cat.arrayBuffer()], { type: "application/octet-stream" }), + }, + ]), + ); + }, +}; +``` + +This is the result: + +```json +[ + { + "name": "somatosensory.pdf", + "mimeType": "application/pdf", + "format": "markdown", + "tokens": 0, + "data": "# somatosensory.pdf\n## Metadata\n- PDFFormatVersion=1.4\n- IsLinearized=false\n- IsAcroFormPresent=false\n- IsXFAPresent=false\n- IsCollectionPresent=false\n- IsSignaturesPresent=false\n- Producer=Prince 20150210 (www.princexml.com)\n- Title=Anatomy of the Somatosensory System\n\n## Contents\n### Page 1\nThis is a sample document to showcase..." + }, + { + "name": "cat.jpeg", + "mimeType": "image/jpeg", + "format": "markdown", + "tokens": 0, + "data": "The image is a close-up photograph of Grumpy Cat, a cat with a distinctive grumpy expression and piercing blue eyes. The cat has a brown face with a white stripe down its nose, and its ears are pointed upright. Its fur is light brown and darker around the face, with a pink nose and mouth. The cat's eyes are blue and slanted downward, giving it a perpetually grumpy appearance. The background is blurred, but it appears to be a dark brown color. Overall, the image is a humorous and iconic representation of the popular internet meme character, Grumpy Cat. 
The cat's facial expression and posture convey a sense of displeasure or annoyance, making it a relatable and entertaining image for many people." + } +] +``` + +See [Markdown Conversion](/workers-ai/markdown-conversion/) for more information on supported formats, REST API and pricing. \ No newline at end of file diff --git a/src/content/docs/workers-ai/markdown-conversion.mdx b/src/content/docs/workers-ai/markdown-conversion.mdx new file mode 100644 index 000000000000000..ea56ec425709d9f --- /dev/null +++ b/src/content/docs/workers-ai/markdown-conversion.mdx @@ -0,0 +1,234 @@ +--- +title: Markdown Conversion +pcx_content_type: how-to +sidebar: + order: 5 + badge: + text: Beta +--- + +import { Code, Type, MetaInfo, Details } from "~/components"; + +[Markdown](https://en.wikipedia.org/wiki/Markdown) is essential for text generation and large language models (LLMs) in training and inference because it can provide structured, semantic, human, and machine-readable input. Likewise, Markdown facilitates chunking and structuring input data for better retrieval and synthesis in the context of RAGs, and its simplicity and ease of parsing and rendering make it ideal for AI Agents. + +For these reasons, document conversion plays an important role when designing and developing AI applications. Workers AI provides the `toMarkdown` utility method that developers can use from the [`env.AI`](/workers-ai/configuration/bindings/) binding or the REST APIs for quick, easy, and convenient conversion and summary of documents in multiple formats to Markdown language. + +## Methods and definitions + +### async env.AI.toMarkdown() + +Takes a list of documents in different formats and converts them to Markdown. + +#### Parameter + +- documents: + - An array of `toMarkdownDocument`s. + +#### Return values + +- results: + - An array of `toMarkdownDocumentResult`s. + +### `toMarkdownDocument` definition + +- `name` + + - Name of the document to convert. 
+ +- `blob` + + - A new [Blob](https://developer.mozilla.org/en-US/docs/Web/API/Blob/Blob) object with the document content. + +### `toMarkdownDocumentResult` definition + +- `name` + + - Name of the converted document. Matches the input name. + +- `mimetype` + + - The detected [mime type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Guides/MIME_types/Common_types) of the document. + +- `tokens` + + - The estimated number of tokens of the converted document. + +- `data` + + - The content of the converted document in Markdown format. + +## Supported formats + +This is the list of support formats. We are constantly adding new formats and updating this table. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ Format + + File extensions + + Mime Types +
+ PDF Documents + + `.pdf` + + `application/pdf` +
+ Images 1 + + `.jpeg`, `.jpg`, `.png`, `.webp`, `.svg` + + `image/jpeg`, `image/png`, `image/webp`, `image/svg+xml` +
+ HTML Documents + + `.html` + + `text/html` +
+ XML Documents + + `.xml` + + `application/xml` +
+ Microsoft Office Documents + + `.xlsx`, `.xlsm`, `.xlsb`, `.xls`, `.et` + + `application/vnd.openxmlformats-officedocument.spreadsheetml.sheet`, `application/vnd.ms-excel.sheet.macroenabled.12`, `application/vnd.ms-excel.sheet.binary.macroenabled.12`, `application/vnd.ms-excel`, `application/vnd.ms-excel` +
+ Open Document Format + + `.ods` + + `application/vnd.oasis.opendocument.spreadsheet` +
+ CSV + + `.csv` + + `text/csv` +
+ Apple Documents + + `.numbers` + + `application/vnd.apple.numbers` +
+ +1 Image conversion uses two Workers AI models for object detection and summarization. See [pricing](/workers-ai/markdown-conversion/#pricing) for more details. + +## Example + +In this example, we fetch a PDF document and an image from R2 and feed them both to `env.AI.toMarkdown`. The result is a list of converted documents. Workers AI models are used automatically to detect and summarize the image. + +```typescript +import { Env } from "./env"; + +export default { + async fetch(request: Request, env: Env, ctx: ExecutionContext) { + + // https://pub-979cb28270cc461d94bc8a169d8f389d.r2.dev/somatosensory.pdf + const pdf = await env.R2.get('somatosensory.pdf'); + + // https://pub-979cb28270cc461d94bc8a169d8f389d.r2.dev/cat.jpeg + const cat = await env.R2.get('cat.jpeg'); + + return Response.json( + await env.AI.toMarkdown([ + { + name: "somatosensory.pdf", + blob: new Blob([await pdf.arrayBuffer()], { type: "application/octet-stream" }), + }, + { + name: "cat.jpeg", + blob: new Blob([await cat.arrayBuffer()], { type: "application/octet-stream" }), + }, + ]), + ); + }, +}; +``` + +This is the result: + +```json +[ + { + "name": "somatosensory.pdf", + "mimeType": "application/pdf", + "format": "markdown", + "tokens": 0, + "data": "# somatosensory.pdf\n## Metadata\n- PDFFormatVersion=1.4\n- IsLinearized=false\n- IsAcroFormPresent=false\n- IsXFAPresent=false\n- IsCollectionPresent=false\n- IsSignaturesPresent=false\n- Producer=Prince 20150210 (www.princexml.com)\n- Title=Anatomy of the Somatosensory System\n\n## Contents\n### Page 1\nThis is a sample document to showcase..." + }, + { + "name": "cat.jpeg", + "mimeType": "image/jpeg", + "format": "markdown", + "tokens": 0, + "data": "The image is a close-up photograph of Grumpy Cat, a cat with a distinctive grumpy expression and piercing blue eyes. The cat has a brown face with a white stripe down its nose, and its ears are pointed upright. Its fur is light brown and darker around the face, with a pink nose and mouth. 
The cat's eyes are blue and slanted downward, giving it a perpetually grumpy appearance. The background is blurred, but it appears to be a dark brown color. Overall, the image is a humorous and iconic representation of the popular internet meme character, Grumpy Cat. The cat's facial expression and posture convey a sense of displeasure or annoyance, making it a relatable and entertaining image for many people." + } +] +``` + +## REST API + +In addition to the Workers AI [binding](/workers-ai/configuration/bindings/), you can use the [REST API](/workers-ai/get-started/rest-api/): + +```bash +curl https://api.cloudflare.com/client/v4/accounts/{ACCOUNT_ID}/ai/tomarkdown \ + -H 'Authorization: Bearer {API_TOKEN}' \ + -F "files=@cat.jpeg" \ + -F "files=@somatosensory.pdf" +``` + +## Pricing + +`toMarkdown` is free for most format conversions. In some cases, like image conversion, it can use Workers AI models for object detection and summarization, which may incur additional costs if it exceeds the Workers AI free allocation limits. See the [pricing page](/workers-ai/platform/pricing/) for more details. \ No newline at end of file diff --git a/src/content/workers-ai-models/deepseek-coder-6.7b-base-awq.json b/src/content/workers-ai-models/deepseek-coder-6.7b-base-awq.json index db4044064789438..e90d2fd66713478 100644 --- a/src/content/workers-ai-models/deepseek-coder-6.7b-base-awq.json +++ b/src/content/workers-ai-models/deepseek-coder-6.7b-base-awq.json @@ -33,9 +33,26 @@ "prompt": { "type": "string", "minLength": 1, - "maxLength": 131072, "description": "The input text prompt for the model to generate a response." }, + "lora": { + "type": "string", + "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." 
+ }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, "raw": { "type": "boolean", "default": false, @@ -93,10 +110,6 @@ "minimum": 0, "maximum": 2, "description": "Increases the likelihood of the model introducing new topics." - }, - "lora": { - "type": "string", - "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." } }, "required": [ @@ -118,7 +131,6 @@ }, "content": { "type": "string", - "maxLength": 131072, "description": "The content of the message as a string." } }, @@ -287,10 +299,29 @@ ] } }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, + "raw": { + "type": "boolean", + "default": false, + "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting." + }, "stream": { "type": "boolean", "default": false, - "description": "If true, the response will be streamed back incrementally." + "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events." }, "max_tokens": { "type": "integer", @@ -308,7 +339,7 @@ "type": "number", "minimum": 0, "maximum": 2, - "description": "Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." + "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." 
}, "top_k": { "type": "integer", @@ -395,7 +426,10 @@ } } } - } + }, + "required": [ + "response" + ] }, { "type": "string", diff --git a/src/content/workers-ai-models/deepseek-coder-6.7b-instruct-awq.json b/src/content/workers-ai-models/deepseek-coder-6.7b-instruct-awq.json index 65306283b724345..159cbd871445aaa 100644 --- a/src/content/workers-ai-models/deepseek-coder-6.7b-instruct-awq.json +++ b/src/content/workers-ai-models/deepseek-coder-6.7b-instruct-awq.json @@ -33,9 +33,26 @@ "prompt": { "type": "string", "minLength": 1, - "maxLength": 131072, "description": "The input text prompt for the model to generate a response." }, + "lora": { + "type": "string", + "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." + }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, "raw": { "type": "boolean", "default": false, @@ -93,10 +110,6 @@ "minimum": 0, "maximum": 2, "description": "Increases the likelihood of the model introducing new topics." - }, - "lora": { - "type": "string", - "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." } }, "required": [ @@ -118,7 +131,6 @@ }, "content": { "type": "string", - "maxLength": 131072, "description": "The content of the message as a string." } }, @@ -287,10 +299,29 @@ ] } }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, + "raw": { + "type": "boolean", + "default": false, + "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting." + }, "stream": { "type": "boolean", "default": false, - "description": "If true, the response will be streamed back incrementally." 
+ "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events." }, "max_tokens": { "type": "integer", @@ -308,7 +339,7 @@ "type": "number", "minimum": 0, "maximum": 2, - "description": "Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." + "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." }, "top_k": { "type": "integer", @@ -395,7 +426,10 @@ } } } - } + }, + "required": [ + "response" + ] }, { "type": "string", diff --git a/src/content/workers-ai-models/deepseek-math-7b-instruct.json b/src/content/workers-ai-models/deepseek-math-7b-instruct.json index 12a85d3aa8c4edb..9c29cd42a0f733e 100644 --- a/src/content/workers-ai-models/deepseek-math-7b-instruct.json +++ b/src/content/workers-ai-models/deepseek-math-7b-instruct.json @@ -37,9 +37,26 @@ "prompt": { "type": "string", "minLength": 1, - "maxLength": 131072, "description": "The input text prompt for the model to generate a response." }, + "lora": { + "type": "string", + "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." + }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, "raw": { "type": "boolean", "default": false, @@ -97,10 +114,6 @@ "minimum": 0, "maximum": 2, "description": "Increases the likelihood of the model introducing new topics." - }, - "lora": { - "type": "string", - "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." 
} }, "required": [ @@ -122,7 +135,6 @@ }, "content": { "type": "string", - "maxLength": 131072, "description": "The content of the message as a string." } }, @@ -291,10 +303,29 @@ ] } }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, + "raw": { + "type": "boolean", + "default": false, + "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting." + }, "stream": { "type": "boolean", "default": false, - "description": "If true, the response will be streamed back incrementally." + "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events." }, "max_tokens": { "type": "integer", @@ -312,7 +343,7 @@ "type": "number", "minimum": 0, "maximum": 2, - "description": "Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." + "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." }, "top_k": { "type": "integer", @@ -399,7 +430,10 @@ } } } - } + }, + "required": [ + "response" + ] }, { "type": "string", diff --git a/src/content/workers-ai-models/deepseek-r1-distill-qwen-32b.json b/src/content/workers-ai-models/deepseek-r1-distill-qwen-32b.json index 8763b60ba7b2815..870b32913659c95 100644 --- a/src/content/workers-ai-models/deepseek-r1-distill-qwen-32b.json +++ b/src/content/workers-ai-models/deepseek-r1-distill-qwen-32b.json @@ -29,9 +29,26 @@ "prompt": { "type": "string", "minLength": 1, - "maxLength": 131072, "description": "The input text prompt for the model to generate a response." 
}, + "lora": { + "type": "string", + "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." + }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, "raw": { "type": "boolean", "default": false, @@ -89,10 +106,6 @@ "minimum": 0, "maximum": 2, "description": "Increases the likelihood of the model introducing new topics." - }, - "lora": { - "type": "string", - "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." } }, "required": [ @@ -114,7 +127,6 @@ }, "content": { "type": "string", - "maxLength": 131072, "description": "The content of the message as a string." } }, @@ -283,10 +295,29 @@ ] } }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, + "raw": { + "type": "boolean", + "default": false, + "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting." + }, "stream": { "type": "boolean", "default": false, - "description": "If true, the response will be streamed back incrementally." + "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events." }, "max_tokens": { "type": "integer", @@ -304,7 +335,7 @@ "type": "number", "minimum": 0, "maximum": 2, - "description": "Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." + "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." 
}, "top_k": { "type": "integer", @@ -391,7 +422,10 @@ } } } - } + }, + "required": [ + "response" + ] }, { "type": "string", diff --git a/src/content/workers-ai-models/discolm-german-7b-v1-awq.json b/src/content/workers-ai-models/discolm-german-7b-v1-awq.json index 7f0fcef85bcf3f0..ba0c037f153160e 100644 --- a/src/content/workers-ai-models/discolm-german-7b-v1-awq.json +++ b/src/content/workers-ai-models/discolm-german-7b-v1-awq.json @@ -33,9 +33,26 @@ "prompt": { "type": "string", "minLength": 1, - "maxLength": 131072, "description": "The input text prompt for the model to generate a response." }, + "lora": { + "type": "string", + "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." + }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, "raw": { "type": "boolean", "default": false, @@ -93,10 +110,6 @@ "minimum": 0, "maximum": 2, "description": "Increases the likelihood of the model introducing new topics." - }, - "lora": { - "type": "string", - "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." } }, "required": [ @@ -118,7 +131,6 @@ }, "content": { "type": "string", - "maxLength": 131072, "description": "The content of the message as a string." } }, @@ -287,10 +299,29 @@ ] } }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, + "raw": { + "type": "boolean", + "default": false, + "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting." + }, "stream": { "type": "boolean", "default": false, - "description": "If true, the response will be streamed back incrementally." 
+ "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events." }, "max_tokens": { "type": "integer", @@ -308,7 +339,7 @@ "type": "number", "minimum": 0, "maximum": 2, - "description": "Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." + "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." }, "top_k": { "type": "integer", @@ -395,7 +426,10 @@ } } } - } + }, + "required": [ + "response" + ] }, { "type": "string", diff --git a/src/content/workers-ai-models/falcon-7b-instruct.json b/src/content/workers-ai-models/falcon-7b-instruct.json index 6e30c0f4c636735..aaf2d015489a46a 100644 --- a/src/content/workers-ai-models/falcon-7b-instruct.json +++ b/src/content/workers-ai-models/falcon-7b-instruct.json @@ -33,9 +33,26 @@ "prompt": { "type": "string", "minLength": 1, - "maxLength": 131072, "description": "The input text prompt for the model to generate a response." }, + "lora": { + "type": "string", + "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." + }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, "raw": { "type": "boolean", "default": false, @@ -93,10 +110,6 @@ "minimum": 0, "maximum": 2, "description": "Increases the likelihood of the model introducing new topics." - }, - "lora": { - "type": "string", - "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." 
} }, "required": [ @@ -118,7 +131,6 @@ }, "content": { "type": "string", - "maxLength": 131072, "description": "The content of the message as a string." } }, @@ -287,10 +299,29 @@ ] } }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, + "raw": { + "type": "boolean", + "default": false, + "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting." + }, "stream": { "type": "boolean", "default": false, - "description": "If true, the response will be streamed back incrementally." + "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events." }, "max_tokens": { "type": "integer", @@ -308,7 +339,7 @@ "type": "number", "minimum": 0, "maximum": 2, - "description": "Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." + "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." }, "top_k": { "type": "integer", @@ -395,7 +426,10 @@ } } } - } + }, + "required": [ + "response" + ] }, { "type": "string", diff --git a/src/content/workers-ai-models/gemma-2b-it-lora.json b/src/content/workers-ai-models/gemma-2b-it-lora.json index 88a14cd353f9af0..b2eb46d1772b9f7 100644 --- a/src/content/workers-ai-models/gemma-2b-it-lora.json +++ b/src/content/workers-ai-models/gemma-2b-it-lora.json @@ -33,9 +33,26 @@ "prompt": { "type": "string", "minLength": 1, - "maxLength": 131072, "description": "The input text prompt for the model to generate a response." 
}, + "lora": { + "type": "string", + "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." + }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, "raw": { "type": "boolean", "default": false, @@ -93,10 +110,6 @@ "minimum": 0, "maximum": 2, "description": "Increases the likelihood of the model introducing new topics." - }, - "lora": { - "type": "string", - "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." } }, "required": [ @@ -118,7 +131,6 @@ }, "content": { "type": "string", - "maxLength": 131072, "description": "The content of the message as a string." } }, @@ -287,10 +299,29 @@ ] } }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, + "raw": { + "type": "boolean", + "default": false, + "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting." + }, "stream": { "type": "boolean", "default": false, - "description": "If true, the response will be streamed back incrementally." + "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events." }, "max_tokens": { "type": "integer", @@ -308,7 +339,7 @@ "type": "number", "minimum": 0, "maximum": 2, - "description": "Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." + "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." 
}, "top_k": { "type": "integer", @@ -395,7 +426,10 @@ } } } - } + }, + "required": [ + "response" + ] }, { "type": "string", diff --git a/src/content/workers-ai-models/gemma-7b-it-lora.json b/src/content/workers-ai-models/gemma-7b-it-lora.json index 52821bc31e13a2d..9062176c657454e 100644 --- a/src/content/workers-ai-models/gemma-7b-it-lora.json +++ b/src/content/workers-ai-models/gemma-7b-it-lora.json @@ -33,9 +33,26 @@ "prompt": { "type": "string", "minLength": 1, - "maxLength": 131072, "description": "The input text prompt for the model to generate a response." }, + "lora": { + "type": "string", + "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." + }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, "raw": { "type": "boolean", "default": false, @@ -93,10 +110,6 @@ "minimum": 0, "maximum": 2, "description": "Increases the likelihood of the model introducing new topics." - }, - "lora": { - "type": "string", - "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." } }, "required": [ @@ -118,7 +131,6 @@ }, "content": { "type": "string", - "maxLength": 131072, "description": "The content of the message as a string." } }, @@ -287,10 +299,29 @@ ] } }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, + "raw": { + "type": "boolean", + "default": false, + "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting." + }, "stream": { "type": "boolean", "default": false, - "description": "If true, the response will be streamed back incrementally." 
+ "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events." }, "max_tokens": { "type": "integer", @@ -308,7 +339,7 @@ "type": "number", "minimum": 0, "maximum": 2, - "description": "Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." + "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." }, "top_k": { "type": "integer", @@ -395,7 +426,10 @@ } } } - } + }, + "required": [ + "response" + ] }, { "type": "string", diff --git a/src/content/workers-ai-models/gemma-7b-it.json b/src/content/workers-ai-models/gemma-7b-it.json index ffd606a45c318ca..def9fb7377de4b0 100644 --- a/src/content/workers-ai-models/gemma-7b-it.json +++ b/src/content/workers-ai-models/gemma-7b-it.json @@ -41,9 +41,26 @@ "prompt": { "type": "string", "minLength": 1, - "maxLength": 131072, "description": "The input text prompt for the model to generate a response." }, + "lora": { + "type": "string", + "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." + }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, "raw": { "type": "boolean", "default": false, @@ -101,10 +118,6 @@ "minimum": 0, "maximum": 2, "description": "Increases the likelihood of the model introducing new topics." - }, - "lora": { - "type": "string", - "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." } }, "required": [ @@ -126,7 +139,6 @@ }, "content": { "type": "string", - "maxLength": 131072, "description": "The content of the message as a string." 
} }, @@ -295,10 +307,29 @@ ] } }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, + "raw": { + "type": "boolean", + "default": false, + "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting." + }, "stream": { "type": "boolean", "default": false, - "description": "If true, the response will be streamed back incrementally." + "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events." }, "max_tokens": { "type": "integer", @@ -316,7 +347,7 @@ "type": "number", "minimum": 0, "maximum": 2, - "description": "Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." + "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." }, "top_k": { "type": "integer", @@ -403,7 +434,10 @@ } } } - } + }, + "required": [ + "response" + ] }, { "type": "string", diff --git a/src/content/workers-ai-models/hermes-2-pro-mistral-7b.json b/src/content/workers-ai-models/hermes-2-pro-mistral-7b.json index be073550cb32a24..827434a50ab0768 100644 --- a/src/content/workers-ai-models/hermes-2-pro-mistral-7b.json +++ b/src/content/workers-ai-models/hermes-2-pro-mistral-7b.json @@ -37,9 +37,26 @@ "prompt": { "type": "string", "minLength": 1, - "maxLength": 131072, "description": "The input text prompt for the model to generate a response." }, + "lora": { + "type": "string", + "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." 
+ }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, "raw": { "type": "boolean", "default": false, @@ -97,10 +114,6 @@ "minimum": 0, "maximum": 2, "description": "Increases the likelihood of the model introducing new topics." - }, - "lora": { - "type": "string", - "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." } }, "required": [ @@ -122,7 +135,6 @@ }, "content": { "type": "string", - "maxLength": 131072, "description": "The content of the message as a string." } }, @@ -291,10 +303,29 @@ ] } }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, + "raw": { + "type": "boolean", + "default": false, + "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting." + }, "stream": { "type": "boolean", "default": false, - "description": "If true, the response will be streamed back incrementally." + "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events." }, "max_tokens": { "type": "integer", @@ -312,7 +343,7 @@ "type": "number", "minimum": 0, "maximum": 2, - "description": "Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." + "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." 
}, "top_k": { "type": "integer", @@ -399,7 +430,10 @@ } } } - } + }, + "required": [ + "response" + ] }, { "type": "string", diff --git a/src/content/workers-ai-models/llama-2-13b-chat-awq.json b/src/content/workers-ai-models/llama-2-13b-chat-awq.json index d73ff0a71db7169..f7f19a3929d1921 100644 --- a/src/content/workers-ai-models/llama-2-13b-chat-awq.json +++ b/src/content/workers-ai-models/llama-2-13b-chat-awq.json @@ -33,9 +33,26 @@ "prompt": { "type": "string", "minLength": 1, - "maxLength": 131072, "description": "The input text prompt for the model to generate a response." }, + "lora": { + "type": "string", + "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." + }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, "raw": { "type": "boolean", "default": false, @@ -93,10 +110,6 @@ "minimum": 0, "maximum": 2, "description": "Increases the likelihood of the model introducing new topics." - }, - "lora": { - "type": "string", - "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." } }, "required": [ @@ -118,7 +131,6 @@ }, "content": { "type": "string", - "maxLength": 131072, "description": "The content of the message as a string." } }, @@ -287,10 +299,29 @@ ] } }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, + "raw": { + "type": "boolean", + "default": false, + "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting." + }, "stream": { "type": "boolean", "default": false, - "description": "If true, the response will be streamed back incrementally." 
+ "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events." }, "max_tokens": { "type": "integer", @@ -308,7 +339,7 @@ "type": "number", "minimum": 0, "maximum": 2, - "description": "Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." + "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." }, "top_k": { "type": "integer", @@ -395,7 +426,10 @@ } } } - } + }, + "required": [ + "response" + ] }, { "type": "string", diff --git a/src/content/workers-ai-models/llama-2-7b-chat-fp16.json b/src/content/workers-ai-models/llama-2-7b-chat-fp16.json index 28655dc4586db51..c91409c5431ab09 100644 --- a/src/content/workers-ai-models/llama-2-7b-chat-fp16.json +++ b/src/content/workers-ai-models/llama-2-7b-chat-fp16.json @@ -37,9 +37,26 @@ "prompt": { "type": "string", "minLength": 1, - "maxLength": 131072, "description": "The input text prompt for the model to generate a response." }, + "lora": { + "type": "string", + "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." + }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, "raw": { "type": "boolean", "default": false, @@ -97,10 +114,6 @@ "minimum": 0, "maximum": 2, "description": "Increases the likelihood of the model introducing new topics." - }, - "lora": { - "type": "string", - "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." 
} }, "required": [ @@ -122,7 +135,6 @@ }, "content": { "type": "string", - "maxLength": 131072, "description": "The content of the message as a string." } }, @@ -291,10 +303,29 @@ ] } }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, + "raw": { + "type": "boolean", + "default": false, + "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting." + }, "stream": { "type": "boolean", "default": false, - "description": "If true, the response will be streamed back incrementally." + "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events." }, "max_tokens": { "type": "integer", @@ -312,7 +343,7 @@ "type": "number", "minimum": 0, "maximum": 2, - "description": "Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." + "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." }, "top_k": { "type": "integer", @@ -399,7 +430,10 @@ } } } - } + }, + "required": [ + "response" + ] }, { "type": "string", diff --git a/src/content/workers-ai-models/llama-2-7b-chat-hf-lora.json b/src/content/workers-ai-models/llama-2-7b-chat-hf-lora.json index 1a057aaa42ca5df..a2c238931755f12 100644 --- a/src/content/workers-ai-models/llama-2-7b-chat-hf-lora.json +++ b/src/content/workers-ai-models/llama-2-7b-chat-hf-lora.json @@ -33,9 +33,26 @@ "prompt": { "type": "string", "minLength": 1, - "maxLength": 131072, "description": "The input text prompt for the model to generate a response." 
}, + "lora": { + "type": "string", + "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." + }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, "raw": { "type": "boolean", "default": false, @@ -93,10 +110,6 @@ "minimum": 0, "maximum": 2, "description": "Increases the likelihood of the model introducing new topics." - }, - "lora": { - "type": "string", - "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." } }, "required": [ @@ -118,7 +131,6 @@ }, "content": { "type": "string", - "maxLength": 131072, "description": "The content of the message as a string." } }, @@ -287,10 +299,29 @@ ] } }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, + "raw": { + "type": "boolean", + "default": false, + "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting." + }, "stream": { "type": "boolean", "default": false, - "description": "If true, the response will be streamed back incrementally." + "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events." }, "max_tokens": { "type": "integer", @@ -308,7 +339,7 @@ "type": "number", "minimum": 0, "maximum": 2, - "description": "Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." + "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." 
}, "top_k": { "type": "integer", @@ -395,7 +426,10 @@ } } } - } + }, + "required": [ + "response" + ] }, { "type": "string", diff --git a/src/content/workers-ai-models/llama-2-7b-chat-int8.json b/src/content/workers-ai-models/llama-2-7b-chat-int8.json index a05377fd91da919..044a37507bc8924 100644 --- a/src/content/workers-ai-models/llama-2-7b-chat-int8.json +++ b/src/content/workers-ai-models/llama-2-7b-chat-int8.json @@ -25,9 +25,26 @@ "prompt": { "type": "string", "minLength": 1, - "maxLength": 131072, "description": "The input text prompt for the model to generate a response." }, + "lora": { + "type": "string", + "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." + }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, "raw": { "type": "boolean", "default": false, @@ -85,10 +102,6 @@ "minimum": 0, "maximum": 2, "description": "Increases the likelihood of the model introducing new topics." - }, - "lora": { - "type": "string", - "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." } }, "required": [ @@ -110,7 +123,6 @@ }, "content": { "type": "string", - "maxLength": 131072, "description": "The content of the message as a string." } }, @@ -279,10 +291,29 @@ ] } }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, + "raw": { + "type": "boolean", + "default": false, + "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting." + }, "stream": { "type": "boolean", "default": false, - "description": "If true, the response will be streamed back incrementally." 
+ "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events." }, "max_tokens": { "type": "integer", @@ -300,7 +331,7 @@ "type": "number", "minimum": 0, "maximum": 2, - "description": "Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." + "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." }, "top_k": { "type": "integer", @@ -387,7 +418,10 @@ } } } - } + }, + "required": [ + "response" + ] }, { "type": "string", diff --git a/src/content/workers-ai-models/llama-3-8b-instruct-awq.json b/src/content/workers-ai-models/llama-3-8b-instruct-awq.json index 49808a21eb3eb96..0b689e1fdc63114 100644 --- a/src/content/workers-ai-models/llama-3-8b-instruct-awq.json +++ b/src/content/workers-ai-models/llama-3-8b-instruct-awq.json @@ -33,9 +33,26 @@ "prompt": { "type": "string", "minLength": 1, - "maxLength": 131072, "description": "The input text prompt for the model to generate a response." }, + "lora": { + "type": "string", + "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." + }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, "raw": { "type": "boolean", "default": false, @@ -93,10 +110,6 @@ "minimum": 0, "maximum": 2, "description": "Increases the likelihood of the model introducing new topics." - }, - "lora": { - "type": "string", - "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." 
} }, "required": [ @@ -118,7 +131,6 @@ }, "content": { "type": "string", - "maxLength": 131072, "description": "The content of the message as a string." } }, @@ -287,10 +299,29 @@ ] } }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, + "raw": { + "type": "boolean", + "default": false, + "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting." + }, "stream": { "type": "boolean", "default": false, - "description": "If true, the response will be streamed back incrementally." + "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events." }, "max_tokens": { "type": "integer", @@ -308,7 +339,7 @@ "type": "number", "minimum": 0, "maximum": 2, - "description": "Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." + "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." }, "top_k": { "type": "integer", @@ -395,7 +426,10 @@ } } } - } + }, + "required": [ + "response" + ] }, { "type": "string", diff --git a/src/content/workers-ai-models/llama-3-8b-instruct.json b/src/content/workers-ai-models/llama-3-8b-instruct.json index 384b1b5be0f3450..a1818f9b3649b66 100644 --- a/src/content/workers-ai-models/llama-3-8b-instruct.json +++ b/src/content/workers-ai-models/llama-3-8b-instruct.json @@ -33,9 +33,26 @@ "prompt": { "type": "string", "minLength": 1, - "maxLength": 131072, "description": "The input text prompt for the model to generate a response." 
}, + "lora": { + "type": "string", + "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." + }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, "raw": { "type": "boolean", "default": false, @@ -93,10 +110,6 @@ "minimum": 0, "maximum": 2, "description": "Increases the likelihood of the model introducing new topics." - }, - "lora": { - "type": "string", - "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." } }, "required": [ @@ -118,7 +131,6 @@ }, "content": { "type": "string", - "maxLength": 131072, "description": "The content of the message as a string." } }, @@ -287,10 +299,29 @@ ] } }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, + "raw": { + "type": "boolean", + "default": false, + "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting." + }, "stream": { "type": "boolean", "default": false, - "description": "If true, the response will be streamed back incrementally." + "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events." }, "max_tokens": { "type": "integer", @@ -308,7 +339,7 @@ "type": "number", "minimum": 0, "maximum": 2, - "description": "Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." + "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." 
}, "top_k": { "type": "integer", @@ -395,7 +426,10 @@ } } } - } + }, + "required": [ + "response" + ] }, { "type": "string", diff --git a/src/content/workers-ai-models/llama-3.1-8b-instruct-awq.json b/src/content/workers-ai-models/llama-3.1-8b-instruct-awq.json index b7e6849aaf220d1..e2f4e2ec65c9825 100644 --- a/src/content/workers-ai-models/llama-3.1-8b-instruct-awq.json +++ b/src/content/workers-ai-models/llama-3.1-8b-instruct-awq.json @@ -29,9 +29,26 @@ "prompt": { "type": "string", "minLength": 1, - "maxLength": 131072, "description": "The input text prompt for the model to generate a response." }, + "lora": { + "type": "string", + "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." + }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, "raw": { "type": "boolean", "default": false, @@ -89,10 +106,6 @@ "minimum": 0, "maximum": 2, "description": "Increases the likelihood of the model introducing new topics." - }, - "lora": { - "type": "string", - "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." } }, "required": [ @@ -114,7 +127,6 @@ }, "content": { "type": "string", - "maxLength": 131072, "description": "The content of the message as a string." } }, @@ -283,10 +295,29 @@ ] } }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, + "raw": { + "type": "boolean", + "default": false, + "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting." + }, "stream": { "type": "boolean", "default": false, - "description": "If true, the response will be streamed back incrementally." 
+ "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events." }, "max_tokens": { "type": "integer", @@ -304,7 +335,7 @@ "type": "number", "minimum": 0, "maximum": 2, - "description": "Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." + "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." }, "top_k": { "type": "integer", @@ -391,7 +422,10 @@ } } } - } + }, + "required": [ + "response" + ] }, { "type": "string", diff --git a/src/content/workers-ai-models/llama-3.1-8b-instruct-fp8.json b/src/content/workers-ai-models/llama-3.1-8b-instruct-fp8.json index 0c7ef40b5bd0bea..4e12b3ad545da13 100644 --- a/src/content/workers-ai-models/llama-3.1-8b-instruct-fp8.json +++ b/src/content/workers-ai-models/llama-3.1-8b-instruct-fp8.json @@ -29,9 +29,26 @@ "prompt": { "type": "string", "minLength": 1, - "maxLength": 131072, "description": "The input text prompt for the model to generate a response." }, + "lora": { + "type": "string", + "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." + }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, "raw": { "type": "boolean", "default": false, @@ -89,10 +106,6 @@ "minimum": 0, "maximum": 2, "description": "Increases the likelihood of the model introducing new topics." - }, - "lora": { - "type": "string", - "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." 
} }, "required": [ @@ -114,7 +127,6 @@ }, "content": { "type": "string", - "maxLength": 131072, "description": "The content of the message as a string." } }, @@ -283,10 +295,29 @@ ] } }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, + "raw": { + "type": "boolean", + "default": false, + "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting." + }, "stream": { "type": "boolean", "default": false, - "description": "If true, the response will be streamed back incrementally." + "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events." }, "max_tokens": { "type": "integer", @@ -304,7 +335,7 @@ "type": "number", "minimum": 0, "maximum": 2, - "description": "Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." + "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." }, "top_k": { "type": "integer", @@ -391,7 +422,10 @@ } } } - } + }, + "required": [ + "response" + ] }, { "type": "string", diff --git a/src/content/workers-ai-models/llama-3.1-8b-instruct.json b/src/content/workers-ai-models/llama-3.1-8b-instruct.json index cc529285a8becb0..e9612893b7cf75a 100644 --- a/src/content/workers-ai-models/llama-3.1-8b-instruct.json +++ b/src/content/workers-ai-models/llama-3.1-8b-instruct.json @@ -29,9 +29,26 @@ "prompt": { "type": "string", "minLength": 1, - "maxLength": 131072, "description": "The input text prompt for the model to generate a response." 
}, + "lora": { + "type": "string", + "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." + }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, "raw": { "type": "boolean", "default": false, @@ -89,10 +106,6 @@ "minimum": 0, "maximum": 2, "description": "Increases the likelihood of the model introducing new topics." - }, - "lora": { - "type": "string", - "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." } }, "required": [ @@ -114,7 +127,6 @@ }, "content": { "type": "string", - "maxLength": 131072, "description": "The content of the message as a string." } }, @@ -283,10 +295,29 @@ ] } }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, + "raw": { + "type": "boolean", + "default": false, + "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting." + }, "stream": { "type": "boolean", "default": false, - "description": "If true, the response will be streamed back incrementally." + "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events." }, "max_tokens": { "type": "integer", @@ -304,7 +335,7 @@ "type": "number", "minimum": 0, "maximum": 2, - "description": "Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." + "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." 
}, "top_k": { "type": "integer", @@ -391,7 +422,10 @@ } } } - } + }, + "required": [ + "response" + ] }, { "type": "string", diff --git a/src/content/workers-ai-models/llama-3.2-1b-instruct.json b/src/content/workers-ai-models/llama-3.2-1b-instruct.json index ae45ceed6defd92..fecbe4f6dce128d 100644 --- a/src/content/workers-ai-models/llama-3.2-1b-instruct.json +++ b/src/content/workers-ai-models/llama-3.2-1b-instruct.json @@ -29,9 +29,26 @@ "prompt": { "type": "string", "minLength": 1, - "maxLength": 131072, "description": "The input text prompt for the model to generate a response." }, + "lora": { + "type": "string", + "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." + }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, "raw": { "type": "boolean", "default": false, @@ -89,10 +106,6 @@ "minimum": 0, "maximum": 2, "description": "Increases the likelihood of the model introducing new topics." - }, - "lora": { - "type": "string", - "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." } }, "required": [ @@ -114,7 +127,6 @@ }, "content": { "type": "string", - "maxLength": 131072, "description": "The content of the message as a string." } }, @@ -283,10 +295,29 @@ ] } }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, + "raw": { + "type": "boolean", + "default": false, + "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting." + }, "stream": { "type": "boolean", "default": false, - "description": "If true, the response will be streamed back incrementally." 
+ "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events." }, "max_tokens": { "type": "integer", @@ -304,7 +335,7 @@ "type": "number", "minimum": 0, "maximum": 2, - "description": "Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." + "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." }, "top_k": { "type": "integer", @@ -391,7 +422,10 @@ } } } - } + }, + "required": [ + "response" + ] }, { "type": "string", diff --git a/src/content/workers-ai-models/llama-3.2-3b-instruct.json b/src/content/workers-ai-models/llama-3.2-3b-instruct.json index 84514121cfda5be..8c03b8b0b8cb1be 100644 --- a/src/content/workers-ai-models/llama-3.2-3b-instruct.json +++ b/src/content/workers-ai-models/llama-3.2-3b-instruct.json @@ -29,9 +29,26 @@ "prompt": { "type": "string", "minLength": 1, - "maxLength": 131072, "description": "The input text prompt for the model to generate a response." }, + "lora": { + "type": "string", + "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." + }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, "raw": { "type": "boolean", "default": false, @@ -89,10 +106,6 @@ "minimum": 0, "maximum": 2, "description": "Increases the likelihood of the model introducing new topics." - }, - "lora": { - "type": "string", - "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." 
} }, "required": [ @@ -114,7 +127,6 @@ }, "content": { "type": "string", - "maxLength": 131072, "description": "The content of the message as a string." } }, @@ -283,10 +295,29 @@ ] } }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, + "raw": { + "type": "boolean", + "default": false, + "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting." + }, "stream": { "type": "boolean", "default": false, - "description": "If true, the response will be streamed back incrementally." + "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events." }, "max_tokens": { "type": "integer", @@ -304,7 +335,7 @@ "type": "number", "minimum": 0, "maximum": 2, - "description": "Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." + "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." 
}, "top_k": { "type": "integer", @@ -391,7 +422,10 @@ } } } - } + }, + "required": [ + "response" + ] }, { "type": "string", diff --git a/src/content/workers-ai-models/llama-3.3-70b-instruct-fp8-fast.json b/src/content/workers-ai-models/llama-3.3-70b-instruct-fp8-fast.json index bfa1c5e0d2cafe0..e9acd49fc755756 100644 --- a/src/content/workers-ai-models/llama-3.3-70b-instruct-fp8-fast.json +++ b/src/content/workers-ai-models/llama-3.3-70b-instruct-fp8-fast.json @@ -14,6 +14,10 @@ "property_id": "context_window", "value": "24000" }, + { + "property_id": "function_calling", + "value": "true" + }, { "property_id": "terms", "value": "https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/LICENSE" @@ -29,9 +33,26 @@ "prompt": { "type": "string", "minLength": 1, - "maxLength": 131072, "description": "The input text prompt for the model to generate a response." }, + "lora": { + "type": "string", + "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." + }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, "raw": { "type": "boolean", "default": false, @@ -89,10 +110,6 @@ "minimum": 0, "maximum": 2, "description": "Increases the likelihood of the model introducing new topics." - }, - "lora": { - "type": "string", - "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." } }, "required": [ @@ -114,7 +131,6 @@ }, "content": { "type": "string", - "maxLength": 131072, "description": "The content of the message as a string." 
} }, @@ -283,10 +299,29 @@ ] } }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, + "raw": { + "type": "boolean", + "default": false, + "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting." + }, "stream": { "type": "boolean", "default": false, - "description": "If true, the response will be streamed back incrementally." + "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events." }, "max_tokens": { "type": "integer", @@ -304,7 +339,7 @@ "type": "number", "minimum": 0, "maximum": 2, - "description": "Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." + "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." }, "top_k": { "type": "integer", @@ -391,7 +426,10 @@ } } } - } + }, + "required": [ + "response" + ] }, { "type": "string", diff --git a/src/content/workers-ai-models/llamaguard-7b-awq.json b/src/content/workers-ai-models/llamaguard-7b-awq.json index 0ba6dae8c14358c..00bbf98b9494942 100644 --- a/src/content/workers-ai-models/llamaguard-7b-awq.json +++ b/src/content/workers-ai-models/llamaguard-7b-awq.json @@ -29,9 +29,26 @@ "prompt": { "type": "string", "minLength": 1, - "maxLength": 131072, "description": "The input text prompt for the model to generate a response." }, + "lora": { + "type": "string", + "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." 
+ }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, "raw": { "type": "boolean", "default": false, @@ -89,10 +106,6 @@ "minimum": 0, "maximum": 2, "description": "Increases the likelihood of the model introducing new topics." - }, - "lora": { - "type": "string", - "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." } }, "required": [ @@ -114,7 +127,6 @@ }, "content": { "type": "string", - "maxLength": 131072, "description": "The content of the message as a string." } }, @@ -283,10 +295,29 @@ ] } }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, + "raw": { + "type": "boolean", + "default": false, + "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting." + }, "stream": { "type": "boolean", "default": false, - "description": "If true, the response will be streamed back incrementally." + "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events." }, "max_tokens": { "type": "integer", @@ -304,7 +335,7 @@ "type": "number", "minimum": 0, "maximum": 2, - "description": "Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." + "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." 
}, "top_k": { "type": "integer", @@ -391,7 +422,10 @@ } } } - } + }, + "required": [ + "response" + ] }, { "type": "string", diff --git a/src/content/workers-ai-models/melotts.json b/src/content/workers-ai-models/melotts.json index 8959f07a3f962b5..fc995d2e560dbfa 100644 --- a/src/content/workers-ai-models/melotts.json +++ b/src/content/workers-ai-models/melotts.json @@ -50,4 +50,4 @@ ] } } -} +} \ No newline at end of file diff --git a/src/content/workers-ai-models/meta-llama-3-8b-instruct.json b/src/content/workers-ai-models/meta-llama-3-8b-instruct.json index be5e832aae9c10e..8241dc8ddaedcde 100644 --- a/src/content/workers-ai-models/meta-llama-3-8b-instruct.json +++ b/src/content/workers-ai-models/meta-llama-3-8b-instruct.json @@ -25,9 +25,26 @@ "prompt": { "type": "string", "minLength": 1, - "maxLength": 131072, "description": "The input text prompt for the model to generate a response." }, + "lora": { + "type": "string", + "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." + }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, "raw": { "type": "boolean", "default": false, @@ -85,10 +102,6 @@ "minimum": 0, "maximum": 2, "description": "Increases the likelihood of the model introducing new topics." - }, - "lora": { - "type": "string", - "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." } }, "required": [ @@ -110,7 +123,6 @@ }, "content": { "type": "string", - "maxLength": 131072, "description": "The content of the message as a string." 
} }, @@ -279,10 +291,29 @@ ] } }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, + "raw": { + "type": "boolean", + "default": false, + "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting." + }, "stream": { "type": "boolean", "default": false, - "description": "If true, the response will be streamed back incrementally." + "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events." }, "max_tokens": { "type": "integer", @@ -300,7 +331,7 @@ "type": "number", "minimum": 0, "maximum": 2, - "description": "Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." + "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." }, "top_k": { "type": "integer", @@ -387,7 +418,10 @@ } } } - } + }, + "required": [ + "response" + ] }, { "type": "string", diff --git a/src/content/workers-ai-models/mistral-7b-instruct-v0.1-awq.json b/src/content/workers-ai-models/mistral-7b-instruct-v0.1-awq.json index f5f366aca8acce7..4b2a4e758b3ebe3 100644 --- a/src/content/workers-ai-models/mistral-7b-instruct-v0.1-awq.json +++ b/src/content/workers-ai-models/mistral-7b-instruct-v0.1-awq.json @@ -33,9 +33,26 @@ "prompt": { "type": "string", "minLength": 1, - "maxLength": 131072, "description": "The input text prompt for the model to generate a response." }, + "lora": { + "type": "string", + "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." 
+ }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, "raw": { "type": "boolean", "default": false, @@ -93,10 +110,6 @@ "minimum": 0, "maximum": 2, "description": "Increases the likelihood of the model introducing new topics." - }, - "lora": { - "type": "string", - "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." } }, "required": [ @@ -118,7 +131,6 @@ }, "content": { "type": "string", - "maxLength": 131072, "description": "The content of the message as a string." } }, @@ -287,10 +299,29 @@ ] } }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, + "raw": { + "type": "boolean", + "default": false, + "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting." + }, "stream": { "type": "boolean", "default": false, - "description": "If true, the response will be streamed back incrementally." + "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events." }, "max_tokens": { "type": "integer", @@ -308,7 +339,7 @@ "type": "number", "minimum": 0, "maximum": 2, - "description": "Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." + "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." 
}, "top_k": { "type": "integer", @@ -395,7 +426,10 @@ } } } - } + }, + "required": [ + "response" + ] }, { "type": "string", diff --git a/src/content/workers-ai-models/mistral-7b-instruct-v0.1.json b/src/content/workers-ai-models/mistral-7b-instruct-v0.1.json index 76b41cb89e42565..5efbfdfa9f6ccf8 100644 --- a/src/content/workers-ai-models/mistral-7b-instruct-v0.1.json +++ b/src/content/workers-ai-models/mistral-7b-instruct-v0.1.json @@ -37,9 +37,26 @@ "prompt": { "type": "string", "minLength": 1, - "maxLength": 131072, "description": "The input text prompt for the model to generate a response." }, + "lora": { + "type": "string", + "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." + }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, "raw": { "type": "boolean", "default": false, @@ -97,10 +114,6 @@ "minimum": 0, "maximum": 2, "description": "Increases the likelihood of the model introducing new topics." - }, - "lora": { - "type": "string", - "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." } }, "required": [ @@ -122,7 +135,6 @@ }, "content": { "type": "string", - "maxLength": 131072, "description": "The content of the message as a string." } }, @@ -291,10 +303,29 @@ ] } }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, + "raw": { + "type": "boolean", + "default": false, + "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting." + }, "stream": { "type": "boolean", "default": false, - "description": "If true, the response will be streamed back incrementally." 
+ "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events." }, "max_tokens": { "type": "integer", @@ -312,7 +343,7 @@ "type": "number", "minimum": 0, "maximum": 2, - "description": "Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." + "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." }, "top_k": { "type": "integer", @@ -399,7 +430,10 @@ } } } - } + }, + "required": [ + "response" + ] }, { "type": "string", diff --git a/src/content/workers-ai-models/mistral-7b-instruct-v0.2-lora.json b/src/content/workers-ai-models/mistral-7b-instruct-v0.2-lora.json index b88c25fe4d8cf6c..39959c2d8470687 100644 --- a/src/content/workers-ai-models/mistral-7b-instruct-v0.2-lora.json +++ b/src/content/workers-ai-models/mistral-7b-instruct-v0.2-lora.json @@ -33,9 +33,26 @@ "prompt": { "type": "string", "minLength": 1, - "maxLength": 131072, "description": "The input text prompt for the model to generate a response." }, + "lora": { + "type": "string", + "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." + }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, "raw": { "type": "boolean", "default": false, @@ -93,10 +110,6 @@ "minimum": 0, "maximum": 2, "description": "Increases the likelihood of the model introducing new topics." - }, - "lora": { - "type": "string", - "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." 
} }, "required": [ @@ -118,7 +131,6 @@ }, "content": { "type": "string", - "maxLength": 131072, "description": "The content of the message as a string." } }, @@ -287,10 +299,29 @@ ] } }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, + "raw": { + "type": "boolean", + "default": false, + "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting." + }, "stream": { "type": "boolean", "default": false, - "description": "If true, the response will be streamed back incrementally." + "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events." }, "max_tokens": { "type": "integer", @@ -308,7 +339,7 @@ "type": "number", "minimum": 0, "maximum": 2, - "description": "Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." + "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." }, "top_k": { "type": "integer", @@ -395,7 +426,10 @@ } } } - } + }, + "required": [ + "response" + ] }, { "type": "string", diff --git a/src/content/workers-ai-models/mistral-7b-instruct-v0.2.json b/src/content/workers-ai-models/mistral-7b-instruct-v0.2.json index 85bbc62cc8dbc66..8eb7ddfcfca1a2a 100644 --- a/src/content/workers-ai-models/mistral-7b-instruct-v0.2.json +++ b/src/content/workers-ai-models/mistral-7b-instruct-v0.2.json @@ -49,9 +49,26 @@ "prompt": { "type": "string", "minLength": 1, - "maxLength": 131072, "description": "The input text prompt for the model to generate a response." 
}, + "lora": { + "type": "string", + "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." + }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, "raw": { "type": "boolean", "default": false, @@ -109,10 +126,6 @@ "minimum": 0, "maximum": 2, "description": "Increases the likelihood of the model introducing new topics." - }, - "lora": { - "type": "string", - "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." } }, "required": [ @@ -134,7 +147,6 @@ }, "content": { "type": "string", - "maxLength": 131072, "description": "The content of the message as a string." } }, @@ -303,10 +315,29 @@ ] } }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, + "raw": { + "type": "boolean", + "default": false, + "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting." + }, "stream": { "type": "boolean", "default": false, - "description": "If true, the response will be streamed back incrementally." + "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events." }, "max_tokens": { "type": "integer", @@ -324,7 +355,7 @@ "type": "number", "minimum": 0, "maximum": 2, - "description": "Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." + "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." 
}, "top_k": { "type": "integer", @@ -411,7 +442,10 @@ } } } - } + }, + "required": [ + "response" + ] }, { "type": "string", diff --git a/src/content/workers-ai-models/neural-chat-7b-v3-1-awq.json b/src/content/workers-ai-models/neural-chat-7b-v3-1-awq.json index 8fd28ad72c16eb9..80d89cbcde11a3f 100644 --- a/src/content/workers-ai-models/neural-chat-7b-v3-1-awq.json +++ b/src/content/workers-ai-models/neural-chat-7b-v3-1-awq.json @@ -29,9 +29,26 @@ "prompt": { "type": "string", "minLength": 1, - "maxLength": 131072, "description": "The input text prompt for the model to generate a response." }, + "lora": { + "type": "string", + "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." + }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, "raw": { "type": "boolean", "default": false, @@ -89,10 +106,6 @@ "minimum": 0, "maximum": 2, "description": "Increases the likelihood of the model introducing new topics." - }, - "lora": { - "type": "string", - "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." } }, "required": [ @@ -114,7 +127,6 @@ }, "content": { "type": "string", - "maxLength": 131072, "description": "The content of the message as a string." } }, @@ -283,10 +295,29 @@ ] } }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, + "raw": { + "type": "boolean", + "default": false, + "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting." + }, "stream": { "type": "boolean", "default": false, - "description": "If true, the response will be streamed back incrementally." 
+ "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events." }, "max_tokens": { "type": "integer", @@ -304,7 +335,7 @@ "type": "number", "minimum": 0, "maximum": 2, - "description": "Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." + "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." }, "top_k": { "type": "integer", @@ -391,7 +422,10 @@ } } } - } + }, + "required": [ + "response" + ] }, { "type": "string", diff --git a/src/content/workers-ai-models/openchat-3.5-0106.json b/src/content/workers-ai-models/openchat-3.5-0106.json index a77546f08ec33e6..845f10b02b3ba9e 100644 --- a/src/content/workers-ai-models/openchat-3.5-0106.json +++ b/src/content/workers-ai-models/openchat-3.5-0106.json @@ -33,9 +33,26 @@ "prompt": { "type": "string", "minLength": 1, - "maxLength": 131072, "description": "The input text prompt for the model to generate a response." }, + "lora": { + "type": "string", + "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." + }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, "raw": { "type": "boolean", "default": false, @@ -93,10 +110,6 @@ "minimum": 0, "maximum": 2, "description": "Increases the likelihood of the model introducing new topics." - }, - "lora": { - "type": "string", - "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." 
} }, "required": [ @@ -118,7 +131,6 @@ }, "content": { "type": "string", - "maxLength": 131072, "description": "The content of the message as a string." } }, @@ -287,10 +299,29 @@ ] } }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, + "raw": { + "type": "boolean", + "default": false, + "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting." + }, "stream": { "type": "boolean", "default": false, - "description": "If true, the response will be streamed back incrementally." + "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events." }, "max_tokens": { "type": "integer", @@ -308,7 +339,7 @@ "type": "number", "minimum": 0, "maximum": 2, - "description": "Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." + "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." }, "top_k": { "type": "integer", @@ -395,7 +426,10 @@ } } } - } + }, + "required": [ + "response" + ] }, { "type": "string", diff --git a/src/content/workers-ai-models/openhermes-2.5-mistral-7b-awq.json b/src/content/workers-ai-models/openhermes-2.5-mistral-7b-awq.json index b6893b29022945e..ad51380a33ccb5b 100644 --- a/src/content/workers-ai-models/openhermes-2.5-mistral-7b-awq.json +++ b/src/content/workers-ai-models/openhermes-2.5-mistral-7b-awq.json @@ -29,9 +29,26 @@ "prompt": { "type": "string", "minLength": 1, - "maxLength": 131072, "description": "The input text prompt for the model to generate a response." 
}, + "lora": { + "type": "string", + "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." + }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, "raw": { "type": "boolean", "default": false, @@ -89,10 +106,6 @@ "minimum": 0, "maximum": 2, "description": "Increases the likelihood of the model introducing new topics." - }, - "lora": { - "type": "string", - "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." } }, "required": [ @@ -114,7 +127,6 @@ }, "content": { "type": "string", - "maxLength": 131072, "description": "The content of the message as a string." } }, @@ -283,10 +295,29 @@ ] } }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, + "raw": { + "type": "boolean", + "default": false, + "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting." + }, "stream": { "type": "boolean", "default": false, - "description": "If true, the response will be streamed back incrementally." + "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events." }, "max_tokens": { "type": "integer", @@ -304,7 +335,7 @@ "type": "number", "minimum": 0, "maximum": 2, - "description": "Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." + "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." 
}, "top_k": { "type": "integer", @@ -391,7 +422,10 @@ } } } - } + }, + "required": [ + "response" + ] }, { "type": "string", diff --git a/src/content/workers-ai-models/phi-2.json b/src/content/workers-ai-models/phi-2.json index 9f97cdaa899e9d5..b9e9ff5b7f694f9 100644 --- a/src/content/workers-ai-models/phi-2.json +++ b/src/content/workers-ai-models/phi-2.json @@ -33,9 +33,26 @@ "prompt": { "type": "string", "minLength": 1, - "maxLength": 131072, "description": "The input text prompt for the model to generate a response." }, + "lora": { + "type": "string", + "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." + }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, "raw": { "type": "boolean", "default": false, @@ -93,10 +110,6 @@ "minimum": 0, "maximum": 2, "description": "Increases the likelihood of the model introducing new topics." - }, - "lora": { - "type": "string", - "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." } }, "required": [ @@ -118,7 +131,6 @@ }, "content": { "type": "string", - "maxLength": 131072, "description": "The content of the message as a string." } }, @@ -287,10 +299,29 @@ ] } }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, + "raw": { + "type": "boolean", + "default": false, + "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting." + }, "stream": { "type": "boolean", "default": false, - "description": "If true, the response will be streamed back incrementally." + "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events." 
}, "max_tokens": { "type": "integer", @@ -308,7 +339,7 @@ "type": "number", "minimum": 0, "maximum": 2, - "description": "Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." + "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." }, "top_k": { "type": "integer", @@ -395,7 +426,10 @@ } } } - } + }, + "required": [ + "response" + ] }, { "type": "string", diff --git a/src/content/workers-ai-models/qwen1.5-0.5b-chat.json b/src/content/workers-ai-models/qwen1.5-0.5b-chat.json index a98f325d0b7a672..4c508c6e9a10242 100644 --- a/src/content/workers-ai-models/qwen1.5-0.5b-chat.json +++ b/src/content/workers-ai-models/qwen1.5-0.5b-chat.json @@ -33,9 +33,26 @@ "prompt": { "type": "string", "minLength": 1, - "maxLength": 131072, "description": "The input text prompt for the model to generate a response." }, + "lora": { + "type": "string", + "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." + }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, "raw": { "type": "boolean", "default": false, @@ -93,10 +110,6 @@ "minimum": 0, "maximum": 2, "description": "Increases the likelihood of the model introducing new topics." - }, - "lora": { - "type": "string", - "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." } }, "required": [ @@ -118,7 +131,6 @@ }, "content": { "type": "string", - "maxLength": 131072, "description": "The content of the message as a string." 
} }, @@ -287,10 +299,29 @@ ] } }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, + "raw": { + "type": "boolean", + "default": false, + "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting." + }, "stream": { "type": "boolean", "default": false, - "description": "If true, the response will be streamed back incrementally." + "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events." }, "max_tokens": { "type": "integer", @@ -308,7 +339,7 @@ "type": "number", "minimum": 0, "maximum": 2, - "description": "Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." + "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." }, "top_k": { "type": "integer", @@ -395,7 +426,10 @@ } } } - } + }, + "required": [ + "response" + ] }, { "type": "string", diff --git a/src/content/workers-ai-models/qwen1.5-1.8b-chat.json b/src/content/workers-ai-models/qwen1.5-1.8b-chat.json index ea9075487713f22..1bb4524ca290d15 100644 --- a/src/content/workers-ai-models/qwen1.5-1.8b-chat.json +++ b/src/content/workers-ai-models/qwen1.5-1.8b-chat.json @@ -33,9 +33,26 @@ "prompt": { "type": "string", "minLength": 1, - "maxLength": 131072, "description": "The input text prompt for the model to generate a response." }, + "lora": { + "type": "string", + "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." 
+ }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, "raw": { "type": "boolean", "default": false, @@ -93,10 +110,6 @@ "minimum": 0, "maximum": 2, "description": "Increases the likelihood of the model introducing new topics." - }, - "lora": { - "type": "string", - "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." } }, "required": [ @@ -118,7 +131,6 @@ }, "content": { "type": "string", - "maxLength": 131072, "description": "The content of the message as a string." } }, @@ -287,10 +299,29 @@ ] } }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, + "raw": { + "type": "boolean", + "default": false, + "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting." + }, "stream": { "type": "boolean", "default": false, - "description": "If true, the response will be streamed back incrementally." + "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events." }, "max_tokens": { "type": "integer", @@ -308,7 +339,7 @@ "type": "number", "minimum": 0, "maximum": 2, - "description": "Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." + "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." 
}, "top_k": { "type": "integer", @@ -395,7 +426,10 @@ } } } - } + }, + "required": [ + "response" + ] }, { "type": "string", diff --git a/src/content/workers-ai-models/qwen1.5-14b-chat-awq.json b/src/content/workers-ai-models/qwen1.5-14b-chat-awq.json index 00fc17848f93a8c..29a45f362ca7380 100644 --- a/src/content/workers-ai-models/qwen1.5-14b-chat-awq.json +++ b/src/content/workers-ai-models/qwen1.5-14b-chat-awq.json @@ -33,9 +33,26 @@ "prompt": { "type": "string", "minLength": 1, - "maxLength": 131072, "description": "The input text prompt for the model to generate a response." }, + "lora": { + "type": "string", + "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." + }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, "raw": { "type": "boolean", "default": false, @@ -93,10 +110,6 @@ "minimum": 0, "maximum": 2, "description": "Increases the likelihood of the model introducing new topics." - }, - "lora": { - "type": "string", - "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." } }, "required": [ @@ -118,7 +131,6 @@ }, "content": { "type": "string", - "maxLength": 131072, "description": "The content of the message as a string." } }, @@ -287,10 +299,29 @@ ] } }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, + "raw": { + "type": "boolean", + "default": false, + "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting." + }, "stream": { "type": "boolean", "default": false, - "description": "If true, the response will be streamed back incrementally." 
+ "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events." }, "max_tokens": { "type": "integer", @@ -308,7 +339,7 @@ "type": "number", "minimum": 0, "maximum": 2, - "description": "Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." + "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." }, "top_k": { "type": "integer", @@ -395,7 +426,10 @@ } } } - } + }, + "required": [ + "response" + ] }, { "type": "string", diff --git a/src/content/workers-ai-models/qwen1.5-7b-chat-awq.json b/src/content/workers-ai-models/qwen1.5-7b-chat-awq.json index 628664271455465..3997aba0ce99009 100644 --- a/src/content/workers-ai-models/qwen1.5-7b-chat-awq.json +++ b/src/content/workers-ai-models/qwen1.5-7b-chat-awq.json @@ -33,9 +33,26 @@ "prompt": { "type": "string", "minLength": 1, - "maxLength": 131072, "description": "The input text prompt for the model to generate a response." }, + "lora": { + "type": "string", + "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." + }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, "raw": { "type": "boolean", "default": false, @@ -93,10 +110,6 @@ "minimum": 0, "maximum": 2, "description": "Increases the likelihood of the model introducing new topics." - }, - "lora": { - "type": "string", - "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." 
} }, "required": [ @@ -118,7 +131,6 @@ }, "content": { "type": "string", - "maxLength": 131072, "description": "The content of the message as a string." } }, @@ -287,10 +299,29 @@ ] } }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, + "raw": { + "type": "boolean", + "default": false, + "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting." + }, "stream": { "type": "boolean", "default": false, - "description": "If true, the response will be streamed back incrementally." + "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events." }, "max_tokens": { "type": "integer", @@ -308,7 +339,7 @@ "type": "number", "minimum": 0, "maximum": 2, - "description": "Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." + "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." }, "top_k": { "type": "integer", @@ -395,7 +426,10 @@ } } } - } + }, + "required": [ + "response" + ] }, { "type": "string", diff --git a/src/content/workers-ai-models/sqlcoder-7b-2.json b/src/content/workers-ai-models/sqlcoder-7b-2.json index f81efdac4d23b9d..1af5782ec92258b 100644 --- a/src/content/workers-ai-models/sqlcoder-7b-2.json +++ b/src/content/workers-ai-models/sqlcoder-7b-2.json @@ -37,9 +37,26 @@ "prompt": { "type": "string", "minLength": 1, - "maxLength": 131072, "description": "The input text prompt for the model to generate a response." 
}, + "lora": { + "type": "string", + "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." + }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, "raw": { "type": "boolean", "default": false, @@ -97,10 +114,6 @@ "minimum": 0, "maximum": 2, "description": "Increases the likelihood of the model introducing new topics." - }, - "lora": { - "type": "string", - "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." } }, "required": [ @@ -122,7 +135,6 @@ }, "content": { "type": "string", - "maxLength": 131072, "description": "The content of the message as a string." } }, @@ -291,10 +303,29 @@ ] } }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, + "raw": { + "type": "boolean", + "default": false, + "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting." + }, "stream": { "type": "boolean", "default": false, - "description": "If true, the response will be streamed back incrementally." + "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events." }, "max_tokens": { "type": "integer", @@ -312,7 +343,7 @@ "type": "number", "minimum": 0, "maximum": 2, - "description": "Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." + "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." 
}, "top_k": { "type": "integer", @@ -399,7 +430,10 @@ } } } - } + }, + "required": [ + "response" + ] }, { "type": "string", diff --git a/src/content/workers-ai-models/starling-lm-7b-beta.json b/src/content/workers-ai-models/starling-lm-7b-beta.json index 6a94fffb23018e5..d5c5b6ccb42db64 100644 --- a/src/content/workers-ai-models/starling-lm-7b-beta.json +++ b/src/content/workers-ai-models/starling-lm-7b-beta.json @@ -45,9 +45,26 @@ "prompt": { "type": "string", "minLength": 1, - "maxLength": 131072, "description": "The input text prompt for the model to generate a response." }, + "lora": { + "type": "string", + "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." + }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, "raw": { "type": "boolean", "default": false, @@ -105,10 +122,6 @@ "minimum": 0, "maximum": 2, "description": "Increases the likelihood of the model introducing new topics." - }, - "lora": { - "type": "string", - "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." } }, "required": [ @@ -130,7 +143,6 @@ }, "content": { "type": "string", - "maxLength": 131072, "description": "The content of the message as a string." } }, @@ -299,10 +311,29 @@ ] } }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, + "raw": { + "type": "boolean", + "default": false, + "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting." + }, "stream": { "type": "boolean", "default": false, - "description": "If true, the response will be streamed back incrementally." 
+ "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events." }, "max_tokens": { "type": "integer", @@ -320,7 +351,7 @@ "type": "number", "minimum": 0, "maximum": 2, - "description": "Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." + "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." }, "top_k": { "type": "integer", @@ -407,7 +438,10 @@ } } } - } + }, + "required": [ + "response" + ] }, { "type": "string", diff --git a/src/content/workers-ai-models/tinyllama-1.1b-chat-v1.0.json b/src/content/workers-ai-models/tinyllama-1.1b-chat-v1.0.json index 3dc1b3a4cd04d06..048b21c154c9af6 100644 --- a/src/content/workers-ai-models/tinyllama-1.1b-chat-v1.0.json +++ b/src/content/workers-ai-models/tinyllama-1.1b-chat-v1.0.json @@ -33,9 +33,26 @@ "prompt": { "type": "string", "minLength": 1, - "maxLength": 131072, "description": "The input text prompt for the model to generate a response." }, + "lora": { + "type": "string", + "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." + }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, "raw": { "type": "boolean", "default": false, @@ -93,10 +110,6 @@ "minimum": 0, "maximum": 2, "description": "Increases the likelihood of the model introducing new topics." - }, - "lora": { - "type": "string", - "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." 
} }, "required": [ @@ -118,7 +131,6 @@ }, "content": { "type": "string", - "maxLength": 131072, "description": "The content of the message as a string." } }, @@ -287,10 +299,29 @@ ] } }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, + "raw": { + "type": "boolean", + "default": false, + "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting." + }, "stream": { "type": "boolean", "default": false, - "description": "If true, the response will be streamed back incrementally." + "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events." }, "max_tokens": { "type": "integer", @@ -308,7 +339,7 @@ "type": "number", "minimum": 0, "maximum": 2, - "description": "Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." + "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." }, "top_k": { "type": "integer", @@ -395,7 +426,10 @@ } } } - } + }, + "required": [ + "response" + ] }, { "type": "string", diff --git a/src/content/workers-ai-models/una-cybertron-7b-v2-bf16.json b/src/content/workers-ai-models/una-cybertron-7b-v2-bf16.json index 337040b66a4a635..42904565cefd75b 100644 --- a/src/content/workers-ai-models/una-cybertron-7b-v2-bf16.json +++ b/src/content/workers-ai-models/una-cybertron-7b-v2-bf16.json @@ -29,9 +29,26 @@ "prompt": { "type": "string", "minLength": 1, - "maxLength": 131072, "description": "The input text prompt for the model to generate a response." 
}, + "lora": { + "type": "string", + "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." + }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, "raw": { "type": "boolean", "default": false, @@ -89,10 +106,6 @@ "minimum": 0, "maximum": 2, "description": "Increases the likelihood of the model introducing new topics." - }, - "lora": { - "type": "string", - "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." } }, "required": [ @@ -114,7 +127,6 @@ }, "content": { "type": "string", - "maxLength": 131072, "description": "The content of the message as a string." } }, @@ -283,10 +295,29 @@ ] } }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, + "raw": { + "type": "boolean", + "default": false, + "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting." + }, "stream": { "type": "boolean", "default": false, - "description": "If true, the response will be streamed back incrementally." + "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events." }, "max_tokens": { "type": "integer", @@ -304,7 +335,7 @@ "type": "number", "minimum": 0, "maximum": 2, - "description": "Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." + "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." 
}, "top_k": { "type": "integer", @@ -391,7 +422,10 @@ } } } - } + }, + "required": [ + "response" + ] }, { "type": "string", diff --git a/src/content/workers-ai-models/zephyr-7b-beta-awq.json b/src/content/workers-ai-models/zephyr-7b-beta-awq.json index e49f0132f46625f..ed6e83a06f6b839 100644 --- a/src/content/workers-ai-models/zephyr-7b-beta-awq.json +++ b/src/content/workers-ai-models/zephyr-7b-beta-awq.json @@ -33,9 +33,26 @@ "prompt": { "type": "string", "minLength": 1, - "maxLength": 131072, "description": "The input text prompt for the model to generate a response." }, + "lora": { + "type": "string", + "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." + }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, "raw": { "type": "boolean", "default": false, @@ -93,10 +110,6 @@ "minimum": 0, "maximum": 2, "description": "Increases the likelihood of the model introducing new topics." - }, - "lora": { - "type": "string", - "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." } }, "required": [ @@ -118,7 +131,6 @@ }, "content": { "type": "string", - "maxLength": 131072, "description": "The content of the message as a string." } }, @@ -287,10 +299,29 @@ ] } }, + "response_format": { + "title": "JSON Mode", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "json_object", + "json_schema" + ] + }, + "json_schema": {} + } + }, + "raw": { + "type": "boolean", + "default": false, + "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting." + }, "stream": { "type": "boolean", "default": false, - "description": "If true, the response will be streamed back incrementally." 
+ "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events." }, "max_tokens": { "type": "integer", @@ -308,7 +339,7 @@ "type": "number", "minimum": 0, "maximum": 2, - "description": "Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." + "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." }, "top_k": { "type": "integer", @@ -395,7 +426,10 @@ } } } - } + }, + "required": [ + "response" + ] }, { "type": "string",