From 5b1f2739dc557a42ed9c6fec54d9709be769b7d1 Mon Sep 17 00:00:00 2001 From: Celso Martinho Date: Tue, 25 Feb 2025 15:08:35 +0000 Subject: [PATCH 1/6] JSON Mode documentation --- .../workers-ai/2025-02-25-json-mode.mdx | 7 ++ .../docs/workers-ai/json-mode/index.mdx | 117 ++++++++++++++++++ 2 files changed, 124 insertions(+) create mode 100644 src/content/changelog/workers-ai/2025-02-25-json-mode.mdx create mode 100644 src/content/docs/workers-ai/json-mode/index.mdx diff --git a/src/content/changelog/workers-ai/2025-02-25-json-mode.mdx b/src/content/changelog/workers-ai/2025-02-25-json-mode.mdx new file mode 100644 index 000000000000000..a7bf9d52039d60f --- /dev/null +++ b/src/content/changelog/workers-ai/2025-02-25-json-mode.mdx @@ -0,0 +1,7 @@ +--- +title: Workers AI JSON Mode +description: Workers AI JSON Mode adds structured outputs support +date: 2025-02-25T15:00:00Z +--- + +We've updated the Workers AI to support [JSON mode](/workers-ai/json-mode/), enabling applications to request a structured output response when interacting with AI models. \ No newline at end of file diff --git a/src/content/docs/workers-ai/json-mode/index.mdx b/src/content/docs/workers-ai/json-mode/index.mdx new file mode 100644 index 000000000000000..0c451ea7ee01260 --- /dev/null +++ b/src/content/docs/workers-ai/json-mode/index.mdx @@ -0,0 +1,117 @@ +--- +pcx_content_type: navigation +title: JSON Mode +hideChildren: true +sidebar: + order: 5 +--- + +import { Code } from "~/components"; + +export const jsonModeSchema = `{ + response_format: { + title: "JSON Mode", + type: "object", + properties: { + type: { + type: "string", + enum: ["json_object", "json_schema"], + }, + json_schema: {}, + } + } +}`; + +export const jsonModeRequestExample = `{ + "messages": [ + { + "role": "system", + "content": "Extract data about a country." + }, + { + "role": "user", + "content": "Tell me about India." 
+ } + ], + "response_format": { + "type": "json_schema", + "json_schema": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "capital": { + "type": "string" + }, + "languages": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": [ + "name", + "capital", + "languages" + ] + } + } +}`; + +export const jsonModeResponseExample = `{ + "response": { + "name": "India", + "capital": "New Delhi", + "languages": [ + "Hindi", + "English", + "Bengali", + "Telugu", + "Marathi", + "Tamil", + "Gujarati", + "Urdu", + "Kannada", + "Odia", + "Malayalam", + "Punjabi", + "Sanskrit" + ] + } +}`; + +When we want text-generation AI models to interact with databases, services, and external systems programmatically, typically when using tool calling or building AI agents, we must have structured response formats rather than natural language. + +Workers AI supports JSON mode, enabling applications to request a structured output response when interacting with AI models. + +Here's a request to `@cf/meta/llama-3.1-8b-instruct-fp8-fast` using JSON mode: + + + +And what the response from the model: + + + +As you can see, the model is complying with the JSON schema definition in the request and responding with a validated JSON object. + +JSON mode is compatible with OpenAI’s implementation; to enable add the `response_format` property to the request object following this schema: + + + +Where `json_schema` must be a valid [JSON Schema](https://json-schema.org/) declaration. 
+
+This is the list of models that now support JSON mode:
+
+- [@cf/meta/llama-3.1-8b-instruct-fast](/workers-ai/models/llama-3.1-8b-instruct-fast/)
+- [@cf/meta/llama-3.1-70b-instruct](/workers-ai/models/llama-3.1-70b-instruct/)
+- [@cf/meta/llama-3.3-70b-instruct-fp8-fast](/workers-ai/models/llama-3.3-70b-instruct-fp8-fast/)
+- [@cf/deepseek-ai/deepseek-r1-distill-qwen-32b](/workers-ai/models/deepseek-r1-distill-qwen-32b/)
+- [@cf/meta/llama-3-8b-instruct](/workers-ai/models/llama-3-8b-instruct/)
+- [@cf/meta/llama-3.1-8b-instruct](/workers-ai/models/llama-3.1-8b-instruct/)
+- [@hf/nousresearch/hermes-2-pro-mistral-7b](/workers-ai/models/hermes-2-pro-mistral-7b/)
+
+We will continue extending this list to keep up with new and requested models.
+
+Note that Workers AI can't guarantee that the model responds according to the requested JSON Schema. Depending on the complexity of the task and adequacy of the JSON Schema, the model may not be able to satisfy the request in extreme situations. If that's the case, then an error `JSON Mode couldn't be met` is returned and must be handled. 
\ No newline at end of file From 5dc1e11d7dfdb6c9ff9687ee2225f04c9855c09c Mon Sep 17 00:00:00 2001 From: Celso Martinho Date: Tue, 25 Feb 2025 15:30:07 +0000 Subject: [PATCH 2/6] json mode doesn't work with streaming --- src/content/docs/workers-ai/json-mode/index.mdx | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/content/docs/workers-ai/json-mode/index.mdx b/src/content/docs/workers-ai/json-mode/index.mdx index 0c451ea7ee01260..b20e0f6143cb27e 100644 --- a/src/content/docs/workers-ai/json-mode/index.mdx +++ b/src/content/docs/workers-ai/json-mode/index.mdx @@ -107,11 +107,15 @@ This is the list of models that now support JSON mode: - [@cf/meta/llama-3.1-8b-instruct-fast](/workers-ai/models/llama-3.1-8b-instruct-fast/) - [@cf/meta/llama-3.1-70b-instruct](/workers-ai/models/llama-3.1-70b-instruct/) - [@cf/meta/llama-3.3-70b-instruct-fp8-fast](/workers-ai/models/llama-3.3-70b-instruct-fp8-fast/) -- [@cf/deepseek-ai/deepseek-r1-distill-qwen-32b](/workers-ai/models/deepseek-r1-distill-qwen-32b/) - [@cf/meta/llama-3-8b-instruct](/workers-ai/models/llama-3-8b-instruct/) - [@cf/meta/llama-3.1-8b-instruct](/workers-ai/models/llama-3.1-8b-instruct/) +- [@cf/meta/llama-3.2-11b-vision-instruct](/workers-ai/models/llama-3.2-11b-vision-instruct/) - [@hf/nousresearch/hermes-2-pro-mistral-7b](/workers-ai/models/hermes-2-pro-mistral-7b/) +- [@hf/thebloke/deepseek-coder-6.7b-instruct-awq](/workers-ai/models/deepseek-coder-6.7b-instruct-awq/) +- [@cf/deepseek-ai/deepseek-r1-distill-qwen-32b](/workers-ai/models/deepseek-r1-distill-qwen-32b/) We will continue extending this list to keep up with new, and requested models. -Note that Workers AI can't guarantee that the model responds according to the requested JSON Schema. Depending on the complexity of the task and adequacy of the JSON Schema, the model may not be able to satisfy the request in extreme situations. 
If that's the case, then an error `JSON Mode couldn't be met` is returned and must be handled. \ No newline at end of file +Note that Workers AI can't guarantee that the model responds according to the requested JSON Schema. Depending on the complexity of the task and adequacy of the JSON Schema, the model may not be able to satisfy the request in extreme situations. If that's the case, then an error `JSON Mode couldn't be met` is returned and must be handled. + +JSON Mode currently doesn't support streaming. \ No newline at end of file From 3b3d510ec876919ee3f536c3cdcf2de8c60774f2 Mon Sep 17 00:00:00 2001 From: Celso Martinho Date: Tue, 25 Feb 2025 15:55:42 +0000 Subject: [PATCH 3/6] typo --- src/content/docs/workers-ai/json-mode/index.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/content/docs/workers-ai/json-mode/index.mdx b/src/content/docs/workers-ai/json-mode/index.mdx index b20e0f6143cb27e..7a12b73c13518dd 100644 --- a/src/content/docs/workers-ai/json-mode/index.mdx +++ b/src/content/docs/workers-ai/json-mode/index.mdx @@ -90,7 +90,7 @@ Here's a request to `@cf/meta/llama-3.1-8b-instruct-fp8-fast` using JSON mode: -And what the response from the model: +And here's the response from the model, in JSON: From c04ee7035b59ee081990cb3279f1e1c72fe7c11e Mon Sep 17 00:00:00 2001 From: Celso Martinho Date: Tue, 25 Feb 2025 16:24:37 +0000 Subject: [PATCH 4/6] improves json mode --- .nvmrc | 1 + .../docs/workers-ai/json-mode/index.mdx | 24 ++++++++++++------- 2 files changed, 16 insertions(+), 9 deletions(-) create mode 100644 .nvmrc diff --git a/.nvmrc b/.nvmrc new file mode 100644 index 000000000000000..f4835650e369b38 --- /dev/null +++ b/.nvmrc @@ -0,0 +1 @@ +v22.9.0 \ No newline at end of file diff --git a/src/content/docs/workers-ai/json-mode/index.mdx b/src/content/docs/workers-ai/json-mode/index.mdx index 7a12b73c13518dd..452fb278aeaf26e 100644 --- a/src/content/docs/workers-ai/json-mode/index.mdx +++ 
b/src/content/docs/workers-ai/json-mode/index.mdx @@ -84,25 +84,31 @@ export const jsonModeResponseExample = `{ When we want text-generation AI models to interact with databases, services, and external systems programmatically, typically when using tool calling or building AI agents, we must have structured response formats rather than natural language. -Workers AI supports JSON mode, enabling applications to request a structured output response when interacting with AI models. +Workers AI supports JSON Mode, enabling applications to request a structured output response when interacting with AI models. -Here's a request to `@cf/meta/llama-3.1-8b-instruct-fp8-fast` using JSON mode: +## Schema + +JSON Mode is compatible with OpenAI’s implementation; to enable add the `response_format` property to the request object using the following convention: + + + +Where `json_schema` must be a valid [JSON Schema](https://json-schema.org/) declaration. + +## JSON Mode example + +When using JSON Format, pass the schema as in the example below as part of the request you send to the LLM. -And here's the response from the model, in JSON: +The LLM will follow the schema, and return a response such as below: As you can see, the model is complying with the JSON schema definition in the request and responding with a validated JSON object. -JSON mode is compatible with OpenAI’s implementation; to enable add the `response_format` property to the request object following this schema: - - - -Where `json_schema` must be a valid [JSON Schema](https://json-schema.org/) declaration. 
+## Supported Models -This is the list of models that now support JSON mode: +This is the list of models that now support JSON Mode: - [@cf/meta/llama-3.1-8b-instruct-fast](/workers-ai/models/llama-3.1-8b-instruct-fast/) - [@cf/meta/llama-3.1-70b-instruct](/workers-ai/models/llama-3.1-70b-instruct/) From 4125567d7821d77357e9590a94b87128a36af89a Mon Sep 17 00:00:00 2001 From: Celso Martinho Date: Tue, 25 Feb 2025 17:20:18 +0000 Subject: [PATCH 5/6] updates llamaguard-3 schema --- src/content/workers-ai-models/llama-guard-3-8b.json | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/content/workers-ai-models/llama-guard-3-8b.json b/src/content/workers-ai-models/llama-guard-3-8b.json index ae4e67a90d4e325..79d49e7073d7432 100644 --- a/src/content/workers-ai-models/llama-guard-3-8b.json +++ b/src/content/workers-ai-models/llama-guard-3-8b.json @@ -21,12 +21,14 @@ "type": "object", "properties": { "role": { - "type": "string", - "description": "The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool')." + "enum": [ + "user", + "assistant" + ], + "description": "The role of the message sender must alternate between 'user' and 'assistant'." }, "content": { "type": "string", - "maxLength": 131072, "description": "The content of the message as a string." 
} }, From 0174789dd8e260fe648d5b1e56225261c8c086a2 Mon Sep 17 00:00:00 2001 From: Celso Martinho Date: Tue, 25 Feb 2025 17:30:58 +0000 Subject: [PATCH 6/6] adds maxLength limit --- src/content/workers-ai-models/llama-guard-3-8b.json | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/content/workers-ai-models/llama-guard-3-8b.json b/src/content/workers-ai-models/llama-guard-3-8b.json index 79d49e7073d7432..ae4e67a90d4e325 100644 --- a/src/content/workers-ai-models/llama-guard-3-8b.json +++ b/src/content/workers-ai-models/llama-guard-3-8b.json @@ -21,14 +21,12 @@ "type": "object", "properties": { "role": { - "enum": [ - "user", - "assistant" - ], - "description": "The role of the message sender must alternate between 'user' and 'assistant'." + "type": "string", + "description": "The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool')." }, "content": { "type": "string", + "maxLength": 131072, "description": "The content of the message as a string." } },