Skip to content

Commit 2c67326

Browse files
authored
workers ai model updates (#21772)
* Correctly mark @cf/meta/llama-3.3-70b-instruct-fp8-fast for tool calling and batching. Updated schema to reflect batching schema. Fixed schema that was incorrectly listing fields and defaults. * Mark @cf/meta/llama-4-scout-17b-16e-instruct and @cf/mistral/mistral-small-3.1-24b-instruct for tool_calling
1 parent ab34582 commit 2c67326

File tree

3 files changed

+110
-1
lines changed

3 files changed

+110
-1
lines changed

src/content/workers-ai-models/llama-3.3-70b-instruct-fp8-fast.json

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,10 @@
1111
"created_at": "2024-12-06 17:09:18.338",
1212
"tags": [],
1313
"properties": [
14+
{
15+
"property_id": "async_queue",
16+
"value": "true"
17+
},
1418
{
1519
"property_id": "context_window",
1620
"value": "24000"
@@ -30,6 +34,10 @@
3034
}
3135
]
3236
},
37+
{
38+
"property_id": "function_calling",
39+
"value": "true"
40+
},
3341
{
3442
"property_id": "terms",
3543
"value": "https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/LICENSE"
@@ -387,13 +395,98 @@
387395
"required": [
388396
"messages"
389397
]
398+
},
399+
{
400+
"title": "Async Batch",
401+
"type": "object",
402+
"properties": {
403+
"requests": {
404+
"type": "array",
405+
"items": {
406+
"type": "object",
407+
"properties": {
408+
"external_reference": {
409+
"type": "string",
410+
"description": "User-supplied reference. This field will be present in the response as well; it can be used to correlate the request and response. It's NOT validated to be unique."
411+
},
412+
"prompt": {
413+
"type": "string",
414+
"minLength": 1,
415+
"description": "Prompt for the text generation model"
416+
},
417+
"stream": {
418+
"type": "boolean",
419+
"default": false,
420+
"description": "If true, the response will be streamed back incrementally using SSE (Server-Sent Events)."
421+
},
422+
"max_tokens": {
423+
"type": "integer",
424+
"default": 256,
425+
"description": "The maximum number of tokens to generate in the response."
426+
},
427+
"temperature": {
428+
"type": "number",
429+
"default": 0.6,
430+
"minimum": 0,
431+
"maximum": 5,
432+
"description": "Controls the randomness of the output; higher values produce more random results."
433+
},
434+
"top_p": {
435+
"type": "number",
436+
"minimum": 0,
437+
"maximum": 2,
438+
"description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses."
439+
},
440+
"seed": {
441+
"type": "integer",
442+
"minimum": 1,
443+
"maximum": 9999999999,
444+
"description": "Random seed for reproducibility of the generation."
445+
},
446+
"repetition_penalty": {
447+
"type": "number",
448+
"minimum": 0,
449+
"maximum": 2,
450+
"description": "Penalty for repeated tokens; higher values discourage repetition."
451+
},
452+
"frequency_penalty": {
453+
"type": "number",
454+
"minimum": 0,
455+
"maximum": 2,
456+
"description": "Decreases the likelihood of the model repeating the same lines verbatim."
457+
},
458+
"presence_penalty": {
459+
"type": "number",
460+
"minimum": 0,
461+
"maximum": 2,
462+
"description": "Increases the likelihood of the model introducing new topics."
463+
},
464+
"response_format": {
465+
"title": "JSON Mode",
466+
"type": "object",
467+
"properties": {
468+
"type": {
469+
"type": "string",
470+
"enum": [
471+
"json_object",
472+
"json_schema"
473+
]
474+
},
475+
"json_schema": {}
476+
}
477+
}
478+
}
479+
}
480+
}
481+
}
390482
}
391483
]
392484
},
393485
"output": {
394486
"oneOf": [
395487
{
396488
"type": "object",
489+
"contentType": "application/json",
397490
"properties": {
398491
"response": {
399492
"type": "string",
@@ -444,7 +537,19 @@
444537
},
445538
{
446539
"type": "string",
540+
"contentType": "text/event-stream",
447541
"format": "binary"
542+
},
543+
{
544+
"type": "object",
545+
"contentType": "application/json",
546+
"title": "Async response",
547+
"properties": {
548+
"request_id": {
549+
"type": "string",
550+
"description": "The async request id that can be used to obtain the results."
551+
}
552+
}
448553
}
449554
]
450555
}

src/content/workers-ai-models/llama-4-scout-17b-16e-instruct.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,10 @@
3030
}
3131
]
3232
},
33+
{
34+
"property_id": "function_calling",
35+
"value": "true"
36+
},
3337
{
3438
"property_id": "terms",
3539
"value": "https://github.com/meta-llama/llama-models/blob/main/models/llama4/LICENSE"

src/content/workers-ai-models/mistral-small-3.1-24b-instruct.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
]
3232
},
3333
{
34-
"property_id": "lora",
34+
"property_id": "function_calling",
3535
"value": "true"
3636
}
3737
],

0 commit comments

Comments
 (0)