
Commit 333d587

Add support for batching, function calling and loras for llama-3.3-70b
1 parent: 2a8d1c6

File tree: 2 files changed (+180, -15 lines)

src/content/workers-ai-models/llama-3.3-70b-instruct-fp8-fast.json

Lines changed: 180 additions & 11 deletions
@@ -11,6 +11,10 @@
   "created_at": "2024-12-06 17:09:18.338",
   "tags": [],
   "properties": [
+    {
+      "property_id": "async_queue",
+      "value": "true"
+    },
     {
       "property_id": "context_window",
       "value": "24000"
@@ -30,6 +34,14 @@
         }
       ]
     },
+    {
+      "property_id": "function_calling",
+      "value": "true"
+    },
+    {
+      "property_id": "lora",
+      "value": "true"
+    },
     {
       "property_id": "terms",
       "value": "https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/LICENSE"
@@ -53,7 +65,6 @@
         },
         "response_format": {
           "title": "JSON Mode",
-          "type": "object",
           "properties": {
             "type": {
               "type": "string",
@@ -65,6 +76,10 @@
               "json_schema": {}
             }
           },
+          "guided_json": {
+            "type": "object",
+            "description": "JSON schema that should be fulfilled for the response."
+          },
          "raw": {
            "type": "boolean",
            "default": false,
@@ -82,7 +97,7 @@
         },
         "temperature": {
           "type": "number",
-          "default": 0.6,
+          "default": 0.15,
           "minimum": 0,
           "maximum": 5,
           "description": "Controls the randomness of the output; higher values produce more random results."
@@ -141,15 +156,67 @@
              "type": "string",
              "description": "The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool')."
            },
-           "content": {
+           "tool_call_id": {
              "type": "string",
-             "description": "The content of the message as a string."
+             "description": "The tool call id. Must be supplied for tool calls for Mistral-3. If you don't know what to put here you can fall back to 000000001",
+             "pattern": "[a-zA-Z0-9]{9}"
+           },
+           "content": {
+             "oneOf": [
+               {
+                 "type": "string",
+                 "description": "The content of the message as a string."
+               },
+               {
+                 "type": "array",
+                 "items": {
+                   "type": "object",
+                   "properties": {
+                     "type": {
+                       "type": "string",
+                       "description": "Type of the content provided"
+                     },
+                     "text": {
+                       "type": "string"
+                     },
+                     "image_url": {
+                       "type": "object",
+                       "properties": {
+                         "url": {
+                           "type": "string",
+                           "pattern": "^data:*",
+                           "description": "image uri with data (e.g. data:image/jpeg;base64,/9j/...). HTTP URL will not be accepted"
+                         }
+                       }
+                     }
+                   }
+                 }
+               },
+               {
+                 "type": "object",
+                 "properties": {
+                   "type": {
+                     "type": "string",
+                     "description": "Type of the content provided"
+                   },
+                   "text": {
+                     "type": "string"
+                   },
+                   "image_url": {
+                     "type": "object",
+                     "properties": {
+                       "url": {
+                         "type": "string",
+                         "pattern": "^data:*",
+                         "description": "image uri with data (e.g. data:image/jpeg;base64,/9j/...). HTTP URL will not be accepted"
+                       }
+                     }
+                   }
+                 }
+               }
+             ]
            }
-         },
-         "required": [
-           "role",
-           "content"
-         ]
+         }
          }
        },
        "functions": {
@@ -313,7 +380,6 @@
         },
         "response_format": {
           "title": "JSON Mode",
-          "type": "object",
           "properties": {
             "type": {
               "type": "string",
@@ -325,6 +391,10 @@
               "json_schema": {}
             }
           },
+          "guided_json": {
+            "type": "object",
+            "description": "JSON schema that should be fufilled for the response."
+          },
           "raw": {
             "type": "boolean",
             "default": false,
@@ -342,7 +412,7 @@
         },
         "temperature": {
           "type": "number",
-          "default": 0.6,
+          "default": 0.15,
           "minimum": 0,
           "maximum": 5,
           "description": "Controls the randomness of the output; higher values produce more random results."
@@ -387,13 +457,100 @@
        "required": [
          "messages"
        ]
+      },
+      {
+        "type": "object",
+        "properties": {
+          "requests": {
+            "type": "array",
+            "items": {
+              "type": "object",
+              "properties": {
+                "external_reference": {
+                  "type": "string",
+                  "description": "User-supplied reference. This field will be present in the response as well it can be used to reference the request and response. It's NOT validated to be unique."
+                },
+                "prompt": {
+                  "type": "string",
+                  "minLength": 1,
+                  "description": "Prompt for the text generation model"
+                },
+                "stream": {
+                  "type": "boolean",
+                  "default": false,
+                  "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events."
+                },
+                "max_tokens": {
+                  "type": "integer",
+                  "default": 256,
+                  "description": "The maximum number of tokens to generate in the response."
+                },
+                "temperature": {
+                  "type": "number",
+                  "default": 0.6,
+                  "minimum": 0,
+                  "maximum": 5,
+                  "description": "Controls the randomness of the output; higher values produce more random results."
+                },
+                "top_p": {
+                  "type": "number",
+                  "minimum": 0,
+                  "maximum": 2,
+                  "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses."
+                },
+                "seed": {
+                  "type": "integer",
+                  "minimum": 1,
+                  "maximum": 9999999999,
+                  "description": "Random seed for reproducibility of the generation."
+                },
+                "repetition_penalty": {
+                  "type": "number",
+                  "minimum": 0,
+                  "maximum": 2,
+                  "description": "Penalty for repeated tokens; higher values discourage repetition."
+                },
+                "frequency_penalty": {
+                  "type": "number",
+                  "minimum": 0,
+                  "maximum": 2,
+                  "description": "Decreases the likelihood of the model repeating the same lines verbatim."
+                },
+                "presence_penalty": {
+                  "type": "number",
+                  "minimum": 0,
+                  "maximum": 2,
+                  "description": "Increases the likelihood of the model introducing new topics."
+                },
+                "response_format": {
+                  "title": "JSON Mode",
+                  "type": "object",
+                  "properties": {
+                    "type": {
+                      "type": "string",
+                      "enum": [
+                        "json_object",
+                        "json_schema"
+                      ]
+                    },
+                    "json_schema": {}
+                  }
+                }
+              }
+            }
+          }
+        },
+        "required": [
+          "requests"
+        ]
      }
    ]
  },
  "output": {
    "oneOf": [
      {
        "type": "object",
+        "contentType": "application/json",
        "properties": {
          "response": {
            "type": "string",
@@ -444,7 +601,19 @@
      },
      {
        "type": "string",
+        "contentType": "text/event-stream",
        "format": "binary"
+      },
+      {
+        "type": "object",
+        "contentType": "application/json",
+        "title": "Async response",
+        "properties": {
+          "request_id": {
+            "type": "string",
+            "description": "The async request id that can be used to obtain the results."
+          }
+        }
      }
    ]
  }
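With the async acknowledgement added to output.oneOf, a caller can now receive a plain JSON object with response, an SSE stream, or a request_id to fetch results later. A sketch of narrowing between those shapes (the union type and helper are illustrative, not library code):

    // Illustrative narrowing over the three output variants described by the schema.
    type ModelOutput =
      | { response: string }      // contentType: application/json
      | ReadableStream            // contentType: text/event-stream
      | { request_id: string };   // async queue acknowledgement

    function describeOutput(out: ModelOutput): string {
      if (out instanceof ReadableStream) {
        return "streaming response (SSE)";
      }
      if ("request_id" in out) {
        return `queued; fetch the result later with request_id ${out.request_id}`;
      }
      return out.response;
    }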

src/content/workers-ai-models/mistral-small-3.1-24b-instruct.json

Lines changed: 0 additions & 4 deletions
@@ -29,10 +29,6 @@
           "currency": "USD"
         }
       ]
-    },
-    {
-      "property_id": "lora",
-      "value": "true"
     }
   ],
   "schema": {
