cloudflare · kodster28 · Apr 17, 2025 · Apr 16, 2025 · Apr 16, 2025
@@ -11,6 +11,10 @@
     "created_at": "2024-12-06 17:09:18.338",
     "tags": [],
     "properties": [
+        {
+            "property_id": "async_queue",
+            "value": "true"
+        },
         {
             "property_id": "context_window",
             "value": "24000"
@@ -30,6 +34,10 @@
                 }
             ]
         },
+        {
+            "property_id": "function_calling",
+            "value": "true"
+        },
         {
             "property_id": "terms",
             "value": "https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/LICENSE"
@@ -387,13 +395,98 @@
                     "required": [
                         "messages"
                     ]
+                },
+                {
+                    "title": "Async Batch",
+                    "type": "object",
+                    "properties": {
+                        "requests": {
+                            "type": "array",
+                            "items": {
+                                "type": "object",
+                                "properties": {
+                                    "external_reference": {
+                                        "type": "string",
+                                        "description": "User-supplied reference. This field is echoed back in the response, so it can be used to match each response to its request. It is NOT validated to be unique."
+                                    },
+                                    "prompt": {
+                                        "type": "string",
+                                        "minLength": 1,
+                                        "description": "Prompt for the text generation model"
+                                    },
+                                    "stream": {
+                                        "type": "boolean",
+                                        "default": false,
+                                        "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events."
+                                    },
+                                    "max_tokens": {
+                                        "type": "integer",
+                                        "default": 256,
+                                        "description": "The maximum number of tokens to generate in the response."
+                                    },
+                                    "temperature": {
+                                        "type": "number",
+                                        "default": 0.6,
+                                        "minimum": 0,
+                                        "maximum": 5,
+                                        "description": "Controls the randomness of the output; higher values produce more random results."
+                                    },
+                                    "top_p": {
+                                        "type": "number",
+                                        "minimum": 0,
+                                        "maximum": 2,
+                                        "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses."
+                                    },
+                                    "seed": {
+                                        "type": "integer",
+                                        "minimum": 1,
+                                        "maximum": 9999999999,
+                                        "description": "Random seed for reproducibility of the generation."
+                                    },
+                                    "repetition_penalty": {
+                                        "type": "number",
+                                        "minimum": 0,
+                                        "maximum": 2,
+                                        "description": "Penalty for repeated tokens; higher values discourage repetition."
+                                    },
+                                    "frequency_penalty": {
+                                        "type": "number",
+                                        "minimum": 0,
+                                        "maximum": 2,
+                                        "description": "Decreases the likelihood of the model repeating the same lines verbatim."
+                                    },
+                                    "presence_penalty": {
+                                        "type": "number",
+                                        "minimum": 0,
+                                        "maximum": 2,
+                                        "description": "Increases the likelihood of the model introducing new topics."
+                                    },
+                                    "response_format": {
+                                        "title": "JSON Mode",
+                                        "type": "object",
+                                        "properties": {
+                                            "type": {
+                                                "type": "string",
+                                                "enum": [
+                                                    "json_object",
+                                                    "json_schema"
+                                                ]
+                                            },
+                                            "json_schema": {}
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                    }
                 }
             ]
         },
         "output": {
             "oneOf": [
                 {
                     "type": "object",
+                    "contentType": "application/json",
                     "properties": {
                         "response": {
                             "type": "string",
@@ -444,7 +537,19 @@
                 },
                 {
                     "type": "string",
+                    "contentType": "text/event-stream",
                     "format": "binary"
+                },
+                {
+                    "type": "object",
+                    "contentType": "application/json",
+                    "title": "Async response",
+                    "properties": {
+                        "request_id": {
+                            "type": "string",
+                            "description": "The async request id that can be used to obtain the results."
+                        }
+                    }
                 }
             ]
         }

@@ -30,6 +30,10 @@
                 }
             ]
         },
+        {
+            "property_id": "function_calling",
+            "value": "true"
+        },
         {
             "property_id": "terms",
             "value": "https://github.com/meta-llama/llama-models/blob/main/models/llama4/LICENSE"

@@ -31,7 +31,7 @@
             ]
         },
         {
-            "property_id": "lora",
+            "property_id": "function_calling",
             "value": "true"
         }
     ],
-Original file line number
+Diff line change
@@ Expand Up / @@ -31,7 +31,7 @@ @@
                 ]
             },
             {
-                "property_id": "lora",
+                "property_id": "function_calling",
                 "value": "true"
             }
         ],
@@ Expand Down @@