Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 105 additions & 0 deletions src/content/workers-ai-models/llama-3.3-70b-instruct-fp8-fast.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@
"created_at": "2024-12-06 17:09:18.338",
"tags": [],
"properties": [
{
"property_id": "async_queue",
"value": "true"
},
{
"property_id": "context_window",
"value": "24000"
Expand All @@ -30,6 +34,10 @@
}
]
},
{
"property_id": "function_calling",
"value": "true"
},
{
"property_id": "terms",
"value": "https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/LICENSE"
Expand Down Expand Up @@ -387,13 +395,98 @@
"required": [
"messages"
]
},
{
"title": "Async Batch",
"type": "object",
"properties": {
"requests": {
"type": "array",
"items": {
"type": "object",
"properties": {
"external_reference": {
"type": "string",
"description": "User-supplied reference. This field will be present in the response as well it can be used to reference the request and response. It's NOT validated to be unique."
},
"prompt": {
"type": "string",
"minLength": 1,
"description": "Prompt for the text generation model"
},
"stream": {
"type": "boolean",
"default": false,
"description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events."
},
"max_tokens": {
"type": "integer",
"default": 256,
"description": "The maximum number of tokens to generate in the response."
},
"temperature": {
"type": "number",
"default": 0.6,
"minimum": 0,
"maximum": 5,
"description": "Controls the randomness of the output; higher values produce more random results."
},
"top_p": {
"type": "number",
"minimum": 0,
"maximum": 2,
"description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses."
},
"seed": {
"type": "integer",
"minimum": 1,
"maximum": 9999999999,
"description": "Random seed for reproducibility of the generation."
},
"repetition_penalty": {
"type": "number",
"minimum": 0,
"maximum": 2,
"description": "Penalty for repeated tokens; higher values discourage repetition."
},
"frequency_penalty": {
"type": "number",
"minimum": 0,
"maximum": 2,
"description": "Decreases the likelihood of the model repeating the same lines verbatim."
},
"presence_penalty": {
"type": "number",
"minimum": 0,
"maximum": 2,
"description": "Increases the likelihood of the model introducing new topics."
},
"response_format": {
"title": "JSON Mode",
"type": "object",
"properties": {
"type": {
"type": "string",
"enum": [
"json_object",
"json_schema"
]
},
"json_schema": {}
}
}
}
}
}
}
}
]
},
"output": {
"oneOf": [
{
"type": "object",
"contentType": "application/json",
"properties": {
"response": {
"type": "string",
Expand Down Expand Up @@ -444,7 +537,19 @@
},
{
"type": "string",
"contentType": "text/event-stream",
"format": "binary"
},
{
"type": "object",
"contentType": "application/json",
"title": "Async response",
"properties": {
"request_id": {
"type": "string",
"description": "The async request id that can be used to obtain the results."
}
}
}
]
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@
}
]
},
{
"property_id": "function_calling",
"value": "true"
},
{
"property_id": "terms",
"value": "https://github.com/meta-llama/llama-models/blob/main/models/llama4/LICENSE"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
]
},
{
"property_id": "lora",
"property_id": "function_calling",
"value": "true"
}
],
Expand Down