Skip to content

Commit 2c67326

Browse files
authored
workers ai model updates (#21772)
* Correctly mark @cf/meta/llama-3.3-70b-instruct-fp8-fast for tool calling and batching. Updated schema to reflect batching schema. Fixed schema that was incorrectly listing fields and defaults. * Mark @cf/meta/llama-4-scout-17b-16e-instruct and @cf/mistral/mistral-small-3.1-24b-instruct for tool_calling
1 parent ab34582 commit 2c67326

File tree

3 files changed

+110
-1
lines changed

3 files changed

+110
-1
lines changed

src/content/workers-ai-models/llama-3.3-70b-instruct-fp8-fast.json

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,10 @@
1111
"created_at": "2024-12-06 17:09:18.338",
1212
"tags": [],
1313
"properties": [
14+
{
15+
"property_id": "async_queue",
16+
"value": "true"
17+
},
1418
{
1519
"property_id": "context_window",
1620
"value": "24000"
@@ -30,6 +34,10 @@
3034
}
3135
]
3236
},
37+
{
38+
"property_id": "function_calling",
39+
"value": "true"
40+
},
3341
{
3442
"property_id": "terms",
3543
"value": "https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/LICENSE"
@@ -387,13 +395,98 @@
387395
"required": [
388396
"messages"
389397
]
398+
},
399+
{
400+
"title": "Async Batch",
401+
"type": "object",
402+
"properties": {
403+
"requests": {
404+
"type": "array",
405+
"items": {
406+
"type": "object",
407+
"properties": {
408+
"external_reference": {
409+
"type": "string",
410+
"description": "User-supplied reference. This field will be present in the response as well; it can be used to correlate the request and response. It's NOT validated to be unique."
411+
},
412+
"prompt": {
413+
"type": "string",
414+
"minLength": 1,
415+
"description": "Prompt for the text generation model"
416+
},
417+
"stream": {
418+
"type": "boolean",
419+
"default": false,
420+
"description": "If true, the response will be streamed back incrementally using SSE (Server-Sent Events)."
421+
},
422+
"max_tokens": {
423+
"type": "integer",
424+
"default": 256,
425+
"description": "The maximum number of tokens to generate in the response."
426+
},
427+
"temperature": {
428+
"type": "number",
429+
"default": 0.6,
430+
"minimum": 0,
431+
"maximum": 5,
432+
"description": "Controls the randomness of the output; higher values produce more random results."
433+
},
434+
"top_p": {
435+
"type": "number",
436+
"minimum": 0,
437+
"maximum": 2,
438+
"description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses."
439+
},
440+
"seed": {
441+
"type": "integer",
442+
"minimum": 1,
443+
"maximum": 9999999999,
444+
"description": "Random seed for reproducibility of the generation."
445+
},
446+
"repetition_penalty": {
447+
"type": "number",
448+
"minimum": 0,
449+
"maximum": 2,
450+
"description": "Penalty for repeated tokens; higher values discourage repetition."
451+
},
452+
"frequency_penalty": {
453+
"type": "number",
454+
"minimum": 0,
455+
"maximum": 2,
456+
"description": "Decreases the likelihood of the model repeating the same lines verbatim."
457+
},
458+
"presence_penalty": {
459+
"type": "number",
460+
"minimum": 0,
461+
"maximum": 2,
462+
"description": "Increases the likelihood of the model introducing new topics."
463+
},
464+
"response_format": {
465+
"title": "JSON Mode",
466+
"type": "object",
467+
"properties": {
468+
"type": {
469+
"type": "string",
470+
"enum": [
471+
"json_object",
472+
"json_schema"
473+
]
474+
},
475+
"json_schema": {}
476+
}
477+
}
478+
}
479+
}
480+
}
481+
}
390482
}
391483
]
392484
},
393485
"output": {
394486
"oneOf": [
395487
{
396488
"type": "object",
489+
"contentType": "application/json",
397490
"properties": {
398491
"response": {
399492
"type": "string",
@@ -444,7 +537,19 @@
444537
},
445538
{
446539
"type": "string",
540+
"contentType": "text/event-stream",
447541
"format": "binary"
542+
},
543+
{
544+
"type": "object",
545+
"contentType": "application/json",
546+
"title": "Async response",
547+
"properties": {
548+
"request_id": {
549+
"type": "string",
550+
"description": "The async request id that can be used to obtain the results."
551+
}
552+
}
448553
}
449554
]
450555
}

src/content/workers-ai-models/llama-4-scout-17b-16e-instruct.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,10 @@
3030
}
3131
]
3232
},
33+
{
34+
"property_id": "function_calling",
35+
"value": "true"
36+
},
3337
{
3438
"property_id": "terms",
3539
"value": "https://github.com/meta-llama/llama-models/blob/main/models/llama4/LICENSE"

src/content/workers-ai-models/mistral-small-3.1-24b-instruct.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
]
3232
},
3333
{
34-
"property_id": "lora",
34+
"property_id": "function_calling",
3535
"value": "true"
3636
}
3737
],

0 commit comments

Comments
 (0)