Skip to content

Commit b7aebaa

Browse files
Add support for batching, function calling and loras for llama-3.3-70b
1 parent 2a8d1c6 commit b7aebaa

File tree

2 files changed

+180
-13
lines changed

2 files changed

+180
-13
lines changed

src/content/workers-ai-models/llama-3.3-70b-instruct-fp8-fast.json

Lines changed: 180 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,10 @@
1111
"created_at": "2024-12-06 17:09:18.338",
1212
"tags": [],
1313
"properties": [
14+
{
15+
"property_id": "async_queue",
16+
"value": "true"
17+
},
1418
{
1519
"property_id": "context_window",
1620
"value": "24000"
@@ -30,6 +34,14 @@
3034
}
3135
]
3236
},
37+
{
38+
"property_id": "function_calling",
39+
"value": "true"
40+
},
41+
{
42+
"property_id": "lora",
43+
"value": "true"
44+
},
3345
{
3446
"property_id": "terms",
3547
"value": "https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/LICENSE"
@@ -65,6 +77,10 @@
6577
"json_schema": {}
6678
}
6779
},
80+
"guided_json": {
81+
"type": "object",
82+
"description": "JSON schema that should be fulfilled for the response."
83+
},
6884
"raw": {
6985
"type": "boolean",
7086
"default": false,
@@ -82,7 +98,7 @@
8298
},
8399
"temperature": {
84100
"type": "number",
85-
"default": 0.6,
101+
"default": 0.15,
86102
"minimum": 0,
87103
"maximum": 5,
88104
"description": "Controls the randomness of the output; higher values produce more random results."
@@ -141,15 +157,67 @@
141157
"type": "string",
142158
"description": "The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool')."
143159
},
144-
"content": {
160+
"tool_call_id": {
145161
"type": "string",
146-
"description": "The content of the message as a string."
162+
"description": "The tool call id. Must be supplied for tool calls for Mistral-3. If you don't know what to put here, you can fall back to 000000001",
163+
"pattern": "[a-zA-Z0-9]{9}"
164+
},
165+
"content": {
166+
"oneOf": [
167+
{
168+
"type": "string",
169+
"description": "The content of the message as a string."
170+
},
171+
{
172+
"type": "array",
173+
"items": {
174+
"type": "object",
175+
"properties": {
176+
"type": {
177+
"type": "string",
178+
"description": "Type of the content provided"
179+
},
180+
"text": {
181+
"type": "string"
182+
},
183+
"image_url": {
184+
"type": "object",
185+
"properties": {
186+
"url": {
187+
"type": "string",
188+
"pattern": "^data:*",
189+
"description": "image uri with data (e.g. ...). HTTP URL will not be accepted"
190+
}
191+
}
192+
}
193+
}
194+
}
195+
},
196+
{
197+
"type": "object",
198+
"properties": {
199+
"type": {
200+
"type": "string",
201+
"description": "Type of the content provided"
202+
},
203+
"text": {
204+
"type": "string"
205+
},
206+
"image_url": {
207+
"type": "object",
208+
"properties": {
209+
"url": {
210+
"type": "string",
211+
"pattern": "^data:*",
212+
"description": "image uri with data (e.g. ...). HTTP URL will not be accepted"
213+
}
214+
}
215+
}
216+
}
217+
}
218+
]
147219
}
148-
},
149-
"required": [
150-
"role",
151-
"content"
152-
]
220+
}
153221
}
154222
},
155223
"functions": {
@@ -325,6 +393,10 @@
325393
"json_schema": {}
326394
}
327395
},
396+
"guided_json": {
397+
"type": "object",
398+
"description": "JSON schema that should be fulfilled for the response."
399+
},
328400
"raw": {
329401
"type": "boolean",
330402
"default": false,
@@ -342,7 +414,7 @@
342414
},
343415
"temperature": {
344416
"type": "number",
345-
"default": 0.6,
417+
"default": 0.15,
346418
"minimum": 0,
347419
"maximum": 5,
348420
"description": "Controls the randomness of the output; higher values produce more random results."
@@ -387,13 +459,100 @@
387459
"required": [
388460
"messages"
389461
]
462+
},
463+
{
464+
"type": "object",
465+
"properties": {
466+
"requests": {
467+
"type": "array",
468+
"items": {
469+
"type": "object",
470+
"properties": {
471+
"external_reference": {
472+
"type": "string",
473+
"description": "User-supplied reference. This field will also be present in the response, so it can be used to match each response to its request. It's NOT validated to be unique."
474+
},
475+
"prompt": {
476+
"type": "string",
477+
"minLength": 1,
478+
"description": "Prompt for the text generation model"
479+
},
480+
"stream": {
481+
"type": "boolean",
482+
"default": false,
483+
"description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events."
484+
},
485+
"max_tokens": {
486+
"type": "integer",
487+
"default": 256,
488+
"description": "The maximum number of tokens to generate in the response."
489+
},
490+
"temperature": {
491+
"type": "number",
492+
"default": 0.6,
493+
"minimum": 0,
494+
"maximum": 5,
495+
"description": "Controls the randomness of the output; higher values produce more random results."
496+
},
497+
"top_p": {
498+
"type": "number",
499+
"minimum": 0,
500+
"maximum": 2,
501+
"description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses."
502+
},
503+
"seed": {
504+
"type": "integer",
505+
"minimum": 1,
506+
"maximum": 9999999999,
507+
"description": "Random seed for reproducibility of the generation."
508+
},
509+
"repetition_penalty": {
510+
"type": "number",
511+
"minimum": 0,
512+
"maximum": 2,
513+
"description": "Penalty for repeated tokens; higher values discourage repetition."
514+
},
515+
"frequency_penalty": {
516+
"type": "number",
517+
"minimum": 0,
518+
"maximum": 2,
519+
"description": "Decreases the likelihood of the model repeating the same lines verbatim."
520+
},
521+
"presence_penalty": {
522+
"type": "number",
523+
"minimum": 0,
524+
"maximum": 2,
525+
"description": "Increases the likelihood of the model introducing new topics."
526+
},
527+
"response_format": {
528+
"title": "JSON Mode",
529+
"type": "object",
530+
"properties": {
531+
"type": {
532+
"type": "string",
533+
"enum": [
534+
"json_object",
535+
"json_schema"
536+
]
537+
},
538+
"json_schema": {}
539+
}
540+
}
541+
}
542+
}
543+
}
544+
},
545+
"required": [
546+
"requests"
547+
]
390548
}
391549
]
392550
},
393551
"output": {
394552
"oneOf": [
395553
{
396554
"type": "object",
555+
"contentType": "application/json",
397556
"properties": {
398557
"response": {
399558
"type": "string",
@@ -444,7 +603,19 @@
444603
},
445604
{
446605
"type": "string",
606+
"contentType": "text/event-stream",
447607
"format": "binary"
608+
},
609+
{
610+
"type": "object",
611+
"contentType": "application/json",
612+
"title": "Async response",
613+
"properties": {
614+
"request_id": {
615+
"type": "string",
616+
"description": "The async request id that can be used to obtain the results."
617+
}
618+
}
448619
}
449620
]
450621
}

src/content/workers-ai-models/mistral-small-3.1-24b-instruct.json

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,6 @@
2929
"currency": "USD"
3030
}
3131
]
32-
},
33-
{
34-
"property_id": "lora",
35-
"value": "true"
3632
}
3733
],
3834
"schema": {

0 commit comments

Comments
 (0)