
Commit 333d587

Add support for batching, function calling and loras for llama-3.3-70b
1 parent: 2a8d1c6

File tree: 2 files changed (+180, -15 lines)

src/content/workers-ai-models/llama-3.3-70b-instruct-fp8-fast.json

Lines changed: 180 additions & 11 deletions
@@ -11,6 +11,10 @@
   "created_at": "2024-12-06 17:09:18.338",
   "tags": [],
   "properties": [
+    {
+      "property_id": "async_queue",
+      "value": "true"
+    },
     {
       "property_id": "context_window",
       "value": "24000"
@@ -30,6 +34,14 @@
         }
       ]
     },
+    {
+      "property_id": "function_calling",
+      "value": "true"
+    },
+    {
+      "property_id": "lora",
+      "value": "true"
+    },
     {
       "property_id": "terms",
       "value": "https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/LICENSE"
@@ -53,7 +65,6 @@
         },
         "response_format": {
           "title": "JSON Mode",
-          "type": "object",
           "properties": {
             "type": {
               "type": "string",
@@ -65,6 +76,10 @@
               "json_schema": {}
             }
           },
+          "guided_json": {
+            "type": "object",
+            "description": "JSON schema that should be fulfilled for the response."
+          },
          "raw": {
            "type": "boolean",
            "default": false,
@@ -82,7 +97,7 @@
         },
         "temperature": {
           "type": "number",
-          "default": 0.6,
+          "default": 0.15,
           "minimum": 0,
           "maximum": 5,
           "description": "Controls the randomness of the output; higher values produce more random results."
@@ -141,15 +156,67 @@
              "type": "string",
              "description": "The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool')."
            },
-           "content": {
+           "tool_call_id": {
              "type": "string",
-             "description": "The content of the message as a string."
+             "description": "The tool call id. Must be supplied for tool calls for Mistral-3. If you don't know what to put here you can fall back to 000000001",
+             "pattern": "[a-zA-Z0-9]{9}"
+           },
+           "content": {
+             "oneOf": [
+               {
+                 "type": "string",
+                 "description": "The content of the message as a string."
+               },
+               {
+                 "type": "array",
+                 "items": {
+                   "type": "object",
+                   "properties": {
+                     "type": {
+                       "type": "string",
+                       "description": "Type of the content provided"
+                     },
+                     "text": {
+                       "type": "string"
+                     },
+                     "image_url": {
+                       "type": "object",
+                       "properties": {
+                         "url": {
+                           "type": "string",
+                           "pattern": "^data:*",
+                           "description": "image uri with data (e.g. data:image/jpeg;base64,/9j/...). HTTP URL will not be accepted"
+                         }
+                       }
+                     }
+                   }
+                 }
+               },
+               {
+                 "type": "object",
+                 "properties": {
+                   "type": {
+                     "type": "string",
+                     "description": "Type of the content provided"
+                   },
+                   "text": {
+                     "type": "string"
+                   },
+                   "image_url": {
+                     "type": "object",
+                     "properties": {
+                       "url": {
+                         "type": "string",
+                         "pattern": "^data:*",
+                         "description": "image uri with data (e.g. data:image/jpeg;base64,/9j/...). HTTP URL will not be accepted"
+                       }
+                     }
+                   }
+                 }
+               }
+             ]
            }
-         },
-         "required": [
-           "role",
-           "content"
-         ]
+         }
          }
        },
        "functions": {
@@ -313,7 +380,6 @@
         },
         "response_format": {
           "title": "JSON Mode",
-          "type": "object",
           "properties": {
             "type": {
               "type": "string",
@@ -325,6 +391,10 @@
               "json_schema": {}
             }
           },
+          "guided_json": {
+            "type": "object",
+            "description": "JSON schema that should be fufilled for the response."
+          },
           "raw": {
             "type": "boolean",
             "default": false,
@@ -342,7 +412,7 @@
         },
         "temperature": {
           "type": "number",
-          "default": 0.6,
+          "default": 0.15,
           "minimum": 0,
           "maximum": 5,
           "description": "Controls the randomness of the output; higher values produce more random results."
@@ -387,13 +457,100 @@
        "required": [
          "messages"
        ]
+      },
+      {
+        "type": "object",
+        "properties": {
+          "requests": {
+            "type": "array",
+            "items": {
+              "type": "object",
+              "properties": {
+                "external_reference": {
+                  "type": "string",
+                  "description": "User-supplied reference. This field will be present in the response as well it can be used to reference the request and response. It's NOT validated to be unique."
+                },
+                "prompt": {
+                  "type": "string",
+                  "minLength": 1,
+                  "description": "Prompt for the text generation model"
+                },
+                "stream": {
+                  "type": "boolean",
+                  "default": false,
+                  "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events."
+                },
+                "max_tokens": {
+                  "type": "integer",
+                  "default": 256,
+                  "description": "The maximum number of tokens to generate in the response."
+                },
+                "temperature": {
+                  "type": "number",
+                  "default": 0.6,
+                  "minimum": 0,
+                  "maximum": 5,
+                  "description": "Controls the randomness of the output; higher values produce more random results."
+                },
+                "top_p": {
+                  "type": "number",
+                  "minimum": 0,
+                  "maximum": 2,
+                  "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses."
+                },
+                "seed": {
+                  "type": "integer",
+                  "minimum": 1,
+                  "maximum": 9999999999,
+                  "description": "Random seed for reproducibility of the generation."
+                },
+                "repetition_penalty": {
+                  "type": "number",
+                  "minimum": 0,
+                  "maximum": 2,
+                  "description": "Penalty for repeated tokens; higher values discourage repetition."
+                },
+                "frequency_penalty": {
+                  "type": "number",
+                  "minimum": 0,
+                  "maximum": 2,
+                  "description": "Decreases the likelihood of the model repeating the same lines verbatim."
+                },
+                "presence_penalty": {
+                  "type": "number",
+                  "minimum": 0,
+                  "maximum": 2,
+                  "description": "Increases the likelihood of the model introducing new topics."
+                },
+                "response_format": {
+                  "title": "JSON Mode",
+                  "type": "object",
+                  "properties": {
+                    "type": {
+                      "type": "string",
+                      "enum": [
+                        "json_object",
+                        "json_schema"
+                      ]
+                    },
+                    "json_schema": {}
+                  }
+                }
+              }
+            }
+          }
+        },
+        "required": [
+          "requests"
+        ]
      }
    ]
  },
  "output": {
    "oneOf": [
      {
        "type": "object",
+        "contentType": "application/json",
        "properties": {
          "response": {
            "type": "string",
@@ -444,7 +601,19 @@
      },
      {
        "type": "string",
+        "contentType": "text/event-stream",
        "format": "binary"
+      },
+      {
+        "type": "object",
+        "contentType": "application/json",
+        "title": "Async response",
+        "properties": {
+          "request_id": {
+            "type": "string",
+            "description": "The async request id that can be used to obtain the results."
+          }
+        }
      }
    ]
  }
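With the async acknowledgement added to output.oneOf, a caller can now receive a plain JSON object with response, an SSE stream, or a request_id to fetch results later. A sketch of narrowing between those shapes (the union type and helper are illustrative, not library code):

    // Illustrative narrowing over the three output variants described by the schema.
    type ModelOutput =
      | { response: string }      // contentType: application/json
      | ReadableStream            // contentType: text/event-stream
      | { request_id: string };   // async queue acknowledgement

    function describeOutput(out: ModelOutput): string {
      if (out instanceof ReadableStream) {
        return "streaming response (SSE)";
      }
      if ("request_id" in out) {
        return `queued; fetch the result later with request_id ${out.request_id}`;
      }
      return out.response;
    }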

src/content/workers-ai-models/mistral-small-3.1-24b-instruct.json

Lines changed: 0 additions & 4 deletions
@@ -29,10 +29,6 @@
           "currency": "USD"
         }
       ]
-    },
-    {
-      "property_id": "lora",
-      "value": "true"
     }
   ],
   "schema": {
