
Commit 9e2de05

Add support for batching, function calling and loras for llama-3.3-70b
1 parent 2a8d1c6 commit 9e2de05

File tree

2 files changed: +176 -41 lines changed


src/content/workers-ai-models/llama-3.3-70b-instruct-fp8-fast.json

Lines changed: 176 additions & 37 deletions
@@ -11,6 +11,10 @@
   "created_at": "2024-12-06 17:09:18.338",
   "tags": [],
   "properties": [
+    {
+      "property_id": "async_queue",
+      "value": "true"
+    },
     {
       "property_id": "context_window",
       "value": "24000"
@@ -30,6 +34,14 @@
         }
       ]
     },
+    {
+      "property_id": "function_calling",
+      "value": "true"
+    },
+    {
+      "property_id": "lora",
+      "value": "true"
+    },
     {
       "property_id": "terms",
       "value": "https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/LICENSE"
@@ -47,23 +59,9 @@
       "minLength": 1,
       "description": "The input text prompt for the model to generate a response."
     },
-    "lora": {
-      "type": "string",
-      "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model."
-    },
-    "response_format": {
-      "title": "JSON Mode",
+    "guided_json": {
       "type": "object",
-      "properties": {
-        "type": {
-          "type": "string",
-          "enum": [
-            "json_object",
-            "json_schema"
-          ]
-        },
-        "json_schema": {}
-      }
+      "description": "JSON schema that should be fulfilled for the response."
     },
     "raw": {
       "type": "boolean",
@@ -82,7 +80,7 @@
     },
     "temperature": {
       "type": "number",
-      "default": 0.6,
+      "default": 0.15,
       "minimum": 0,
       "maximum": 5,
       "description": "Controls the randomness of the output; higher values produce more random results."
@@ -141,15 +139,67 @@
             "type": "string",
             "description": "The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool')."
           },
-          "content": {
+          "tool_call_id": {
             "type": "string",
-            "description": "The content of the message as a string."
+            "description": "The tool call id. Must be supplied for tool calls for Mistral-3. If you don't know what to put here you can fall back to 000000001",
+            "pattern": "[a-zA-Z0-9]{9}"
+          },
+          "content": {
+            "oneOf": [
+              {
+                "type": "string",
+                "description": "The content of the message as a string."
+              },
+              {
+                "type": "array",
+                "items": {
+                  "type": "object",
+                  "properties": {
+                    "type": {
+                      "type": "string",
+                      "description": "Type of the content provided"
+                    },
+                    "text": {
+                      "type": "string"
+                    },
+                    "image_url": {
+                      "type": "object",
+                      "properties": {
+                        "url": {
+                          "type": "string",
+                          "pattern": "^data:*",
+                          "description": "image uri with data (e.g. data:image/jpeg;base64,/9j/...). HTTP URL will not be accepted"
+                        }
+                      }
+                    }
+                  }
+                }
+              },
+              {
+                "type": "object",
+                "properties": {
+                  "type": {
+                    "type": "string",
+                    "description": "Type of the content provided"
+                  },
+                  "text": {
+                    "type": "string"
+                  },
+                  "image_url": {
+                    "type": "object",
+                    "properties": {
+                      "url": {
+                        "type": "string",
+                        "pattern": "^data:*",
+                        "description": "image uri with data (e.g. data:image/jpeg;base64,/9j/...). HTTP URL will not be accepted"
+                      }
+                    }
+                  }
+                }
+              }
+            ]
           }
-        },
-        "required": [
-          "role",
-          "content"
-        ]
+        }
       }
     },
     "functions": {
@@ -311,19 +361,9 @@
           ]
         }
       },
-      "response_format": {
-        "title": "JSON Mode",
+      "guided_json": {
         "type": "object",
-        "properties": {
-          "type": {
-            "type": "string",
-            "enum": [
-              "json_object",
-              "json_schema"
-            ]
-          },
-          "json_schema": {}
-        }
+        "description": "JSON schema that should be fulfilled for the response."
       },
       "raw": {
         "type": "boolean",
@@ -342,7 +382,7 @@
       },
       "temperature": {
         "type": "number",
-        "default": 0.6,
+        "default": 0.15,
         "minimum": 0,
         "maximum": 5,
         "description": "Controls the randomness of the output; higher values produce more random results."
@@ -387,13 +427,100 @@
       "required": [
         "messages"
       ]
+    },
+    {
+      "type": "object",
+      "properties": {
+        "requests": {
+          "type": "array",
+          "items": {
+            "type": "object",
+            "properties": {
+              "external_reference": {
+                "type": "string",
+                "description": "User-supplied reference. This field will be present in the response as well, so it can be used to reference the request and response. It's NOT validated to be unique."
+              },
+              "prompt": {
+                "type": "string",
+                "minLength": 1,
+                "description": "Prompt for the text generation model"
+              },
+              "stream": {
+                "type": "boolean",
+                "default": false,
+                "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events."
+              },
+              "max_tokens": {
+                "type": "integer",
+                "default": 256,
+                "description": "The maximum number of tokens to generate in the response."
+              },
+              "temperature": {
+                "type": "number",
+                "default": 0.6,
+                "minimum": 0,
+                "maximum": 5,
+                "description": "Controls the randomness of the output; higher values produce more random results."
+              },
+              "top_p": {
+                "type": "number",
+                "minimum": 0,
+                "maximum": 2,
+                "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses."
+              },
+              "seed": {
+                "type": "integer",
+                "minimum": 1,
+                "maximum": 9999999999,
+                "description": "Random seed for reproducibility of the generation."
+              },
+              "repetition_penalty": {
+                "type": "number",
+                "minimum": 0,
+                "maximum": 2,
+                "description": "Penalty for repeated tokens; higher values discourage repetition."
+              },
+              "frequency_penalty": {
+                "type": "number",
+                "minimum": 0,
+                "maximum": 2,
+                "description": "Decreases the likelihood of the model repeating the same lines verbatim."
+              },
+              "presence_penalty": {
+                "type": "number",
+                "minimum": 0,
+                "maximum": 2,
+                "description": "Increases the likelihood of the model introducing new topics."
+              },
+              "response_format": {
+                "title": "JSON Mode",
+                "type": "object",
+                "properties": {
+                  "type": {
+                    "type": "string",
+                    "enum": [
+                      "json_object",
+                      "json_schema"
+                    ]
+                  },
+                  "json_schema": {}
+                }
+              }
+            }
+          }
+        }
+      },
+      "required": [
+        "requests"
+      ]
     }
   ]
 },
 "output": {
   "oneOf": [
     {
       "type": "object",
+      "contentType": "application/json",
       "properties": {
         "response": {
           "type": "string",
@@ -444,7 +571,19 @@
     },
     {
       "type": "string",
+      "contentType": "text/event-stream",
       "format": "binary"
+    },
+    {
+      "type": "object",
+      "contentType": "application/json",
+      "title": "Async response",
+      "properties": {
+        "request_id": {
+          "type": "string",
+          "description": "The async request id that can be used to obtain the results."
+        }
+      }
     }
   ]
 }
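
Note on the last two hunks: the new requests input variant and the "Async response" output shape describe a queued batch flow — submit an array of prompts and receive a request_id instead of an inline completion. A sketch under those assumptions; the queueRequest option name, the binding interface, and the exact polling mechanism are guesses for illustration and are not taken from this commit:

    interface AiBinding {
      run(model: string, inputs: unknown, options?: Record<string, unknown>): Promise<unknown>;
    }

    interface AsyncSubmitResponse {
      request_id: string; // id used later to fetch the queued results
    }

    // Submits a batch of prompts to the async queue and returns the request id.
    async function submitBatch(env: { AI: AiBinding }): Promise<string> {
      const res = (await env.AI.run(
        "@cf/meta/llama-3.3-70b-instruct-fp8-fast",
        {
          requests: [
            { external_reference: "job-1", prompt: "Summarise the Workers AI docs." },
            { external_reference: "job-2", prompt: "Write a haiku about queues." },
          ],
        },
        { queueRequest: true }, // assumed flag for opting into the async_queue path
      )) as AsyncSubmitResponse;
      return res.request_id;
    }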

src/content/workers-ai-models/mistral-small-3.1-24b-instruct.json

Lines changed: 0 additions & 4 deletions
@@ -29,10 +29,6 @@
           "currency": "USD"
         }
       ]
-    },
-    {
-      "property_id": "lora",
-      "value": "true"
     }
   ],
   "schema": {
