|
11 | 11 | "created_at": "2024-12-06 17:09:18.338", |
12 | 12 | "tags": [], |
13 | 13 | "properties": [ |
| 14 | + { |
| 15 | + "property_id": "async_queue", |
| 16 | + "value": "true" |
| 17 | + }, |
14 | 18 | { |
15 | 19 | "property_id": "context_window", |
16 | 20 | "value": "24000" |
|
30 | 34 | } |
31 | 35 | ] |
32 | 36 | }, |
| 37 | + { |
| 38 | + "property_id": "function_calling", |
| 39 | + "value": "true" |
| 40 | + }, |
33 | 41 | { |
34 | 42 | "property_id": "terms", |
35 | 43 | "value": "https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/LICENSE" |
|
387 | 395 | "required": [ |
388 | 396 | "messages" |
389 | 397 | ] |
| 398 | + }, |
| 399 | + { |
| 400 | + "title": "Async Batch", |
| 401 | + "type": "object", |
| 402 | + "properties": { |
| 403 | + "requests": { |
| 404 | + "type": "array", |
| 405 | + "items": { |
| 406 | + "type": "object", |
| 407 | + "properties": { |
| 408 | + "external_reference": { |
| 409 | + "type": "string", |
| 410 | + "description": "User-supplied reference. This field will also be present in the response, so it can be used to match each request with its response. It is NOT validated to be unique." |
| 411 | + }, |
| 412 | + "prompt": { |
| 413 | + "type": "string", |
| 414 | + "minLength": 1, |
| 415 | + "description": "Prompt for the text generation model" |
| 416 | + }, |
| 417 | + "stream": { |
| 418 | + "type": "boolean", |
| 419 | + "default": false, |
| 420 | + "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events." |
| 421 | + }, |
| 422 | + "max_tokens": { |
| 423 | + "type": "integer", |
| 424 | + "default": 256, |
| 425 | + "description": "The maximum number of tokens to generate in the response." |
| 426 | + }, |
| 427 | + "temperature": { |
| 428 | + "type": "number", |
| 429 | + "default": 0.6, |
| 430 | + "minimum": 0, |
| 431 | + "maximum": 5, |
| 432 | + "description": "Controls the randomness of the output; higher values produce more random results." |
| 433 | + }, |
| 434 | + "top_p": { |
| 435 | + "type": "number", |
| 436 | + "minimum": 0, |
| 437 | + "maximum": 2, |
| 438 | + "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." |
| 439 | + }, |
| 440 | + "seed": { |
| 441 | + "type": "integer", |
| 442 | + "minimum": 1, |
| 443 | + "maximum": 9999999999, |
| 444 | + "description": "Random seed for reproducibility of the generation." |
| 445 | + }, |
| 446 | + "repetition_penalty": { |
| 447 | + "type": "number", |
| 448 | + "minimum": 0, |
| 449 | + "maximum": 2, |
| 450 | + "description": "Penalty for repeated tokens; higher values discourage repetition." |
| 451 | + }, |
| 452 | + "frequency_penalty": { |
| 453 | + "type": "number", |
| 454 | + "minimum": 0, |
| 455 | + "maximum": 2, |
| 456 | + "description": "Decreases the likelihood of the model repeating the same lines verbatim." |
| 457 | + }, |
| 458 | + "presence_penalty": { |
| 459 | + "type": "number", |
| 460 | + "minimum": 0, |
| 461 | + "maximum": 2, |
| 462 | + "description": "Increases the likelihood of the model introducing new topics." |
| 463 | + }, |
| 464 | + "response_format": { |
| 465 | + "title": "JSON Mode", |
| 466 | + "type": "object", |
| 467 | + "properties": { |
| 468 | + "type": { |
| 469 | + "type": "string", |
| 470 | + "enum": [ |
| 471 | + "json_object", |
| 472 | + "json_schema" |
| 473 | + ] |
| 474 | + }, |
| 475 | + "json_schema": {} |
| 476 | + } |
| 477 | + } |
| 478 | + } |
| 479 | + } |
| 480 | + } |
| 481 | + } |
390 | 482 | } |
391 | 483 | ] |
392 | 484 | }, |
393 | 485 | "output": { |
394 | 486 | "oneOf": [ |
395 | 487 | { |
396 | 488 | "type": "object", |
| 489 | + "contentType": "application/json", |
397 | 490 | "properties": { |
398 | 491 | "response": { |
399 | 492 | "type": "string", |
|
444 | 537 | }, |
445 | 538 | { |
446 | 539 | "type": "string", |
| 540 | + "contentType": "text/event-stream", |
447 | 541 | "format": "binary" |
| 542 | + }, |
| 543 | + { |
| 544 | + "type": "object", |
| 545 | + "contentType": "application/json", |
| 546 | + "title": "Async response", |
| 547 | + "properties": { |
| 548 | + "request_id": { |
| 549 | + "type": "string", |
| 550 | + "description": "The async request id that can be used to obtain the results." |
| 551 | + } |
| 552 | + } |
448 | 553 | } |
449 | 554 | ] |
450 | 555 | } |
|
0 commit comments