|
512 | 512 | } |
513 | 513 | } |
514 | 514 | }, |
| 515 | + "/v1/models/add": { |
| 516 | + "post": { |
| 517 | + "operationId": "ModelsController_addModel", |
| 518 | + "summary": "Add a model", |
| 519 | + "description": "Add a new model configuration to the system.", |
| 520 | + "requestBody": { |
| 521 | + "required": true, |
| 522 | + "content": { |
| 523 | + "application/json": { |
| 524 | + "schema": { |
| 525 | + "$ref": "#/components/schemas/AddModelRequest" |
| 526 | + } |
| 527 | + } |
| 528 | + } |
| 529 | + }, |
| 530 | + "responses": { |
| 531 | + "200": { |
| 532 | + "description": "Successful response", |
| 533 | + "content": { |
| 534 | + "application/json": { |
| 535 | + "schema": { |
| 536 | + "type": "object", |
| 537 | + "properties": { |
| 538 | + "message": { |
| 539 | + "type": "string" |
| 540 | + }, |
| 541 | + "model": { |
| 542 | + "type": "object", |
| 543 | + "properties": { |
| 544 | + "model": { |
| 545 | + "type": "string" |
| 546 | + }, |
| 547 | + "engine": { |
| 548 | + "type": "string" |
| 549 | + }, |
| 550 | + "version": { |
| 551 | + "type": "string" |
| 552 | + } |
| 553 | + } |
| 554 | + } |
| 555 | + } |
| 556 | + }, |
| 557 | + "example": { |
| 558 | + "message": "Model added successfully!", |
| 559 | + "model": { |
| 560 | + "model": "claude-3-5-sonnet-20241022", |
| 561 | + "engine": "anthropic", |
| 562 | + "version": "2023-06-01" |
| 563 | + } |
| 564 | + } |
| 565 | + } |
| 566 | + } |
| 567 | + }, |
| 568 | + "400": { |
| 569 | + "description": "Bad request", |
| 570 | + "content": { |
| 571 | + "application/json": { |
| 572 | + "schema": { |
| 573 | + "$ref": "#/components/schemas/SimpleErrorResponse" |
| 574 | + } |
| 575 | + } |
| 576 | + } |
| 577 | + } |
| 578 | + }, |
| 579 | + "tags": ["Pulling Models"] |
| 580 | + } |
| 581 | + }, |
515 | 582 | "/v1/models": { |
516 | 583 | "get": { |
517 | 584 | "operationId": "ModelsController_findAll", |
|
1417 | 1484 | "required": true, |
1418 | 1485 | "schema": { |
1419 | 1486 | "type": "string", |
1420 | | - "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"], |
| 1487 | + "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm", "openai", "anthropic"], |
1421 | 1488 | "default": "llama-cpp" |
1422 | 1489 | }, |
1423 | 1490 | "description": "The type of engine" |
|
1439 | 1506 | "type": "string", |
1440 | 1507 | "description": "The variant of the engine to install (optional)", |
1441 | 1508 | "example": "mac-arm64" |
| 1509 | + }, |
| 1510 | + "type": { |
| 1511 | + "type": "string", |
| 1512 | + "description": "The type of connection", |
| 1513 | + "example": "remote" |
| 1514 | + }, |
| 1515 | + "url": { |
| 1516 | + "type": "string", |
| 1517 | + "description": "The URL for the API endpoint", |
| 1518 | + "example": "https://api.openai.com" |
| 1519 | + }, |
| 1520 | + "api_key": { |
| 1521 | + "type": "string", |
| 1522 | + "description": "The API key for authentication", |
| 1523 | + "example": "" |
| 1524 | + }, |
| 1525 | + "metadata": { |
| 1526 | + "type": "object", |
| 1527 | + "properties": { |
| 1528 | + "get_models_url": { |
| 1529 | + "type": "string", |
| 1530 | + "description": "The URL to get models", |
| 1531 | + "example": "https://api.openai.com/v1/models" |
| 1532 | + } |
| 1533 | + } |
1442 | 1534 | } |
1443 | 1535 | } |
1444 | 1536 | } |
|
1475 | 1567 | "required": true, |
1476 | 1568 | "schema": { |
1477 | 1569 | "type": "string", |
1478 | | - "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"], |
| 1570 | + "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm", "openai", "anthropic"], |
1479 | 1571 | "default": "llama-cpp" |
1480 | 1572 | }, |
1481 | 1573 | "description": "The type of engine" |
|
1690 | 1782 | "required": true, |
1691 | 1783 | "schema": { |
1692 | 1784 | "type": "string", |
1693 | | - "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"], |
| 1785 | + "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm", "openai", "anthropic"], |
1694 | 1786 | "default": "llama-cpp" |
1695 | 1787 | }, |
1696 | 1788 | "description": "The name of the engine to update" |
|
3623 | 3715 | } |
3624 | 3716 | } |
3625 | 3717 | }, |
| 3718 | + "AddModelRequest": { |
| 3719 | + "type": "object", |
| 3720 | + "required": ["model", "engine", "version", "inference_params", "TransformReq", "TransformResp", "metadata"], |
| 3721 | + "properties": { |
| 3722 | + "model": { |
| 3723 | + "type": "string", |
| 3724 | + "description": "The identifier of the model." |
| 3725 | + }, |
| 3726 | + "api_key_template": { |
| 3727 | + "type": "string", |
| 3728 | + "description": "Template for the API key header." |
| 3729 | + }, |
| 3730 | + "engine": { |
| 3731 | + "type": "string", |
| 3732 | + "description": "The engine used for the model." |
| 3733 | + }, |
| 3734 | + "version": { |
| 3735 | + "type": "string", |
| 3736 | + "description": "The version of the model." |
| 3737 | + }, |
| 3738 | + "inference_params": { |
| 3739 | + "type": "object", |
| 3740 | + "properties": { |
| 3741 | + "temperature": { |
| 3742 | + "type": "number" |
| 3743 | + }, |
| 3744 | + "top_p": { |
| 3745 | + "type": "number" |
| 3746 | + }, |
| 3747 | + "frequency_penalty": { |
| 3748 | + "type": "number" |
| 3749 | + }, |
| 3750 | + "presence_penalty": { |
| 3751 | + "type": "number" |
| 3752 | + }, |
| 3753 | + "max_tokens": { |
| 3754 | + "type": "integer" |
| 3755 | + }, |
| 3756 | + "stream": { |
| 3757 | + "type": "boolean" |
| 3758 | + } |
| 3759 | + } |
| 3760 | + }, |
| 3761 | + "TransformReq": { |
| 3762 | + "type": "object", |
| 3763 | + "properties": { |
| 3764 | + "get_models": { |
| 3765 | + "type": "object" |
| 3766 | + }, |
| 3767 | + "chat_completions": { |
| 3768 | + "type": "object", |
| 3769 | + "properties": { |
| 3770 | + "url": { |
| 3771 | + "type": "string" |
| 3772 | + }, |
| 3773 | + "template": { |
| 3774 | + "type": "string" |
| 3775 | + } |
| 3776 | + } |
| 3777 | + }, |
| 3778 | + "embeddings": { |
| 3779 | + "type": "object" |
| 3780 | + } |
| 3781 | + } |
| 3782 | + }, |
| 3783 | + "TransformResp": { |
| 3784 | + "type": "object", |
| 3785 | + "properties": { |
| 3786 | + "chat_completions": { |
| 3787 | + "type": "object", |
| 3788 | + "properties": { |
| 3789 | + "template": { |
| 3790 | + "type": "string" |
| 3791 | + } |
| 3792 | + } |
| 3793 | + }, |
| 3794 | + "embeddings": { |
| 3795 | + "type": "object" |
| 3796 | + } |
| 3797 | + } |
| 3798 | + }, |
| 3799 | + "metadata": { |
| 3800 | + "type": "object", |
| 3801 | + "properties": { |
| 3802 | + "author": { |
| 3803 | + "type": "string" |
| 3804 | + }, |
| 3805 | + "description": { |
| 3806 | + "type": "string" |
| 3807 | + }, |
| 3808 | + "end_point": { |
| 3809 | + "type": "string" |
| 3810 | + }, |
| 3811 | + "logo": { |
| 3812 | + "type": "string" |
| 3813 | + }, |
| 3814 | + "api_key_url": { |
| 3815 | + "type": "string" |
| 3816 | + } |
| 3817 | + } |
| 3818 | + } |
| 3819 | + } |
| 3820 | + }, |
3626 | 3821 | "CreateModelDto": { |
3627 | 3822 | "type": "object", |
3628 | 3823 | "properties": { |
|
4292 | 4487 | "type": "integer", |
4293 | 4488 | "description": "Number of GPU layers.", |
4294 | 4489 | "example": 33 |
| 4490 | + }, |
| 4491 | + "api_key_template": { |
| 4492 | + "type": "string", |
| 4493 | + "description": "Template for the API key header." |
| 4494 | + }, |
| 4495 | + "version": { |
| 4496 | + "type": "string", |
| 4497 | + "description": "The version of the model." |
| 4498 | + }, |
| 4499 | + "inference_params": { |
| 4500 | + "type": "object", |
| 4501 | + "properties": { |
| 4502 | + "temperature": { |
| 4503 | + "type": "number" |
| 4504 | + }, |
| 4505 | + "top_p": { |
| 4506 | + "type": "number" |
| 4507 | + }, |
| 4508 | + "frequency_penalty": { |
| 4509 | + "type": "number" |
| 4510 | + }, |
| 4511 | + "presence_penalty": { |
| 4512 | + "type": "number" |
| 4513 | + }, |
| 4514 | + "max_tokens": { |
| 4515 | + "type": "integer" |
| 4516 | + }, |
| 4517 | + "stream": { |
| 4518 | + "type": "boolean" |
| 4519 | + } |
| 4520 | + } |
4295 | 4521 | } |
4296 | 4522 | } |
4297 | 4523 | }, |
|
0 commit comments