|
3 | 3 | { |
4 | 4 | "label": "Intel® AI for Enterprise Inference", |
5 | 5 | "name": "da-enterprise-inference", |
6 | | - "version": "1.0.0", |
| 6 | + "version": "1.2.0", |
7 | 7 | "product_kind": "solution", |
8 | 8 | "tags": [ |
9 | 9 | "opea", |
|
15 | 15 | "intel_gaudi", |
16 | 16 | "enterprise-inference" |
17 | 17 | ], |
18 | | - "short_description": "Intel® AI for Enterprise Inference (Enterprise Inference) is aimed to streamline the deployment and management of inference services on Intel® hardware.", |
19 | | - "long_description": "This solution leverages Terraform to automate the deployment of virtual machines on IBM Cloud, specifically tailored for AI workloads using Intel® Gaudi® 3 AI accelerators. It installs all necessary Intel Gaudi 3 AI accelerator-specific operators and deploys large language models (LLMs) on single node K8S cluster using Intel® AI for Enterprise Inference framework from the Open Platform for Enterprise AI (OPEA). By integrating cutting-edge AI infrastructure with cloud automation, this solution provides a streamlined approach to deploying and managing machine learning models in a scalable and efficient manner. Deploy this to automate the process of VM spin up, manage installation of Intel Gaudi 3 AI accelerator operators and LLM models on K8S cluster using Terraform referring to OPEA's Intel® AI for Enterprise Inference framework.\n\n**Prerequisites:**\n- Review the [Getting Started Guide](https://github.com/opea-project/Enterprise-Inference/blob/main/third_party/IBM/docs/getting-started.md) for deployment instructions\n- Consult the [Sizing Guide](https://github.com/opea-project/Enterprise-Inference/blob/main/third_party/IBM/docs/sizing-guide.md) to understand Intel® Gaudi® 3 AI Accelerator capabilities and choose the optimal configuration for your AI models", |
| 18 | + "short_description": "Intel® AI for Enterprise Inference (Enterprise Inference) aims to streamline the deployment and management of inference services on Intel® hardware.", |
| 19 | + "long_description": "This solution automates the provisioning of virtual machines on IBM Cloud using Terraform, optimized for AI workloads powered by Intel® Gaudi® 3 AI accelerators. It deploys a Kubernetes cluster that supports both single-node and multi-node configurations. The multi-node setup includes three control plane nodes for orchestration and cluster management, along with worker nodes equipped with Intel® Gaudi® 3 for high-performance AI inference. The deployment integrates the Intel® AI for Enterprise Inference framework from the Open Platform for Enterprise AI (OPEA), enabling scalable deployment of Large Language Models (LLMs) across various configurations. It also includes key enterprise-grade features such as GenAI Gateway for secure and scalable access to generative AI models, Token Telemetry for detailed tracking and analysis of token usage across inference workloads, and Observability for real-time monitoring of system performance, resource utilization, and model behavior. By combining cutting-edge AI infrastructure with cloud automation, this solution provides a streamlined and efficient approach to deploying and managing machine learning models at scale.\n\n**Prerequisites:**\n- Review the [Getting Started Guide](https://github.com/opea-project/Enterprise-Inference/blob/main/third_party/IBM/docs/getting-started.md) for deployment instructions\n- Consult the [Sizing Guide](https://github.com/opea-project/Enterprise-Inference/blob/main/third_party/IBM/docs/sizing-guide.md) to understand Intel® Gaudi® 3 AI Accelerator capabilities and choose the optimal configuration for your AI models", |
20 | 20 | "features": [ |
21 | 21 | { |
22 | 22 | "title": "Optimized AI Deployment", |
|
219 | 219 | "description": "The contents of the TLS private key (PEM format)", |
220 | 220 | "required": true |
221 | 221 | }, |
222 | | - { |
223 | | - "key": "keycloak_admin_user", |
| 222 | + { |
| 223 | + "key": "deployment_mode", |
224 | 224 | "type": "string", |
225 | | - "default_value": "admin", |
226 | | - "description": "Keycloak admin user name", |
227 | | - "required": true |
| 225 | + "display_name": "Deployment Mode", |
| 226 | + "default_value": "single-node", |
| 227 | + "description": "Deployment mode for the infrastructure (single-node or multi-node)", |
| 228 | + "required": true, |
| 229 | + "options": [ |
| 230 | + { |
| 231 | + "displayname": "Single Node", |
| 232 | + "value": "single-node" |
| 233 | + }, |
| 234 | + { |
| 235 | + "displayname": "Multi Node", |
| 236 | + "value": "multi-node" |
| 237 | + } |
| 238 | + ] |
228 | 239 | }, |
229 | 240 | { |
230 | | - "key": "keycloak_admin_password", |
231 | | - "type": "multiline_secure_value", |
232 | | - "default_value": "", |
233 | | - "description": "Keycloak admin password", |
234 | | - "required": true |
| 241 | + "key": "control_plane_count", |
| 242 | + "type": "number", |
| 243 | + "display_name": "Control plane nodes count", |
| 244 | + "default_value": "3", |
| 245 | + "description": "Number of control plane nodes (1 for single or 3 for HA) - only used in multi-node mode", |
| 246 | + "required": true |
| 247 | + }, |
| 248 | + { |
| 249 | + "key": "worker_gaudi_count", |
| 250 | + "type": "number", |
| 251 | + "display_name": "Worker nodes count", |
| 252 | + "default_value": "1", |
| 253 | + "description": "Number of Gaudi worker nodes for inference - only used in multi-node mode", |
| 254 | + "required": true |
| 255 | + }, |
| 256 | + { |
| 257 | + "key": "control_plane_names", |
| 258 | + "type": "list(string)", |
| 259 | + "display_name": "Control plane names", |
| 260 | + "default_value": "[]", |
| 261 | + "description": "Custom names for control plane nodes. If not provided, defaults to 'inference-control-plane-01', etc." |
| 262 | + }, |
| 263 | + { |
| 264 | + "key": "worker_gaudi_names", |
| 265 | + "type": "list(string)", |
| 266 | + "display_name": "Gaudi worker names", |
| 267 | + "default_value": "[]", |
| 268 | + "description": "Custom names for Gaudi worker nodes. If not provided, defaults to 'inference-workload-gaudi-node-01', etc." |
235 | 269 | }, |
236 | 270 | { |
237 | 271 | "key": "image", |
|
256 | 290 | "type": "string", |
257 | 291 | "default_value": "~/certs/key.pem", |
258 | 292 | "description": "The path to the key file" |
259 | | - }, |
260 | | - { |
261 | | - "key": "keycloak_client_id", |
262 | | - "type": "string", |
263 | | - "default_value": "ibm-app", |
264 | | - "description": "Keycloak client id" |
265 | 293 | }, |
266 | 294 | { |
267 | 295 | "key": "cpu_or_gpu", |
268 | 296 | "type": "string", |
269 | 297 | "default_value": "gaudi3", |
270 | 298 | "description": "This variable specifies where the model should be running" |
271 | 299 | }, |
| 300 | + { |
| 301 | + "key": "vault_pass_code", |
| 302 | + "type": "multiline_secure_value", |
| 303 | + "default_value": "", |
| 304 | + "description": "Enter the master vault pass code", |
| 305 | + "required": true |
| 306 | + }, |
272 | 307 | { |
273 | 308 | "key": "deploy_kubernetes_fresh", |
274 | 309 | "type": "string", |
|
282 | 317 | "description": "This variable specifies whether to deploy NGINX ingress controller or not" |
283 | 318 | }, |
284 | 319 | { |
285 | | - "key": "deploy_llm_models", |
| 320 | + "key": "deploy_genai_gateway", |
| 321 | + "type": "string", |
| 322 | + "default_value": "yes", |
| 323 | + "description": "This variable specifies whether we need to deploy Gen AI Gateway" |
| 324 | + }, |
| 325 | + { |
| 326 | + "key": "deploy_observability", |
286 | 327 | "type": "string", |
287 | 328 | "default_value": "no", |
288 | | - "description": "This variable specfies whether we need to deploy LLM models" |
| 329 | + "description": "This variable specifies whether we need to run observability" |
289 | 330 | }, |
290 | 331 | { |
291 | | - "key": "deploy_keycloak_apisix", |
| 332 | + "key": "deploy_llm_models", |
292 | 333 | "type": "string", |
293 | | - "default_value": "yes", |
294 | | - "description": "This variable specfies whether we need to run keycloak and Apisix components" |
| 334 | + "default_value": "no", |
| 335 | + "description": "This variable specifies whether we need to deploy LLM models" |
295 | 336 | } |
296 | 337 | ] |
297 | 338 | }, |
|
431 | 472 | "type": "resource_group" |
432 | 473 | } |
433 | 474 | }, |
434 | | - { |
| 475 | + { |
435 | 476 | "key": "user_cert", |
436 | 477 | "type": "multiline_secure_value", |
437 | 478 | "display_name": "Full chain Certificate", |
438 | 479 | "default_value": "-----BEGIN CERTIFICATE-----", |
439 | 480 | "description": "The contents of the TLS certificate (PEM format)", |
440 | 481 | "required": true |
441 | 482 | }, |
442 | | - { |
| 483 | + { |
443 | 484 | "key": "user_key", |
444 | 485 | "type": "multiline_secure_value", |
445 | 486 | "display_name": "Certificate key", |
|
448 | 489 | "required": true |
449 | 490 | }, |
450 | 491 | { |
451 | | - "key": "keycloak_admin_user", |
| 492 | + "key": "ssh_allowed_cidr", |
452 | 493 | "type": "string", |
453 | | - "default_value": "admin", |
454 | | - "description": "Keycloak admin user name", |
| 494 | + "default_value": "\"\"", |
| 495 | + "description": "CIDR block(s) allowed for SSH, HTTP, and HTTPS access. Enter a single CIDR or comma-separated list like: 198.0.0.0/8,192.0.0.0/8,169.0.0.0/8. Change to your organization's IP range for better security. For development/testing use 0.0.0.0/0", |
455 | 496 | "required": true |
456 | 497 | }, |
457 | 498 | { |
458 | | - "key": "keycloak_admin_password", |
459 | | - "type": "multiline_secure_value", |
460 | | - "default_value": "", |
461 | | - "description": "Keycloak admin password", |
462 | | - "required": true |
463 | | - }, |
464 | | - { |
465 | | - "key": "ssh_allowed_cidr", |
| 499 | + "key": "deployment_mode", |
466 | 500 | "type": "string", |
467 | | - "default_value": "0.0.0.0/0", |
468 | | - "description": "CIDR block allowed for SSH access. Change to your organization's IP range for better security and for non-developement deployments", |
469 | | - "required": true |
| 501 | + "display_name": "Deployment Mode", |
| 502 | + "default_value": "single-node", |
| 503 | + "description": "Deployment mode for the infrastructure (single-node or multi-node)", |
| 504 | + "required": true, |
| 505 | + "options": [ |
| 506 | + { |
| 507 | + "displayname": "Single Node", |
| 508 | + "value": "single-node" |
| 509 | + }, |
| 510 | + { |
| 511 | + "displayname": "Multi Node", |
| 512 | + "value": "multi-node" |
| 513 | + } |
| 514 | + ] |
470 | 515 | }, |
471 | | - { |
472 | | - "key": "image", |
473 | | - "type": "string", |
474 | | - "default_value": "gaudi3-os-u22-01-21-0", |
475 | | - "description": "This variable image name to be used for IAAS bringup on VSI instance" |
| 516 | + { |
| 517 | + "key": "control_plane_count", |
| 518 | + "type": "number", |
| 519 | + "display_name": "Control plane nodes count", |
| 520 | + "default_value": "3", |
| 521 | + "description": "Number of control plane nodes (1 for single or 3 for HA) - only used in multi-node mode", |
| 522 | + "required": true |
476 | 523 | }, |
477 | | - { |
478 | | - "key": "cpu_or_gpu", |
479 | | - "type": "string", |
480 | | - "default_value": "gaudi3", |
481 | | - "description": "This variable specifies where the model should be running" |
| 524 | + { |
| 525 | + "key": "worker_gaudi_count", |
| 526 | + "type": "number", |
| 527 | + "display_name": "Worker nodes count", |
| 528 | + "default_value": "1", |
| 529 | + "description": "Number of Gaudi worker nodes for inference - only used in multi-node mode", |
| 530 | + "required": true |
| 531 | + }, |
| 532 | + { |
| 533 | + "key": "vault_pass_code", |
| 534 | + "type": "multiline_secure_value", |
| 535 | + "default_value": "", |
| 536 | + "description": "Enter the master vault pass code", |
| 537 | + "required": true |
482 | 538 | } |
483 | 539 |
|
484 | 540 | ] |
|
0 commit comments