Skip to content

Commit aa99bb2

Browse files
Gen-AI Gateway Integration DA-1.2.0 changes for IBM Cloud (#25)
Signed-off-by: amberjain1 <[email protected]> Signed-off-by: psurabh <[email protected]> Signed-off-by: AhmedSeemalK <[email protected]> Co-authored-by: vhpintel <[email protected]>
1 parent 2b06ff4 commit aa99bb2

33 files changed

+2091
-630
lines changed

ibm_catalog.json

Lines changed: 106 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
{
44
"label": "Intel® AI for Enterprise Inference",
55
"name": "da-enterprise-inference",
6-
"version": "1.0.0",
6+
"version": "1.2.0",
77
"product_kind": "solution",
88
"tags": [
99
"opea",
@@ -15,8 +15,8 @@
1515
"intel_gaudi",
1616
"enterprise-inference"
1717
],
18-
"short_description": "Intel® AI for Enterprise Inference (Enterprise Inference) is aimed to streamline the deployment and management of inference services on Intel® hardware.",
19-
"long_description": "This solution leverages Terraform to automate the deployment of virtual machines on IBM Cloud, specifically tailored for AI workloads using Intel® Gaudi® 3 AI accelerators. It installs all necessary Intel Gaudi 3 AI accelerator-specific operators and deploys large language models (LLMs) on single node K8S cluster using Intel® AI for Enterprise Inference framework from the Open Platform for Enterprise AI (OPEA). By integrating cutting-edge AI infrastructure with cloud automation, this solution provides a streamlined approach to deploying and managing machine learning models in a scalable and efficient manner. Deploy this to automate the process of VM spin up, manage installation of Intel Gaudi 3 AI accelerator operators and LLM models on K8S cluster using Terraform referring to OPEA's Intel® AI for Enterprise Inference framework.\n\n**Prerequisites:**\n- Review the [Getting Started Guide](https://github.com/opea-project/Enterprise-Inference/blob/main/third_party/IBM/docs/getting-started.md) for deployment instructions\n- Consult the [Sizing Guide](https://github.com/opea-project/Enterprise-Inference/blob/main/third_party/IBM/docs/sizing-guide.md) to understand Intel® Gaudi® 3 AI Accelerator capabilities and choose the optimal configuration for your AI models",
18+
"short_description": "Intel® AI for Enterprise Inference (Enterprise Inference) is aimed to streamline the deployment and management of inference services on Intel® hardware.",
19+
"long_description": "This solution automates the provisioning of virtual machines on IBM Cloud using Terraform, optimized for AI workloads powered by Intel® Gaudi® 3 AI accelerators. It deploys a Kubernetes cluster that supports both single-node and multi-node configurations. The multi-node setup includes three control plane nodes for orchestration and cluster management, along with worker nodes equipped with Intel® Gaudi® 3 for high-performance AI inference. The deployment integrates the Intel® AI for Enterprise Inference framework from the Open Platform for Enterprise AI (OPEA), enabling scalable deployment of Large Language Models (LLMs) across various configurations. It also includes key enterprise-grade features such as GenAI Gateway for secure and scalable access to generative AI models, Token Telemetry for detailed tracking and analysis of token usage across inference workloads, and Observability for real-time monitoring of system performance, resource utilization, and model behavior. By combining cutting-edge AI infrastructure with cloud automation, this solution provides a streamlined and efficient approach to deploying and managing machine learning models at scale.\n\n**Prerequisites:**\n- Review the [Getting Started Guide](https://github.com/opea-project/Enterprise-Inference/blob/main/third_party/IBM/docs/getting-started.md) for deployment instructions\n- Consult the [Sizing Guide](https://github.com/opea-project/Enterprise-Inference/blob/main/third_party/IBM/docs/sizing-guide.md) to understand Intel® Gaudi® 3 AI Accelerator capabilities and choose the optimal configuration for your AI models",
2020
"features": [
2121
{
2222
"title": "Optimized AI Deployment",
@@ -219,19 +219,53 @@
219219
"description": "The contents of the TLS private key (PEM format)",
220220
"required": true
221221
},
222-
{
223-
"key": "keycloak_admin_user",
222+
{
223+
"key": "deployment_mode",
224224
"type": "string",
225-
"default_value": "admin",
226-
"description": "Keycloak admin user name",
227-
"required": true
225+
"display_name": "Deployment Mode",
226+
"default_value": "single-node",
227+
"description": "Deployment mode for the infrastructure (single-node or multi-node)",
228+
"required": true,
229+
"options": [
230+
{
231+
"displayname": "Single Node",
232+
"value": "single-node"
233+
},
234+
{
235+
"displayname": "Multi Node",
236+
"value": "multi-node"
237+
}
238+
]
228239
},
229240
{
230-
"key": "keycloak_admin_password",
231-
"type": "multiline_secure_value",
232-
"default_value": "",
233-
"description": "Keycloak admin password",
234-
"required": true
241+
"key": "control_plane_count",
242+
"type": "number",
243+
"display_name": "Control plane nodes count",
244+
"default_value": "3",
245+
"description": "Number of control plane nodes (1 for single or 3 for HA) - only used in multi-node mode",
246+
"required": true
247+
},
248+
{
249+
"key": "worker_gaudi_count",
250+
"type": "number",
251+
"display_name": "Worker nodes count",
252+
"default_value": "1",
253+
"description": "Number of Gaudi worker nodes for inference - only used in multi-node mode",
254+
"required": true
255+
},
256+
{
257+
"key": "control_plane_names",
258+
"type": "list(string)",
259+
"display_name": "Control plane names",
260+
"default_value": "[]",
261+
"description": "Custom names for control plane nodes. If not provided, defaults to 'inference-control-plane-01', etc."
262+
},
263+
{
264+
"key": "worker_gaudi_names",
265+
"type": "list(string)",
266+
"display_name": "Gaudi worker names",
267+
"default_value": "[]",
268+
"description": "Custom names for Gaudi worker nodes. If not provided, defaults to 'inference-workload-gaudi-node-01', etc."
235269
},
236270
{
237271
"key": "image",
@@ -256,19 +290,20 @@
256290
"type": "string",
257291
"default_value": "~/certs/key.pem",
258292
"description": "The path to the key file"
259-
},
260-
{
261-
"key": "keycloak_client_id",
262-
"type": "string",
263-
"default_value": "ibm-app",
264-
"description": "Keycloak client id"
265293
},
266294
{
267295
"key": "cpu_or_gpu",
268296
"type": "string",
269297
"default_value": "gaudi3",
270298
"description": "This variable specifies where the model should be running"
271299
},
300+
{
301+
"key": "vault_pass_code",
302+
"type": "multiline_secure_value",
303+
"default_value": "",
304+
"description": "Enter the master vault pass code",
305+
"required": true
306+
},
272307
{
273308
"key": "deploy_kubernetes_fresh",
274309
"type": "string",
@@ -282,16 +317,22 @@
282317
"description": "This variable specifies whether to deploy NGINX ingress controller or not"
283318
},
284319
{
285-
"key": "deploy_llm_models",
320+
"key": "deploy_genai_gateway",
321+
"type": "string",
322+
"default_value": "yes",
323+
"description": "This variable specifies whether we need to deploy Gen AI Gateway"
324+
},
325+
{
326+
"key": "deploy_observability",
286327
"type": "string",
287328
"default_value": "no",
288-
"description": "This variable specfies whether we need to deploy LLM models"
329+
"description": "This variable specifies whether we need to run observability"
289330
},
290331
{
291-
"key": "deploy_keycloak_apisix",
332+
"key": "deploy_llm_models",
292333
"type": "string",
293-
"default_value": "yes",
294-
"description": "This variable specfies whether we need to run keycloak and Apisix components"
334+
"default_value": "no",
335+
"description": "This variable specifies whether we need to deploy LLM models"
295336
}
296337
]
297338
},
@@ -431,15 +472,15 @@
431472
"type": "resource_group"
432473
}
433474
},
434-
{
475+
{
435476
"key": "user_cert",
436477
"type": "multiline_secure_value",
437478
"display_name": "Full chain Certificate",
438479
"default_value": "-----BEGIN CERTIFICATE-----",
439480
"description": "The contents of the TLS certificate (PEM format)",
440481
"required": true
441482
},
442-
{
483+
{
443484
"key": "user_key",
444485
"type": "multiline_secure_value",
445486
"display_name": "Certificate key",
@@ -448,37 +489,52 @@
448489
"required": true
449490
},
450491
{
451-
"key": "keycloak_admin_user",
492+
"key": "ssh_allowed_cidr",
452493
"type": "string",
453-
"default_value": "admin",
454-
"description": "Keycloak admin user name",
494+
"default_value": "\"\"",
495+
"description": "CIDR block(s) allowed for SSH, HTTP, and HTTPS access. Enter a single CIDR or comma-separated list like: 198.0.0.0/8,192.0.0.0/8,169.0.0.0/8. Change to your organization's IP range for better security. For development/testing use 0.0.0.0/0",
455496
"required": true
456497
},
457498
{
458-
"key": "keycloak_admin_password",
459-
"type": "multiline_secure_value",
460-
"default_value": "",
461-
"description": "Keycloak admin password",
462-
"required": true
463-
},
464-
{
465-
"key": "ssh_allowed_cidr",
499+
"key": "deployment_mode",
466500
"type": "string",
467-
"default_value": "0.0.0.0/0",
468-
"description": "CIDR block allowed for SSH access. Change to your organization's IP range for better security and for non-developement deployments",
469-
"required": true
501+
"display_name": "Deployment Mode",
502+
"default_value": "single-node",
503+
"description": "Deployment mode for the infrastructure (single-node or multi-node)",
504+
"required": true,
505+
"options": [
506+
{
507+
"displayname": "Single Node",
508+
"value": "single-node"
509+
},
510+
{
511+
"displayname": "Multi Node",
512+
"value": "multi-node"
513+
}
514+
]
470515
},
471-
{
472-
"key": "image",
473-
"type": "string",
474-
"default_value": "gaudi3-os-u22-01-21-0",
475-
"description": "This variable image name to be used for IAAS bringup on VSI instance"
516+
{
517+
"key": "control_plane_count",
518+
"type": "number",
519+
"display_name": "Control plane nodes count",
520+
"default_value": "3",
521+
"description": "Number of control plane nodes (1 for single or 3 for HA) - only used in multi-node mode",
522+
"required": true
476523
},
477-
{
478-
"key": "cpu_or_gpu",
479-
"type": "string",
480-
"default_value": "gaudi3",
481-
"description": "This variable specifies where the model should be running"
524+
{
525+
"key": "worker_gaudi_count",
526+
"type": "number",
527+
"display_name": "Worker nodes count",
528+
"default_value": "1",
529+
"description": "Number of Gaudi worker nodes for inference - only used in multi-node mode",
530+
"required": true
531+
},
532+
{
533+
"key": "vault_pass_code",
534+
"type": "multiline_secure_value",
535+
"default_value": "",
536+
"description": "Enter the master vault pass code",
537+
"required": true
482538
}
483539

484540
]

third_party/IBM/.gitignore

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ patterns/**/*.terraform.lock.hcl
1212
# control as they are data points which are potentially sensitive and subject
1313
# to change depending on the environment.
1414
# Ignore all .tfvars files anywhere
15-
*.tfvars
1615
**/*.tfvars
1716

1817

third_party/IBM/docs/assets/architecture.svg

Lines changed: 1 addition & 1 deletion
Loading
-73.2 KB
Loading
131 KB
Loading
-69 KB
Loading
-72.7 KB
Loading
-4.36 KB
Loading
-72.2 KB
Loading
-44.6 KB
Loading

0 commit comments

Comments
 (0)