Skip to content

Commit 0f9b3e0

Browse files
Joella Regina Mathias and ravinitp
authored and committed
Added - Support for Compute GPU Memory Cluster and Fabric in GPU Control Plane
1 parent b282576 commit 0f9b3e0

23 files changed

+2948
-0
lines changed
Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
provider "oci" {
2+
auth = var.auth
3+
config_file_profile = var.config_file_profile
4+
region = var.region
5+
}
6+
7+
// variables
8+
variable "auth" {}
9+
variable "region" {}
10+
variable "config_file_profile" {}
11+
variable "compartment_ocid" {}
12+
variable "tenancy_ocid" {}
13+
variable "gb200_image_id" {}
14+
variable "compute_gpu_memory_cluster_size" {
15+
default = 18
16+
}
17+
18+
// dependent data
19+
data "oci_identity_availability_domain" "ad" {
20+
compartment_id = var.compartment_ocid
21+
ad_number = 1
22+
}
23+
24+
resource "oci_core_compute_capacity_topology" "test_compute_capacity_topology" {
25+
availability_domain = data.oci_identity_availability_domain.ad.name
26+
compartment_id = var.compartment_ocid
27+
display_name = "TestDedicatedCapacityTopology"
28+
freeform_tags = {
29+
"department" = "Internal"
30+
}
31+
capacity_source {
32+
capacity_type = "DEDICATED"
33+
compartment_id = var.tenancy_ocid
34+
}
35+
}
36+
37+
output "output_compute_capacity_topology" {
38+
value = oci_core_compute_capacity_topology.test_compute_capacity_topology
39+
}
40+
41+
// required resources
42+
resource "oci_core_vcn" "test_vcn" {
43+
cidr_block = "10.0.0.0/16"
44+
compartment_id = var.compartment_ocid
45+
display_name = "TestVcn"
46+
}
47+
48+
resource "oci_core_subnet" "test_subnet" {
49+
availability_domain = lower(
50+
data.oci_identity_availability_domain.ad.name,
51+
)
52+
cidr_block = "10.0.1.0/24"
53+
compartment_id = var.compartment_ocid
54+
vcn_id = oci_core_vcn.test_vcn.id
55+
display_name = "TestSubnet"
56+
}
57+
58+
resource "oci_core_network_security_group" "test_network_security_group" {
59+
compartment_id = var.compartment_ocid
60+
vcn_id = oci_core_vcn.test_vcn.id
61+
display_name = "TestNetworkSecurityGroup"
62+
}
63+
64+
resource "oci_core_instance_configuration" "test_instance_configuration" {
65+
compartment_id = var.compartment_ocid
66+
display_name = "TestInstanceConfiguration"
67+
68+
instance_details {
69+
instance_type = "compute"
70+
71+
launch_details {
72+
// Since sufficient capacity available only in AD3
73+
availability_domain = data.oci_identity_availability_domain.ad.name
74+
compartment_id = var.compartment_ocid
75+
shape = "BM.GPU.GB200.4"
76+
77+
metadata = {}
78+
79+
source_details {
80+
image_id = var.gb200_image_id
81+
source_type = "image"
82+
}
83+
84+
create_vnic_details {
85+
assign_public_ip = "false"
86+
nsg_ids = [oci_core_network_security_group.test_network_security_group.id]
87+
subnet_id = oci_core_subnet.test_subnet.id
88+
assign_private_dns_record = "true"
89+
}
90+
}
91+
}
92+
}
93+
94+
output "output_instance_configuration" {
95+
value = oci_core_instance_configuration.test_instance_configuration
96+
}
97+
98+
resource "oci_core_compute_cluster" "test_compute_cluster" {
99+
availability_domain = data.oci_identity_availability_domain.ad.name
100+
compartment_id = var.compartment_ocid
101+
display_name = "TestComputeCluster"
102+
}
103+
104+
output "output_compute_cluster" {
105+
value = oci_core_compute_cluster.test_compute_cluster
106+
}
107+
108+
resource "oci_identity_tag_namespace" "test_tag_namespace" {
109+
compartment_id = var.compartment_ocid
110+
description = "test tag namespace"
111+
name = "test-tag-namespace-all"
112+
}
113+
114+
resource "oci_identity_tag" "tag" {
115+
description = "test tag"
116+
name = "test-tag"
117+
tag_namespace_id = oci_identity_tag_namespace.test_tag_namespace.id
118+
}
119+
120+
// our new data sources & resources
121+
data "oci_core_compute_gpu_memory_fabrics" "all_available_memory_fabrics" {
122+
compartment_id = var.compartment_ocid
123+
availability_domain = data.oci_identity_availability_domain.ad.name
124+
compute_gpu_memory_fabric_health = "HEALTHY"
125+
compute_gpu_memory_fabric_lifecycle_state = "AVAILABLE"
126+
127+
depends_on = [oci_core_compute_capacity_topology.test_compute_capacity_topology]
128+
}
129+
130+
output "all_available_gpu_memory_fabrics" {
131+
value = data.oci_core_compute_gpu_memory_fabrics.all_available_memory_fabrics
132+
}
133+
134+
resource "oci_core_compute_gpu_memory_cluster" "test_compute_gpu_memory_cluster" {
135+
#Required
136+
availability_domain = data.oci_identity_availability_domain.ad.name
137+
compartment_id = var.compartment_ocid
138+
compute_cluster_id = oci_core_compute_cluster.test_compute_cluster.id
139+
instance_configuration_id = oci_core_instance_configuration.test_instance_configuration.id
140+
141+
#Optional
142+
defined_tags = {
143+
"${oci_identity_tag_namespace.test_tag_namespace.name}.${oci_identity_tag.tag.name}" = "TestGMC-tag"
144+
}
145+
display_name = "TestGMC"
146+
freeform_tags = { "department" = "Internal" }
147+
gpu_memory_fabric_id = data.oci_core_compute_gpu_memory_fabrics.all_available_memory_fabrics.compute_gpu_memory_fabric_collection[0].items[0].compute_gpu_memory_fabric_id
148+
size = var.compute_gpu_memory_cluster_size
149+
150+
depends_on = [oci_core_compute_capacity_topology.test_compute_capacity_topology]
151+
}
152+
153+
output "output_get_gpu_memory_cluster" {
154+
value = oci_core_compute_gpu_memory_cluster.test_compute_gpu_memory_cluster
155+
}
156+
157+
data "oci_core_compute_gpu_memory_clusters" "test_compute_gpu_memory_clusters" {
158+
#Required
159+
compartment_id = var.compartment_ocid
160+
161+
#Optional
162+
availability_domain = data.oci_identity_availability_domain.ad.name
163+
}
164+
165+
output "output_list_gpu_memory_clusters" {
166+
value = data.oci_core_compute_gpu_memory_clusters.test_compute_gpu_memory_clusters
167+
}
168+
169+
// related gmc cluster instances data source
170+
data "oci_core_compute_gpu_memory_cluster_instances" "test_compute_gpu_memory_cluster_instances" {
171+
compute_gpu_memory_cluster_id = oci_core_compute_gpu_memory_cluster.test_compute_gpu_memory_cluster.id
172+
}
173+
174+
output "list_gpu_memory_cluster_instances" {
175+
value = data.oci_core_compute_gpu_memory_cluster_instances.test_compute_gpu_memory_cluster_instances
176+
}
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
provider "oci" {
2+
auth = var.auth
3+
config_file_profile = var.config_file_profile
4+
region = var.region
5+
}
6+
7+
variable "auth" {}
8+
variable "region" {}
9+
variable "config_file_profile" {}
10+
variable "compartment_ocid" {}
11+
variable "tenancy_ocid" {}
12+
13+
variable "compute_gpu_memory_fabric_compute_gpu_memory_fabric_health" {
14+
default = "HEALTHY"
15+
}
16+
17+
variable "compute_gpu_memory_fabric_compute_gpu_memory_fabric_lifecycle_state" {
18+
default = "AVAILABLE"
19+
}
20+
21+
variable "compute_gpu_memory_fabric_freeform_tags" {
22+
default = { "Department" = "Internal" }
23+
}
24+
25+
data "oci_identity_availability_domain" "ad" {
26+
compartment_id = var.tenancy_ocid
27+
ad_number = 1
28+
}
29+
30+
output "ad" {
31+
value = data.oci_identity_availability_domain.ad
32+
}
33+
34+
data "oci_core_compute_gpu_memory_fabrics" "gpu_memory_fabrics" {
35+
compartment_id = var.tenancy_ocid
36+
availability_domain = data.oci_identity_availability_domain.ad.name
37+
compute_gpu_memory_fabric_health = "HEALTHY"
38+
compute_gpu_memory_fabric_lifecycle_state = "AVAILABLE"
39+
}
40+
41+
output "list_gpu_memory_fabrics" {
42+
value = data.oci_core_compute_gpu_memory_fabrics.gpu_memory_fabrics
43+
}
44+
45+
data "oci_core_compute_gpu_memory_fabric" "gpu_memory_fabric" {
46+
compute_gpu_memory_fabric_id = data.oci_core_compute_gpu_memory_fabrics.gpu_memory_fabrics.compute_gpu_memory_fabric_collection[0].items[0].id
47+
}
48+
49+
output "get_gpu_memory_fabric" {
50+
value = data.oci_core_compute_gpu_memory_fabric.gpu_memory_fabric
51+
}
52+
53+
resource "oci_core_compute_gpu_memory_fabric" "gpu_memory_fabric" {
54+
compute_gpu_memory_fabric_id = data.oci_core_compute_gpu_memory_fabrics.gpu_memory_fabrics.compute_gpu_memory_fabric_collection[0].items[0].id
55+
freeform_tags = var.compute_gpu_memory_fabric_freeform_tags
56+
}
57+
58+
output "gpu_memory_fabric" {
59+
value = oci_core_compute_gpu_memory_fabric.gpu_memory_fabric
60+
}
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
// Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved.
2+
// Licensed under the Mozilla Public License v2.0
3+
4+
package integrationtest
5+
6+
import (
7+
"fmt"
8+
"testing"
9+
10+
"github.com/hashicorp/terraform-plugin-testing/helper/resource"
11+
12+
"github.com/oracle/terraform-provider-oci/httpreplay"
13+
"github.com/oracle/terraform-provider-oci/internal/acctest"
14+
15+
"github.com/oracle/terraform-provider-oci/internal/utils"
16+
)
17+
18+
var (
19+
CoreComputeGpuMemoryClusterInstanceDataSourceRepresentation = map[string]interface{}{
20+
"compute_gpu_memory_cluster_id": acctest.Representation{RepType: acctest.Required, Create: `${oci_core_compute_gpu_memory_cluster.test_compute_gpu_memory_cluster.id}`},
21+
}
22+
23+
CoreComputeGpuMemoryClusterInstanceResourceConfig = CoreComputeGpuMemoryClusterResourceDependencies +
24+
acctest.GenerateResourceFromRepresentationMap("oci_core_compute_gpu_memory_cluster", "test_compute_gpu_memory_cluster", acctest.Required, acctest.Create, CoreComputeGpuMemoryClusterRepresentation)
25+
)
26+
27+
// issue-routing-tag: core/computeSharedOwnershipVmAndBm
28+
func TestCoreComputeGpuMemoryClusterInstanceResource_basic(t *testing.T) {
29+
httpreplay.SetScenario("TestCoreComputeGpuMemoryClusterInstanceResource_basic")
30+
defer httpreplay.SaveScenario()
31+
32+
config := acctest.ProviderTestConfig()
33+
34+
compartmentId := utils.GetEnvSettingWithBlankDefault("compartment_ocid")
35+
compartmentIdVariableStr := fmt.Sprintf("variable \"compartment_id\" { default = \"%s\" }\n", compartmentId)
36+
37+
gpuMemoryClusterSize := utils.GetEnvSettingWithDefault("gmc_size", "2")
38+
gpuMemoryClusterSizeVariableStr := fmt.Sprintf("variable \"gmc_size\" { default = \"%s\" }\n", gpuMemoryClusterSize)
39+
40+
imageId := utils.GetEnvSettingWithBlankDefault("gb200_image_id")
41+
imageIdVariableStr := fmt.Sprintf("variable \"image_id\" { default = \"%s\" }\n", imageId)
42+
43+
computeGpuMemoryFabricId := utils.GetEnvSettingWithBlankDefault("compute_gpu_memory_fabric_id")
44+
computeGpuMemoryFabricIdVariableStr := fmt.Sprintf("variable \"compute_gpu_memory_fabric_id\" { default = \"%s\" }\n", computeGpuMemoryFabricId)
45+
46+
datasourceName := "data.oci_core_compute_gpu_memory_cluster_instances.test_compute_gpu_memory_cluster_instances"
47+
48+
acctest.SaveConfigContent("", "", "", t)
49+
50+
acctest.ResourceTest(t, nil, []resource.TestStep{
51+
// verify datasource
52+
{
53+
Config: config +
54+
acctest.GenerateDataSourceFromRepresentationMap("oci_core_compute_gpu_memory_cluster_instances", "test_compute_gpu_memory_cluster_instances", acctest.Required, acctest.Create, CoreComputeGpuMemoryClusterInstanceDataSourceRepresentation) +
55+
compartmentIdVariableStr + gpuMemoryClusterSizeVariableStr + imageIdVariableStr + computeGpuMemoryFabricIdVariableStr + CoreComputeGpuMemoryClusterInstanceResourceConfig,
56+
Check: acctest.ComposeAggregateTestCheckFuncWrapper(
57+
resource.TestCheckResourceAttrSet(datasourceName, "compute_gpu_memory_cluster_id"),
58+
59+
resource.TestCheckResourceAttrSet(datasourceName, "compute_gpu_memory_cluster_instance_collection.#"),
60+
resource.TestCheckResourceAttr(datasourceName, "compute_gpu_memory_cluster_instance_collection.0.items.#", gpuMemoryClusterSize),
61+
),
62+
},
63+
})
64+
}

0 commit comments

Comments
 (0)