diff --git a/cra-config.yaml b/cra-config.yaml
index 17a96642..632a7a57 100644
--- a/cra-config.yaml
+++ b/cra-config.yaml
@@ -1,3 +1,12 @@
 # More info about this file at https://github.com/terraform-ibm-modules/common-pipeline-assets/blob/main/.github/workflows/terraform-test-pipeline.md#cra-config-yaml
 version: "v1"
-CRA_TARGETS: []
+CRA_TARGETS:
+  - CRA_TARGET: "solutions/hpc"
+    CRA_IGNORE_RULES_FILE: "cra-tf-validate-ignore-rules.json"
+    PROFILE_ID: "1c13d739-e09e-4bf4-8715-dd82e4498041" # SCC profile ID (currently set to CIS IBM Cloud Foundations Benchmark 1.0.0 profile).
+    CRA_ENVIRONMENT_VARIABLES:
+      TF_VAR_cluster_id: "HPC-LSF-1"
+      TF_VAR_reservation_id: "Contract-IBM-WES-DA"
+      TF_VAR_bastion_ssh_keys: "[\"geretain-hpc\"]"
+      TF_VAR_compute_ssh_keys: "[\"geretain-hpc\"]"
+      TF_VAR_remote_allowed_ips: "[\"49.207.216.50\"]"
diff --git a/ibm_catalog.json b/ibm_catalog.json
index cbf921a9..ae4d2dd7 100644
--- a/ibm_catalog.json
+++ b/ibm_catalog.json
@@ -51,8 +51,8 @@
           "authority": "scc-v3",
           "profiles": [
             {
-              "profile_name": "IBM Cloud Framework for Financial Services",
-              "profile_version": "1.6.0"
+              "profile_name": "CIS IBM Cloud Foundations Benchmark",
+              "profile_version": "1.0.0"
             }
           ]
         },
@@ -110,6 +110,55 @@
           {
             "key": "cluster_prefix"
           },
+          {
+            "key": "observability_atracker_on_cos_enable"
+          },
+          {
+            "key": "observability_monitoring_enable"
+          },
+          {
+            "key": "observability_monitoring_on_compute_nodes_enable"
+          },
+          {
+            "key": "observability_monitoring_plan",
+            "default_value": "graduated-tier",
+            "options": [
+              {
+                "displayname": "graduated-tier",
+                "value": "graduated-tier"
+              },
+              {
+                "displayname": "lite",
+                "value": "lite"
+              }
+            ]
+          },
+          {
+            "key": "scc_enable"
+          },
+          {
+            "key": "scc_profile"
+          },
+          {
+            "key": "scc_profile_version"
+          },
+          {
+            "key": "scc_location"
+          },
+          {
+            "key": "scc_event_notification_plan",
+            "default_value": "lite",
+            "options": [
+              {
+                "displayname": "lite",
+                "value": "lite"
+              },
+              {
+                "displayname": "standard",
+                "value": "standard"
+              }
+            ]
+          },
           {
             "key": "vpc_cidr"
           },
@@ -147,7 +196,19 @@
           {
             "key": "login_image_name"
           },
           {
-            "key": "custom_file_shares"
+            "key": "custom_file_shares",
+            "type": "array",
+            "default_value": "[\n {\n \"mount_path\": \"/mnt/vpcstorage/tools\",\n \"size\": 100,\n \"iops\": 2000\n },\n {\n \"mount_path\": \"/mnt/vpcstorage/data\",\n \"size\": 100,\n \"iops\": 6000\n },\n {\n \"mount_path\": \"/mnt/scale/tools\",\n \"nfs_share\": \"\"\n }\n]\n",
+            "display_name": "JSON",
+            "required": false,
+            "custom_config": {
+              "type": "json_editor",
+              "grouping": "deployment",
+              "original_grouping": "deployment",
+              "config_constraints": {
+                "type": "mixed"
+              }
+            }
           },
           {
             "key": "storage_security_group_id"
           },
@@ -167,29 +228,6 @@
           {
             "key": "cos_instance_name"
           },
-          {
-            "key": "observability_atracker_on_cos_enable"
-          },
-          {
-            "key": "observability_monitoring_enable"
-          },
-          {
-            "key": "observability_monitoring_on_compute_nodes_enable"
-          },
-          {
-            "key": "observability_monitoring_plan",
-            "default_value": "graduated-tier",
-            "options": [
-              {
-                "displayname": "graduated-tier",
-                "value": "graduated-tier"
-              },
-              {
-                "displayname": "lite",
-                "value": "lite"
-              }
-            ]
-          },
           {
             "key": "enable_vpc_flow_logs"
           },
@@ -205,32 +243,6 @@
           {
             "key": "kms_key_name"
           },
-          {
-            "key": "scc_enable"
-          },
-          {
-            "key": "scc_profile"
-          },
-          {
-            "key": "scc_profile_version"
-          },
-          {
-            "key": "scc_location"
-          },
-          {
-            "key": "scc_event_notification_plan",
-            "default_value": "lite",
-            "options": [
-              {
-                "displayname": "lite",
-                "value": "lite"
-              },
-              {
-                "displayname": "standard",
-                "value": "standard"
-              }
-            ]
-          },
           {
             "key": "hyperthreading_enabled"
           },
@@ -273,6 +285,9 @@
           {
             "key": "skip_iam_authorization_policy"
           },
+          {
+            "key": "skip_iam_share_authorization_policy"
+          },
           {
             "key": "existing_certificate_instance"
           },
@@ -373,7 +388,7 @@
           "description": "Yes"
         },
         {
-          "title": "Simplifies risk management and demonstrates regulatory compliance with Financial Services",
+          "title": "Simplifies risk management and demonstrates regulatory compliance with the CIS IBM Cloud Foundations Benchmark",
           "description": "Yes"
         },
         {
diff --git a/modules/landing_zone_vsi/configuration_steps/configure_management_vsi.sh b/modules/landing_zone_vsi/configuration_steps/configure_management_vsi.sh
index fb2a1b0a..4c694d5f 100644
--- a/modules/landing_zone_vsi/configuration_steps/configure_management_vsi.sh
+++ b/modules/landing_zone_vsi/configuration_steps/configure_management_vsi.sh
@@ -390,6 +390,7 @@ EOF
 # 7. Create resource template for ibmcloudhpc templates
 # Define the output JSON file path
+
 ibmcloudhpc_templates="$LSF_RC_IBMCLOUDHPC_CONF/ibmcloudhpc_templates.json"

 # Initialize an empty JSON string
@@ -400,6 +401,11 @@ for region in "eu-de" "us-east" "us-south"; do
   if [ "$region" = "$regionName" ]; then
     # Loop through the core counts
     for i in 2 4 8 16 32 48 64 96 128 176; do
+      if [ "$i" -gt 128 ] && [ "$region" != "us-south" ]; then
+        # Skip creating templates with more than 128 cores for non-us-south regions
+        continue
+      fi
+
       ncores=$((i / 2))
       if [ "$region" = "eu-de" ] || [ "$region" = "us-east" ]; then
         family="mx2"
@@ -430,6 +436,11 @@ for region in "eu-de" "us-east" "us-south"; do
       # Split the family string into an array and iterate over it
       IFS=',' read -ra families <<< "$family"
       for fam in "${families[@]}"; do
+        # Check if the core count is valid for the family
+        if [ "$fam" = "mx2" ] && [ "$i" -gt 128 ]; then
+          continue
+        fi
+
         templateId="Template-${cluster_prefix}-$((1000+i))-$fam" # Add family to templateId
         if [ "$fam" = "mx2" ]; then
           maxmem_val="$maxmem_mx2" # Use mx2-specific maxmem value
diff --git a/modules/landing_zone_vsi/image_map.tf b/modules/landing_zone_vsi/image_map.tf
index 17adf743..04d3c81e 100644
--- a/modules/landing_zone_vsi/image_map.tf
+++ b/modules/landing_zone_vsi/image_map.tf
@@ -1,9 +1,9 @@
 locals {
   image_region_map = {
-    "hpcaas-lsf10-rhel88-v6" = {
-      "us-east"  = "r014-7c8ff827-42f9-4e52-8ac5-0cabfa83cc08"
-      "eu-de"    = "r010-ef5c9c76-88c9-461a-9ea9-ae3483b12463"
-      "us-south" = "r006-56948288-f03a-452f-a4e8-13c9523e5aac"
+    "hpcaas-lsf10-rhel88-v7" = {
+      "us-east"  = "r014-68a7ad8a-c513-418e-a30c-9a04ce0a144a"
+      "eu-de"    = "r010-b392ff76-fb8c-4b0f-9fef-fba89eb3ee5b"
+      "us-south" = "r006-86f207dd-7029-4705-9222-0f5499387734"
     },
     "hpcaas-lsf10-rhel88-compute-v5" = {
       "us-east" = "r014-deb34fb1-edbf-464c-9af3-7efa2efcff3f"
diff --git a/modules/observability_instance/outputs.tf b/modules/observability_instance/outputs.tf
index 41313a5b..be07af0f 100644
--- a/modules/observability_instance/outputs.tf
+++ b/modules/observability_instance/outputs.tf
@@ -25,3 +25,8 @@ output "cloud_monitoring_prws_url" {
   value       = "https://ingest.prws.${var.location}.monitoring.cloud.ibm.com/prometheus/remote/write"
   description = "IBM Cloud Monitoring Prometheus Remote Write ingestion url"
 }
+
+output "cloud_monitoring_url" {
+  value       = var.cloud_monitoring_provision ? "https://cloud.ibm.com/observe/embedded-view/monitoring/${module.observability_instance.cloud_monitoring_guid}" : null
+  description = "IBM Cloud Monitoring URL"
+}
diff --git a/solutions/hpc/README.md b/solutions/hpc/README.md
index 6fad146d..6a99962a 100644
--- a/solutions/hpc/README.md
+++ b/solutions/hpc/README.md
@@ -111,7 +111,7 @@
 | [login\_image\_name](#input\_login\_image\_name) | Name of the custom image that you want to use to create virtual server instances in your IBM Cloud account to deploy the IBM Cloud HPC cluster login node. By default, the solution uses a RHEL 8-8 OS image with additional software packages mentioned [here](https://cloud.ibm.com/docs/ibm-spectrum-lsf#create-custom-image). The solution also offers, Ubuntu 22-04 OS base image (hpcaas-lsf10-ubuntu2204-compute-v4). If you would like to include your application-specific binary files, follow the instructions in [ Planning for custom images ](https://cloud.ibm.com/docs/vpc?topic=vpc-planning-custom-images) to create your own custom image and use that to build the IBM Cloud HPC cluster through this offering. | `string` | `"hpcaas-lsf10-rhel88-compute-v5"` | no |
 | [login\_node\_instance\_type](#input\_login\_node\_instance\_type) | Specify the virtual server instance profile type to be used to create the login node for the IBM Cloud HPC cluster. For choices on profile types, see [Instance profiles](https://cloud.ibm.com/docs/vpc?topic=vpc-profiles). | `string` | `"bx2-2x8"` | no |
 | [login\_subnet\_id](#input\_login\_subnet\_id) | Provide the list of existing subnet ID under the existing VPC, where the login/bastion server will be provisioned. One subnet id is required as input value for the creation of login node and bastion in the same zone as the management nodes. Note: Provide a different subnet id for login\_subnet\_id, do not overlap or provide the same subnet id that was already provided for cluster\_subnet\_ids. | `string` | `null` | no |
-| [management\_image\_name](#input\_management\_image\_name) | Name of the custom image that you want to use to create virtual server instances in your IBM Cloud account to deploy the IBM Cloud HPC cluster management nodes. By default, the solution uses a RHEL88 base image with additional software packages mentioned [here](https://cloud.ibm.com/docs/ibm-spectrum-lsf#create-custom-image). If you would like to include your application-specific binary files, follow the instructions in [ Planning for custom images ](https://cloud.ibm.com/docs/vpc?topic=vpc-planning-custom-images) to create your own custom image and use that to build the IBM Cloud HPC cluster through this offering. | `string` | `"hpcaas-lsf10-rhel88-v6"` | no |
+| [management\_image\_name](#input\_management\_image\_name) | Name of the custom image that you want to use to create virtual server instances in your IBM Cloud account to deploy the IBM Cloud HPC cluster management nodes. By default, the solution uses a RHEL88 base image with additional software packages mentioned [here](https://cloud.ibm.com/docs/ibm-spectrum-lsf#create-custom-image). If you would like to include your application-specific binary files, follow the instructions in [ Planning for custom images ](https://cloud.ibm.com/docs/vpc?topic=vpc-planning-custom-images) to create your own custom image and use that to build the IBM Cloud HPC cluster through this offering. | `string` | `"hpcaas-lsf10-rhel88-v7"` | no |
 | [management\_node\_count](#input\_management\_node\_count) | Number of management nodes. This is the total number of management nodes. Enter a value between 1 and 10. | `number` | `3` | no |
 | [management\_node\_instance\_type](#input\_management\_node\_instance\_type) | Specify the virtual server instance profile type to be used to create the management nodes for the IBM Cloud HPC cluster. For choices on profile types, see [Instance profiles](https://cloud.ibm.com/docs/vpc?topic=vpc-profiles). | `string` | `"bx2-16x64"` | no |
 | [observability\_atracker\_on\_cos\_enable](#input\_observability\_atracker\_on\_cos\_enable) | Enable Activity tracker service instance connected to Cloud Object Storage (COS). All the events will be stored into COS so that customers can connect to it and read those events or ingest them in their system. | `bool` | `true` | no |
diff --git a/solutions/hpc/locals.tf b/solutions/hpc/locals.tf
index 8e352ff8..5e7a4e8b 100644
--- a/solutions/hpc/locals.tf
+++ b/solutions/hpc/locals.tf
@@ -232,12 +232,11 @@ locals {

 # locals needed for ssh connection
 locals {
-  ssh_forward_host = (var.app_center_high_availability ? "pac.${var.dns_domain_name.compute}" : "localhost")
-  ssh_forwards     = "-L 8443:${local.ssh_forward_host}:8443 -L 6080:${local.ssh_forward_host}:6080"
+  ssh_forward_host = (var.app_center_high_availability ? "pac.${var.dns_domain_name.compute}" : local.management_private_ip)
+  ssh_forwards     = "-L 8443:${local.ssh_forward_host}:8443 -L 6080:${local.ssh_forward_host}:6080 -L 8444:${local.ssh_forward_host}:8444"
   ssh_jump_host    = local.bastion_instance_public_ip != null ? local.bastion_instance_public_ip : var.enable_fip ? module.bootstrap.bastion_fip[0] : module.bootstrap.bastion_primary_ip
   ssh_jump_option  = "-J ubuntu@${local.ssh_jump_host}"
-  ssh_host         = var.app_center_high_availability ? local.login_private_ips[0] : local.management_private_ip
-  ssh_cmd          = "ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ServerAliveInterval=5 -o ServerAliveCountMax=1 ${local.ssh_forwards} ${local.ssh_jump_option} lsfadmin@${local.ssh_host}"
+  ssh_cmd          = "ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ServerAliveInterval=5 -o ServerAliveCountMax=1 ${local.ssh_forwards} ${local.ssh_jump_option} lsfadmin@${join(",", local.login_private_ips)}"
 }

 # Existing bastion Variables
diff --git a/solutions/hpc/outputs.tf b/solutions/hpc/outputs.tf
index 2292f831..bdc22485 100644
--- a/solutions/hpc/outputs.tf
+++ b/solutions/hpc/outputs.tf
@@ -87,3 +87,8 @@ output "ldap_ips" {
   description = "LDAP nodes have these IPs:"
   value       = local.print_extra_outputs ? local.ldap_private_ips : null
 }
+
+output "cloud_monitoring_url" {
+  value       = var.observability_monitoring_enable ? module.cloud_monitoring_instance_creation.cloud_monitoring_url : null
+  description = "IBM Cloud Monitoring URL"
+}
diff --git a/solutions/hpc/variables.tf b/solutions/hpc/variables.tf
index b1cb5115..05937dec 100644
--- a/solutions/hpc/variables.tf
+++ b/solutions/hpc/variables.tf
@@ -176,7 +176,7 @@ variable "login_node_instance_type" {
 }
 variable "management_image_name" {
   type        = string
-  default     = "hpcaas-lsf10-rhel88-v6"
+  default     = "hpcaas-lsf10-rhel88-v7"
   description = "Name of the custom image that you want to use to create virtual server instances in your IBM Cloud account to deploy the IBM Cloud HPC cluster management nodes. By default, the solution uses a RHEL88 base image with additional software packages mentioned [here](https://cloud.ibm.com/docs/ibm-spectrum-lsf#create-custom-image). If you would like to include your application-specific binary files, follow the instructions in [ Planning for custom images ](https://cloud.ibm.com/docs/vpc?topic=vpc-planning-custom-images) to create your own custom image and use that to build the IBM Cloud HPC cluster through this offering."
 }
diff --git a/tests/common_utils/utils.go b/tests/common_utils/utils.go
index 896124be..6adbcd9e 100644
--- a/tests/common_utils/utils.go
+++ b/tests/common_utils/utils.go
@@ -4,6 +4,7 @@ import (
 	"bufio"
 	"bytes"
 	"context"
+	"encoding/json"
 	"errors"
 	"fmt"
 	"math/rand"
@@ -17,7 +18,7 @@ import (
 	"testing"
 	"time"

-	"github.com/IBM/go-sdk-core/core"
+	"github.com/IBM/go-sdk-core/v5/core"
 	"github.com/IBM/secrets-manager-go-sdk/secretsmanagerv2"
 	"github.com/stretchr/testify/assert"
 	"github.com/terraform-ibm-modules/ibmcloud-terratest-wrapper/testhelper"
@@ -723,3 +724,44 @@ func GetDnsCustomResolverIds(outputs map[string]interface{}) (string, string) {
 	}
 	return instanceId, customResolverId
 }
+
+// Configuration matches the structure of the existing-environment JSON configuration file
+type Configuration struct {
+	ClusterID             string   `json:"ClusterID"`
+	ReservationID         string   `json:"ReservationID"`
+	ClusterPrefixName     string   `json:"ClusterPrefixName"`
+	ResourceGroup         string   `json:"ResourceGroup"`
+	KeyManagement         string   `json:"KeyManagement"`
+	DnsDomainName         string   `json:"DnsDomainName"`
+	Zones                 string   `json:"Zones"`
+	HyperthreadingEnabled bool     `json:"HyperthreadingEnabled"`
+	BastionIP             string   `json:"bastionIP"`
+	ManagementNodeIPList  []string `json:"managementNodeIPList"`
+	LoginNodeIP           string   `json:"loginNodeIP"`
+	LdapServerIP          string   `json:"LdapServerIP"`
+	LdapDomain            string   `json:"LdapDomain"`
+	LdapAdminPassword     string   `json:"LdapAdminPassword"`
+	LdapUserName          string   `json:"LdapUserName"`
+	LdapUserPassword      string   `json:"LdapUserPassword"`
+	AppCenterEnabledOrNot string   `json:"APPCenterEnabledOrNot"`
+	SshKeyPath            string   `json:"ssh_key_path"`
+}
+
+// ParseConfig reads a JSON file from the given file path and parses it into a Configuration struct
+func ParseConfig(filePath string) (*Configuration, error) {
+	// Read the entire content of the file
+	byteValue, err := os.ReadFile(filePath)
+	if err != nil {
+		return nil, fmt.Errorf("error reading file %s: %w", filePath, err)
+	}
+
+	// Unmarshal the JSON data into the Configuration struct
+	var config Configuration
+	err = json.Unmarshal(byteValue, &config)
+	if err != nil {
+		return nil, fmt.Errorf("error parsing JSON from file %s: %w", filePath, err)
+	}
+
+	// Return the configuration struct and nil error on success
+	return &config, nil
+}
diff --git a/tests/go.mod b/tests/go.mod
index 520d974d..ccb06543 100644
--- a/tests/go.mod
+++ b/tests/go.mod
@@ -3,7 +3,7 @@ module github.com/terraform-ibm-modules/terraform-ibm-hpc
 go 1.21.3

 require (
-	github.com/IBM/go-sdk-core v1.1.0
+	github.com/IBM/go-sdk-core/v5 v5.17.3
 	github.com/IBM/secrets-manager-go-sdk v1.2.0
 	github.com/gruntwork-io/terratest v0.46.15
 	github.com/stretchr/testify v1.9.0
@@ -23,7 +23,6 @@ require (
 	github.com/IBM-Cloud/bluemix-go v0.0.0-20240423071914-9e96525baef4 // indirect
 	github.com/IBM-Cloud/power-go-client v1.6.0 // indirect
 	github.com/IBM/cloud-databases-go-sdk v0.7.0 // indirect
-	github.com/IBM/go-sdk-core/v5 v5.17.3 // indirect
 	github.com/IBM/platform-services-go-sdk v0.63.1 // indirect
 	github.com/IBM/project-go-sdk v0.3.0 // indirect
 	github.com/IBM/vpc-go-sdk v0.51.0 // indirect
@@ -37,7 +36,6 @@ require (
 	github.com/cloudflare/circl v1.3.8 // indirect
 	github.com/cyphar/filepath-securejoin v0.2.5 // indirect
 	github.com/davecgh/go-spew v1.1.1 // indirect
-	github.com/dgrijalva/jwt-go v3.2.0+incompatible // indirect
 	github.com/emirpasic/gods v1.18.1 // indirect
 	github.com/felixge/httpsnoop v1.0.4 // indirect
 	github.com/gabriel-vasile/mimetype v1.4.3 // indirect
@@ -120,7 +118,6 @@
 	google.golang.org/genproto/googleapis/rpc v0.0.0-20240506185236-b8a5c65736ae // indirect
 	google.golang.org/grpc v1.63.2 // indirect
 	google.golang.org/protobuf v1.34.1 // indirect
-	gopkg.in/go-playground/validator.v9 v9.31.0 // indirect
 	gopkg.in/warnings.v0 v0.1.2 // indirect
 	gopkg.in/yaml.v2 v2.4.0 // indirect
diff --git a/tests/go.sum b/tests/go.sum
index 0baef1d1..bc947cc7 100644
--- a/tests/go.sum
+++ b/tests/go.sum
@@ -197,8 +197,6 @@ github.com/IBM-Cloud/power-go-client v1.6.0 h1:X+QX+WSF66+aouyaf4r+IeBLXUurAJj9+
 github.com/IBM-Cloud/power-go-client v1.6.0/go.mod h1:0ad5Lcq1utoYVJx0uqooMjCpUaYaK0ItP9QJYtY6k0Y=
 github.com/IBM/cloud-databases-go-sdk v0.7.0 h1:prvLebKD1kcIk81D6yRhOr/TWp1VQJGLhGAasQr7RtA=
 github.com/IBM/cloud-databases-go-sdk v0.7.0/go.mod h1:JYucI1PdwqbAd8XGdDAchxzxRP7bxOh1zUnseovHKsc=
-github.com/IBM/go-sdk-core v1.1.0 h1:pV73lZqr9r1xKb3h08c1uNG3AphwoV5KzUzhS+pfEqY=
-github.com/IBM/go-sdk-core v1.1.0/go.mod h1:2pcx9YWsIsZ3I7kH+1amiAkXvLTZtAq9kbxsfXilSoY=
 github.com/IBM/go-sdk-core/v5 v5.17.3 h1:CZSVCKzhQc/hRQZOtuEmi9dlNtWMnxJvOsPtQKP7cZ4=
 github.com/IBM/go-sdk-core/v5 v5.17.3/go.mod h1:GatGZpxlo1KaxiRN6E10/rNgWtUtx1hN/GoHSCaSPKA=
 github.com/IBM/platform-services-go-sdk v0.63.1 h1:F5mZU1hKDHqpZa85twUeSYmM9g9gwNAdja097rfpxJY=
@@ -260,8 +258,6 @@ github.com/cyphar/filepath-securejoin v0.2.5/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxG
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
-github.com/dgrijalva/jwt-go v3.2.0+incompatible h1:7qlOGliEKZXTDg6OTjfoBKDXWrumCAMpl/TFQ4/5kLM=
-github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ=
 github.com/elazarl/goproxy v0.0.0-20230808193330-2592e75ae04a h1:mATvB/9r/3gvcejNsXKSkQ6lcIaNec2nyfOdlTBR2lU=
 github.com/elazarl/goproxy v0.0.0-20230808193330-2592e75ae04a/go.mod h1:Ro8st/ElPeALwNFlcTpWmkr6IoMFfkjXAvTHpevnDsM=
 github.com/emirpasic/gods v1.18.1 h1:FXtiHYKDGKCW2KzwZKx0iC0PQmdlorYgdFG9jPXJ1Bc=
@@ -332,10 +328,8 @@ github.com/go-openapi/validate v0.24.0 h1:LdfDKwNbpB6Vn40xhTdNZAnfLECL81w+VX3Bum
 github.com/go-openapi/validate v0.24.0/go.mod h1:iyeX1sEufmv3nPbBdX3ieNviWnOZaJ1+zquzJEf2BAQ=
 github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s=
 github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
-github.com/go-playground/locales v0.12.1/go.mod h1:IUMDtCfWo/w/mtMfIE/IG2K+Ey3ygWanZIBtBW0W2TM=
 github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA=
 github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY=
-github.com/go-playground/universal-translator v0.16.0/go.mod h1:1AnU7NaIRDWWzGEKwgtJRd2xk99HeFyHw3yid4rvQIY=
 github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY=
 github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY=
 github.com/go-playground/validator/v10 v10.20.0 h1:K9ISHbSaI0lyB2eWMPJo+kOS/FBExVwjEviJTixqxL8=
@@ -501,7 +495,6 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
 github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
 github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
 github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
-github.com/leodido/go-urn v1.2.0/go.mod h1:+8+nEpDfqqsY+g338gtMEUOtuK+4dEMhiQEgxpxOKII=
 github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ=
 github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI=
 github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
@@ -1262,11 +1255,6 @@ gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EV
 gopkg.in/cheggaaa/pb.v1 v1.0.27/go.mod h1:V/YB90LKu/1FcN3WVnfiiE5oMCibMjukxqG/qStrOgw=
 gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
 gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys=
-gopkg.in/go-playground/assert.v1 v1.2.1 h1:xoYuJVE7KT85PYWrN730RguIQO0ePzVRfFMXadIrXTM=
-gopkg.in/go-playground/assert.v1 v1.2.1/go.mod h1:9RXL0bg/zibRAgZUYszZSwO/z8Y/a8bDuhia5mkpMnE=
-gopkg.in/go-playground/validator.v9 v9.30.0/go.mod h1:+c9/zcJMFNgbLvly1L1V+PpxWdVbfP1avr/N00E2vyQ=
-gopkg.in/go-playground/validator.v9 v9.31.0 h1:bmXmP2RSNtFES+bn4uYuHT7iJFJv7Vj+an+ZQdDaD1M=
-gopkg.in/go-playground/validator.v9 v9.31.0/go.mod h1:+c9/zcJMFNgbLvly1L1V+PpxWdVbfP1avr/N00E2vyQ=
 gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ=
 gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw=
 gopkg.in/warnings.v0 v0.1.2 h1:wFXVbFY8DY5/xOe1ECiWdKCzZlxgshcYVNkBHstARME=
diff --git a/tests/lsf/lsf_cluster_test_validation.go b/tests/lsf/lsf_cluster_test_validation.go
index 964a5fd3..a0d6b73f 100644
--- a/tests/lsf/lsf_cluster_test_validation.go
+++ b/tests/lsf/lsf_cluster_test_validation.go
@@ -544,3 +544,100 @@ func ValidatePACANDLDAPClusterConfiguration(t *testing.T, options *testhelper.Te

 	testLogger.Info(t, t.Name()+" Validation ended")
 }
+
+// ValidateClusterConfigurationWithAPPCenterForExistingEnv validates the configuration of an existing cluster with App Center integration.
+// It verifies various aspects including management node configuration, SSH keys, failover and failback, LSF daemon restart, dynamic compute node configuration,
+// login node configuration, SSH connectivity, application center configuration, noVNC configuration, PTR records, and file share encryption.
+//
+// testLogger: *utils.AggregatedLogger - The logger for the test.
+func ValidateClusterConfigurationWithAPPCenterForExistingEnv(
+	t *testing.T,
+	bastionIP, loginNodeIP, expectedClusterID, expectedReservationID, expectedMasterName, expectedResourceGroup,
+	expectedKeyManagement, expectedZone, expectedDnsDomainName string,
+	managementNodeIPList []string,
+	expectedHyperthreadingEnabled bool,
+	testLogger *utils.AggregatedLogger,
+) {
+	// Retrieve job commands for different levels
+	JOB_COMMAND_LOW := GetJobCommand(expectedZone, "low")
+	JOB_COMMAND_MED := GetJobCommand(expectedZone, "med")
+
+	// Log the start of validation
+	testLogger.Info(t, t.Name()+" Cluster created successfully")
+	testLogger.Info(t, t.Name()+" Validation started ......")
+
+	// Connect to the master node via SSH
+	sshClient, connectionErr := utils.ConnectToHost(LSF_PUBLIC_HOST_NAME, bastionIP, LSF_PRIVATE_HOST_NAME, managementNodeIPList[0])
+	require.Nil(t, connectionErr, "Failed to connect to the master via SSH: %v", connectionErr)
+	defer sshClient.Close()
+
+	testLogger.Info(t, "SSH connection to the master successful")
+	t.Log("Validation in progress. Please wait...")
+
+	// Verify management node configuration
+	VerifyManagementNodeConfig(t, sshClient, expectedClusterID, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, managementNodeIPList, JOB_COMMAND_LOW, EXPECTED_LSF_VERSION, testLogger)
+
+	// Verify SSH key
+	VerifySSHKey(t, sshClient, bastionIP, LSF_PUBLIC_HOST_NAME, LSF_PRIVATE_HOST_NAME, "management", managementNodeIPList, testLogger)
+
+	// Perform failover and failback
+	FailoverAndFailback(t, sshClient, JOB_COMMAND_MED, testLogger)
+
+	// Restart LSF daemon
+	RestartLsfDaemon(t, sshClient, JOB_COMMAND_LOW, testLogger)
+
+	// Reboot instance
+	RebootInstance(t, sshClient, bastionIP, LSF_PUBLIC_HOST_NAME, LSF_PRIVATE_HOST_NAME, managementNodeIPList[0], JOB_COMMAND_MED, testLogger)
+
+	// Reconnect to the master node via SSH after reboot
+	sshClient, connectionErr = utils.ConnectToHost(LSF_PUBLIC_HOST_NAME, bastionIP, LSF_PRIVATE_HOST_NAME, managementNodeIPList[0])
+	require.Nil(t, connectionErr, "Failed to connect to the master via SSH: %v", connectionErr)
+	defer sshClient.Close()
+
+	// Wait for dynamic node disappearance
+	defer func() {
+		if err := LSFWaitForDynamicNodeDisappearance(t, sshClient, testLogger); err != nil {
+			t.Errorf("Error in LSFWaitForDynamicNodeDisappearance: %v", err)
+		}
+	}()
+
+	// Get dynamic compute node IPs
+	computeNodeIPList, computeIPErr := LSFGETDynamicComputeNodeIPs(t, sshClient, testLogger)
+	require.Nil(t, computeIPErr, "Error getting dynamic compute node IPs: %v", computeIPErr)
+
+	// Verify compute node configuration
+	VerifyComputetNodeConfig(t, sshClient, expectedHyperthreadingEnabled, computeNodeIPList, testLogger)
+
+	// Verify SSH key for compute nodes
+	VerifySSHKey(t, sshClient, bastionIP, LSF_PUBLIC_HOST_NAME, LSF_PRIVATE_HOST_NAME, "compute", computeNodeIPList, testLogger)
+
+	// Connect to the login node via SSH
+	sshLoginNodeClient, connectionErr := utils.ConnectToHost(LSF_PUBLIC_HOST_NAME, bastionIP, LSF_PRIVATE_HOST_NAME, loginNodeIP)
+	require.NoError(t, connectionErr, "Failed to connect to the login node via SSH: %v", connectionErr)
+	defer sshLoginNodeClient.Close()
+
+	// Verify login node configuration
+	VerifyLoginNodeConfig(t, sshLoginNodeClient, expectedClusterID, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, loginNodeIP, JOB_COMMAND_LOW, EXPECTED_LSF_VERSION, testLogger)
+
+	// Re-fetch dynamic compute node IPs
+	computeNodeIPList, computeIPErr = LSFGETDynamicComputeNodeIPs(t, sshClient, testLogger)
+	require.Nil(t, computeIPErr, "Error getting dynamic compute node IPs: %v", computeIPErr)
+
+	// Verify SSH connectivity to nodes from login
+	VerifySSHConnectivityToNodesFromLogin(t, sshLoginNodeClient, managementNodeIPList, computeNodeIPList, testLogger)
+
+	// Verify application center configuration
+	VerifyAPPCenterConfig(t, sshClient, testLogger)
+
+	// Verify noVNC configuration
+	VerifyNoVNCConfig(t, sshClient, testLogger)
+
+	// Verify PTR records for management and login nodes
+	VerifyPTRRecordsForManagementAndLoginNodes(t, sshClient, LSF_PUBLIC_HOST_NAME, bastionIP, LSF_PRIVATE_HOST_NAME, managementNodeIPList, loginNodeIP, expectedDnsDomainName, testLogger)
+
+	// Verify file share encryption
+	VerifyFileShareEncryption(t, os.Getenv("TF_VAR_ibmcloud_api_key"), utils.GetRegion(expectedZone), expectedResourceGroup, expectedMasterName, expectedKeyManagement, testLogger)
+
+	// Log the end of validation
+	testLogger.Info(t, t.Name()+" Validation ended")
+}
diff --git a/tests/lsf/lsf_cluster_utils.go b/tests/lsf/lsf_cluster_utils.go
index 1ad267cf..09bb26f6 100644
--- a/tests/lsf/lsf_cluster_utils.go
+++ b/tests/lsf/lsf_cluster_utils.go
@@ -368,7 +368,8 @@ func LSFRunJobs(t *testing.T, sClient *ssh.Client, jobCmd string, logger *utils.
 		return fmt.Errorf("failed to run '%s' command: %w", jobCmd, err)
 	}

-	// Extract the job ID from the job output
+	logger.Info(t, fmt.Sprintf("Submitted Job command: %s", jobCmd))
+
 	jobTime := utils.SplitAndTrim(jobCmd, "sleep")[1]
 	min, err := utils.StringToInt(jobTime)
 	if err != nil {
@@ -1183,10 +1184,9 @@ func GetJobCommand(zone, jobType string) string {
 // with the specified cluster prefix, and verifies encryption settings.
 func VerifyEncryption(t *testing.T, apiKey, region, resourceGroup, clusterPrefix, keyManagement string, logger *utils.AggregatedLogger) error {

-	// Set custom resource group if it's resource group is null
-	// In case the resource group is null , set it to a default value "workload-rg"
+	// If the resource group contains "null", default it to "<clusterPrefix>-workload-rg"
 	if strings.Contains(resourceGroup, "null") {
-		resourceGroup = "workload-rg"
+		resourceGroup = fmt.Sprintf("%s-workload-rg", clusterPrefix)
 	}

 	// Login to IBM Cloud using the API key and VPC region
@@ -1194,15 +1194,19 @@
 		return fmt.Errorf("failed to log in to IBM Cloud: %w", err)
 	}

-	// Get the list of file shares
+	// Determine the command to get the list of file shares
 	fileSharesCmd := fmt.Sprintf("ibmcloud is shares | grep %s | awk '{print $2}'", clusterPrefix)
-	cmd := exec.Command("bash", "-c", fileSharesCmd)
-	output, err := cmd.CombinedOutput()
+	if strings.Contains(resourceGroup, "workload") {
+		fileSharesCmd = fmt.Sprintf("ibmcloud is shares | grep %s | awk 'NR>1 {print $2}'", clusterPrefix)
+	}
+
+	// Retrieve the list of file shares
+	fileSharesOutput, err := exec.Command("bash", "-c", fileSharesCmd).CombinedOutput()
 	if err != nil {
 		return fmt.Errorf("failed to retrieve file shares: %w", err)
 	}
-	fileShareNames := strings.Fields(string(output))
+	fileShareNames := strings.Fields(string(fileSharesOutput))
 	logger.Info(t, fmt.Sprintf("File share list: %s", fileShareNames))

 	for _, fileShareName := range fileShareNames {
@@ -1261,7 +1265,8 @@ func LSFRunJobsAsLDAPUser(t *testing.T, sClient *ssh.Client, jobCmd, ldapUser st
 		return fmt.Errorf("failed to run '%s' command: %w", jobCmd, err)
 	}

-	// Extract the job ID from the job output
+	logger.Info(t, fmt.Sprintf("Submitted Job command: %s", jobCmd))
+
 	jobTime := utils.SplitAndTrim(jobCmd, "sleep")[1]
 	min, err := utils.StringToInt(jobTime)
 	if err != nil {
diff --git a/tests/other_test.go b/tests/other_test.go
index 44215326..301ac855 100644
--- a/tests/other_test.go
+++ b/tests/other_test.go
@@ -2,11 +2,14 @@ package tests

 import (
 	"os"
+	"path/filepath"
 	"strings"
 	"testing"

 	"github.com/stretchr/testify/require"

+	terra "github.com/gruntwork-io/terratest/modules/terraform"
+	"github.com/stretchr/testify/assert"
 	utils "github.com/terraform-ibm-modules/terraform-ibm-hpc/common_utils"
 	lsf "github.com/terraform-ibm-modules/terraform-ibm-hpc/lsf"
 )
@@ -657,3 +660,103 @@ func RunHpcNewVpcExistCustomDnsNull(t *testing.T, customResolverId string) {

 	lsf.ValidateClusterConfiguration(t, options, testLogger)
 }
+
+// TestRunWithoutMandatory tests Terraform's behavior when mandatory variables are missing by checking for specific error messages.
+func TestRunWithoutMandatory(t *testing.T) {
+	t.Parallel()
+
+	// Setup test suite
+	setupTestSuite(t)
+
+	testLogger.Info(t, "Cluster creation process initiated for "+t.Name())
+
+	// Get the absolute path of solutions/hpc
+	abs, err := filepath.Abs("solutions/hpc")
+	require.NoError(t, err, "Failed to get absolute path of solutions/hpc: %v", err)
+
+	terrPath := strings.ReplaceAll(abs, "tests/", "")
+
+	// Define Terraform options
+	terraformOptions := terra.WithDefaultRetryableErrors(t, &terra.Options{
+		TerraformDir: terrPath,
+		Vars:         map[string]interface{}{},
+	})
+
+	// Initialize and plan the Terraform deployment
+	_, err = terra.InitAndPlanE(t, terraformOptions)
+
+	// If there is an error, check if it contains the specific mandatory fields
+	if err != nil {
+		result := utils.VerifyDataContains(t, err.Error(), "cluster_id", testLogger) &&
+			utils.VerifyDataContains(t, err.Error(), "reservation_id", testLogger) &&
+			utils.VerifyDataContains(t, err.Error(), "bastion_ssh_keys", testLogger) &&
+			utils.VerifyDataContains(t, err.Error(), "compute_ssh_keys", testLogger) &&
+			utils.VerifyDataContains(t, err.Error(), "remote_allowed_ips", testLogger)
+		// Assert that all mandatory fields are reported as missing
+		assert.True(t, result)
+	}
+}
+
+// TestRunCIDRsAsNonDefault validates cluster creation with non-default VPC and subnet CIDR blocks.
+func TestRunCIDRsAsNonDefault(t *testing.T) {
+	// Parallelize the test
+	t.Parallel()
+
+	// Setup test suite
+	setupTestSuite(t)
+
+	testLogger.Info(t, "Cluster creation process initiated for "+t.Name())
+
+	// HPC cluster prefix
+	hpcClusterPrefix := utils.GenerateRandomString()
+
+	// Retrieve cluster information from environment variables
+	envVars := GetEnvVars()
+
+	// Create test options
+	options, err := setupOptions(t, hpcClusterPrefix, terraformDir, envVars.DefaultResourceGroup, ignoreDestroys)
+	require.NoError(t, err, "Error setting up test options: %v", err)
+
+	options.TerraformVars["vpc_cidr"] = "10.243.0.0/18"
+	options.TerraformVars["vpc_cluster_private_subnets_cidr_blocks"] = []string{"10.243.0.0/20"}
+	options.TerraformVars["vpc_cluster_login_private_subnets_cidr_blocks"] = []string{"10.243.16.0/28"}
+
+	options.SkipTestTearDown = true
+	defer options.TestTearDown()
+
+	lsf.ValidateBasicClusterConfiguration(t, options, testLogger)
+}
+
+// TestExistingPACEnvironment tests the validation of an existing PAC environment configuration.
+func TestExistingPACEnvironment(t *testing.T) {
+	// Parallelize the test to run concurrently with others
+	t.Parallel()
+
+	// Setup the test suite environment
+	setupTestSuite(t)
+
+	// Log the initiation of the cluster creation process
+	testLogger.Info(t, "Cluster creation process initiated for "+t.Name())
+
+	// Retrieve the environment variable for the JSON file path
+	val, ok := os.LookupEnv("EXISTING_ENV_JSON_FILE_PATH")
+	if !ok {
+		t.Fatal("Environment variable 'EXISTING_ENV_JSON_FILE_PATH' is not set")
+	}
+
+	// Check if the JSON file exists
+	if _, err := os.Stat(val); os.IsNotExist(err) {
+		t.Fatalf("JSON file '%s' does not exist", val)
+	}
+
+	// Parse the JSON configuration file
+	config, err := utils.ParseConfig(val)
+	require.NoError(t, err, "Error parsing JSON configuration: %v", err)
+
+	// Validate the cluster configuration
+	lsf.ValidateClusterConfigurationWithAPPCenterForExistingEnv(t, config.BastionIP, config.LoginNodeIP, config.ClusterID, config.ReservationID, config.ClusterPrefixName, config.ResourceGroup,
+		config.KeyManagement, config.Zones, config.DnsDomainName, config.ManagementNodeIPList, config.HyperthreadingEnabled, testLogger)
+}
diff --git a/tests/test_config.yml b/tests/test_config.yml
index 812dca9e..e0aa2a35 100644
--- a/tests/test_config.yml
+++ b/tests/test_config.yml
@@ -7,7 +7,7 @@ remote_allowed_ips:
 ssh_key: geretain-hpc
 login_node_instance_type: bx2-2x8
 login_image_name: hpcaas-lsf10-rhel88-compute-v5
-management_image_name: hpcaas-lsf10-rhel88-v6
+management_image_name: hpcaas-lsf10-rhel88-v7
 compute_image_name: hpcaas-lsf10-rhel88-compute-v5
 management_node_instance_type: bx2-16x64
 management_node_count: 2
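For reviewers who want to exercise the new `ParseConfig` helper outside the test suite, here is a minimal, self-contained sketch. The struct subset and the sample file are illustrative: only `ClusterID` and `ReservationID` echo values from `cra-config.yaml`, and the IP addresses are made up. The real helper lives in `tests/common_utils/utils.go` and returns the full `Configuration` struct. Note that the JSON keys must match the struct tags exactly, including their mixed casing (`ClusterID` vs. `bastionIP`).

```go
package main

import (
	"encoding/json"
	"fmt"
	"os"
)

// Configuration mirrors a subset of the struct added in
// tests/common_utils/utils.go; the remaining fields are omitted for brevity.
type Configuration struct {
	ClusterID            string   `json:"ClusterID"`
	ReservationID        string   `json:"ReservationID"`
	BastionIP            string   `json:"bastionIP"`
	ManagementNodeIPList []string `json:"managementNodeIPList"`
	LoginNodeIP          string   `json:"loginNodeIP"`
}

// ParseConfig follows the helper added in this change: read the file,
// unmarshal it, and wrap any error with the offending path.
func ParseConfig(filePath string) (*Configuration, error) {
	byteValue, err := os.ReadFile(filePath)
	if err != nil {
		return nil, fmt.Errorf("error reading file %s: %w", filePath, err)
	}
	var config Configuration
	if err := json.Unmarshal(byteValue, &config); err != nil {
		return nil, fmt.Errorf("error parsing JSON from file %s: %w", filePath, err)
	}
	return &config, nil
}

func main() {
	// Hypothetical existing-environment file; ClusterID and ReservationID
	// echo cra-config.yaml, the IPs are placeholders.
	sample := []byte(`{
		"ClusterID": "HPC-LSF-1",
		"ReservationID": "Contract-IBM-WES-DA",
		"bastionIP": "10.241.0.4",
		"managementNodeIPList": ["10.241.0.5", "10.241.0.6"],
		"loginNodeIP": "10.241.0.7"
	}`)
	tmp, err := os.CreateTemp("", "existing-env-*.json")
	if err != nil {
		panic(err)
	}
	defer os.Remove(tmp.Name())
	if _, err := tmp.Write(sample); err != nil {
		panic(err)
	}
	tmp.Close()

	cfg, err := ParseConfig(tmp.Name())
	if err != nil {
		panic(err)
	}
	fmt.Printf("cluster %s, bastion %s, %d management nodes\n",
		cfg.ClusterID, cfg.BastionIP, len(cfg.ManagementNodeIPList))
}
```

In `TestExistingPACEnvironment`, the same flow is driven by the `EXISTING_ENV_JSON_FILE_PATH` environment variable instead of a temporary file.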
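The two new guards in `configure_management_vsi.sh` interact: one caps non-us-south regions at 128 cores, the other caps the `mx2` family at 128 cores everywhere. Re-expressed in Go for clarity (an illustration, not code from the repository; family names other than `mx2` are elided in the diff, so `bx2` below is hypothetical):

```go
package main

import "fmt"

// validCoreCounts re-expresses the two guards added to the shell script:
// core counts above 128 are only kept for us-south, and the mx2 family is
// always capped at 128 cores.
func validCoreCounts(region, family string) []int {
	all := []int{2, 4, 8, 16, 32, 48, 64, 96, 128, 176} // same list as the shell loop
	var out []int
	for _, i := range all {
		if i > 128 && region != "us-south" {
			continue // skip >128-core templates outside us-south
		}
		if family == "mx2" && i > 128 {
			continue // mx2 never gets a >128-core template, even in us-south
		}
		out = append(out, i)
	}
	return out
}

func main() {
	fmt.Println(validCoreCounts("eu-de", "mx2"))    // 2 ... 128
	fmt.Println(validCoreCounts("us-south", "mx2")) // still capped at 128
	fmt.Println(validCoreCounts("us-south", "bx2")) // hypothetical family; includes 176
}
```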
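The `solutions/hpc/locals.tf` change forwards port `8444` alongside `8443` and `6080`, points the forwards at the management node (or `pac.<compute domain>` under App Center HA) instead of `localhost`, and targets the login node IPs directly. A Go sketch of the same string assembly, under the assumption of placeholder IPs and a placeholder DNS domain, may make the resulting command easier to review:

```go
package main

import (
	"fmt"
	"strings"
)

// buildSSHCmd mirrors, in Go, the ssh_cmd local assembled in
// solutions/hpc/locals.tf after this change. All argument values passed
// in main are illustrative placeholders.
func buildSSHCmd(appCenterHA bool, dnsDomain, managementIP, jumpHost string, loginIPs []string) string {
	forwardHost := managementIP
	if appCenterHA {
		forwardHost = "pac." + dnsDomain // HA routes the forwards via the PAC DNS name
	}
	forwards := fmt.Sprintf("-L 8443:%[1]s:8443 -L 6080:%[1]s:6080 -L 8444:%[1]s:8444", forwardHost)
	return fmt.Sprintf(
		"ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ServerAliveInterval=5 -o ServerAliveCountMax=1 %s -J ubuntu@%s lsfadmin@%s",
		forwards, jumpHost, strings.Join(loginIPs, ","),
	)
}

func main() {
	fmt.Println(buildSSHCmd(false, "hpc.example.com", "10.241.0.10", "150.240.0.1",
		[]string{"10.241.16.4"}))
}
```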