diff --git a/ibm_catalog.json b/ibm_catalog.json index c324fcb9..089ae5e2 100644 --- a/ibm_catalog.json +++ b/ibm_catalog.json @@ -331,7 +331,15 @@ "key": "bastion_security_group_id" }, { - "key": "bastion_ssh_private_key" + "key": "bastion_ssh_private_key", + "type": "multiline_secure_value", + "display_name": "Multiline secure value", + "required": false, + "custom_config": { + "grouping": "deployment", + "original_grouping": "deployment", + "type": "multiline_secure_value" + } }, { "hidden": true, diff --git a/modules/bootstrap/locals.tf b/modules/bootstrap/locals.tf index a89e43af..6f7ec67e 100644 --- a/modules/bootstrap/locals.tf +++ b/modules/bootstrap/locals.tf @@ -26,7 +26,7 @@ locals { bastion_node_name = format("%s-%s", local.prefix, "bastion") bastion_machine_type = "cx2-4x8" - bastion_image_name = "ibm-ubuntu-22-04-3-minimal-amd64-1" + bastion_image_name = "ibm-ubuntu-22-04-4-minimal-amd64-3" bastion_image_id = data.ibm_is_image.bastion.id diff --git a/modules/custom/subnet_cidr_check/main.tf b/modules/custom/subnet_cidr_check/main.tf deleted file mode 100644 index c69da089..00000000 --- a/modules/custom/subnet_cidr_check/main.tf +++ /dev/null @@ -1,17 +0,0 @@ - -locals { - subnet_cidr = [for i in [var.subnet_cidr] : [(((split(".", cidrhost(i, 0))[0]) * pow(256, 3)) #192 - + ((split(".", cidrhost(i, 0))[1]) * pow(256, 2)) - + ((split(".", cidrhost(i, 0))[2]) * pow(256, 1)) - + ((split(".", cidrhost(i, 0))[3]) * pow(256, 0))), (((split(".", cidrhost(i, -1))[0]) * pow(256, 3)) #192 - + ((split(".", cidrhost(i, -1))[1]) * pow(256, 2)) - + ((split(".", cidrhost(i, -1))[2]) * pow(256, 1)) - + ((split(".", cidrhost(i, -1))[3]) * pow(256, 0)))]] - vpc_address_prefix = [for i in var.vpc_address_prefix : [(((split(".", cidrhost(i, 0))[0]) * pow(256, 3)) #192 - + ((split(".", cidrhost(i, 0))[1]) * pow(256, 2)) - + ((split(".", cidrhost(i, 0))[2]) * pow(256, 1)) - + ((split(".", cidrhost(i, 0))[3]) * pow(256, 0))), (((split(".", cidrhost(i, -1))[0]) * pow(256, 3)) - + ((split(".", cidrhost(i, -1))[1]) * pow(256, 2)) - + ((split(".", cidrhost(i, -1))[2]) * pow(256, 1)) - + ((split(".", cidrhost(i, -1))[3]) * pow(256, 0)))]] -} diff --git a/modules/custom/subnet_cidr_check/outputs.tf b/modules/custom/subnet_cidr_check/outputs.tf deleted file mode 100644 index da2c0d02..00000000 --- a/modules/custom/subnet_cidr_check/outputs.tf +++ /dev/null @@ -1,4 +0,0 @@ -output "results" { - description = "Result of the calculation" - value = [for ip in local.vpc_address_prefix : ip[0] <= local.subnet_cidr[0][0] && ip[1] >= local.subnet_cidr[0][1]] -} diff --git a/modules/custom/subnet_cidr_check/variables.tf b/modules/custom/subnet_cidr_check/variables.tf deleted file mode 100644 index 8249beb2..00000000 --- a/modules/custom/subnet_cidr_check/variables.tf +++ /dev/null @@ -1,12 +0,0 @@ -#subnet_cidr is the cidr range of input subnet. -variable "subnet_cidr" { - description = "CIDR range of input subnet." - type = string -} - -#vpc_address_prefix is the cidr range of vpc address prefixes. -variable "vpc_address_prefix" { - description = "CIDR range of VPC address prefixes." 
- type = list(string) - default = [] -} diff --git a/modules/custom/subnet_cidr_check/version.tf b/modules/custom/subnet_cidr_check/version.tf deleted file mode 100644 index 69642684..00000000 --- a/modules/custom/subnet_cidr_check/version.tf +++ /dev/null @@ -1,3 +0,0 @@ -terraform { - required_version = ">= 1.3" -} diff --git a/modules/landing_zone_vsi/image_map.tf b/modules/landing_zone_vsi/image_map.tf index 225ef1e8..3e9590ed 100644 --- a/modules/landing_zone_vsi/image_map.tf +++ b/modules/landing_zone_vsi/image_map.tf @@ -1,19 +1,19 @@ locals { image_region_map = { - "hpcaas-lsf10-rhel88-v8" = { - "us-east" = "r014-ee8b808f-e129-4d9e-965e-fed7003132e7" - "eu-de" = "r010-bfad7737-77f9-4af7-9446-4783fb582258" - "us-south" = "r006-d314bc1d-e904-4124-9055-0862e1a56579" + "hpcaas-lsf10-rhel88-v9" = { + "us-east" = "r014-d2b18006-c0c4-428f-96f3-e033b970c582" + "eu-de" = "r010-3bf3f57e-1985-431d-aefe-e9914ab7919c" + "us-south" = "r006-7b0aa90b-f52c-44b1-bab7-ccbfae9f1816" }, "hpcaas-lsf10-rhel88-compute-v5" = { "us-east" = "r014-deb34fb1-edbf-464c-9af3-7efa2efcff3f" "eu-de" = "r010-2d04cfff-6f54-45d1-b3b3-7e259083d71f" "us-south" = "r006-236ee1f4-38de-4845-b7ec-e2ffa7df5d08" }, - "hpcaas-lsf10-ubuntu2204-compute-v4" = { - "us-east" = "r014-b15b5e51-ccb6-40e4-9d6b-d0d47864a8a2" - "eu-de" = "r010-39f4de94-2a55-431e-ad86-613c5b23a030" - "us-south" = "r006-fe0e6afd-4d01-4794-a9ed-dd5353dda482" + "hpcaas-lsf10-ubuntu2204-compute-v5" = { + "us-east" = "r014-ecbf4c89-16a3-472e-8bab-1e76d744e264" + "eu-de" = "r010-9811d8bf-a7f8-4ee6-8342-e5af217bc513" + "us-south" = "r006-ed76cb75-f086-48e9-8090-e2dbc411abe7" } } } diff --git a/modules/landing_zone_vsi/variables.tf b/modules/landing_zone_vsi/variables.tf index ab3bdaff..4345e7de 100644 --- a/modules/landing_zone_vsi/variables.tf +++ b/modules/landing_zone_vsi/variables.tf @@ -313,7 +313,7 @@ variable "ldap_vsi_profile" { variable "ldap_vsi_osimage_name" { type = string - default = "ibm-ubuntu-22-04-3-minimal-amd64-1" + default = "ibm-ubuntu-22-04-4-minimal-amd64-3" description = "Image name to be used for provisioning the LDAP instances." 
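The subnet_cidr_check module deleted above converted the first and last host of each CIDR to a 32-bit integer and compared those bounds against every VPC address prefix to decide whether a subnet fits inside the VPC. For reference, a minimal Go sketch of that same containment test (illustrative only, not part of the repository; the function name subnetWithinPrefixes is hypothetical):

package main

import (
	"fmt"
	"net/netip"
)

// subnetWithinPrefixes reports whether subnetCIDR lies entirely inside at
// least one of the given VPC address prefixes. This mirrors the check the
// removed subnet_cidr_check module expressed with cidrhost() arithmetic:
// prefix start <= subnet start && prefix end >= subnet end.
func subnetWithinPrefixes(subnetCIDR string, vpcPrefixes []string) (bool, error) {
	subnet, err := netip.ParsePrefix(subnetCIDR)
	if err != nil {
		return false, err
	}
	subnet = subnet.Masked()
	for _, p := range vpcPrefixes {
		prefix, err := netip.ParsePrefix(p)
		if err != nil {
			return false, err
		}
		// An aligned block is contained when a covering prefix is no more
		// specific than the subnet and includes its network address.
		if prefix.Bits() <= subnet.Bits() && prefix.Contains(subnet.Addr()) {
			return true, nil
		}
	}
	return false, nil
}

func main() {
	ok, _ := subnetWithinPrefixes("10.241.0.0/24", []string{"10.241.0.0/18", "10.242.64.0/18"})
	fmt.Println(ok) // true: the subnet sits inside the first address prefix
}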
} diff --git a/samples/configs/hpc_catalog_values.json b/samples/configs/hpc_catalog_values.json index 4f13107f..210999b6 100644 --- a/samples/configs/hpc_catalog_values.json +++ b/samples/configs/hpc_catalog_values.json @@ -20,7 +20,7 @@ "enable_cos_integration" : "false", "cos_instance_name" : "__NULL__", "enable_fip" : "true", - "management_image_name" : "hpcaas-lsf10-rhel88-v8", + "management_image_name" : "hpcaas-lsf10-rhel88-v9", "compute_image_name" : "hpcaas-lsf10-rhel88-compute-v5", "login_image_name" : "hpcaas-lsf10-rhel88-compute-v5", "login_node_instance_type" : "bx2-2x8", @@ -48,7 +48,7 @@ "ldap_user_name" : "", "ldap_user_password" : "", "ldap_vsi_profile" : "cx2-2x4", - "ldap_vsi_osimage_name" : "ibm-ubuntu-22-04-3-minimal-amd64-1", + "ldap_vsi_osimage_name" : "ibm-ubuntu-22-04-4-minimal-amd64-3", "skip_iam_authorization_policy" : "false", "skip_iam_share_authorization_policy" : "false", "scc_enable" : "false", diff --git a/samples/configs/hpc_schematics_values.json b/samples/configs/hpc_schematics_values.json index ae72c70c..2362b330 100644 --- a/samples/configs/hpc_schematics_values.json +++ b/samples/configs/hpc_schematics_values.json @@ -197,7 +197,7 @@ }, { "name": "management_image_name", - "value": "hpcaas-lsf10-rhel88-v8", + "value": "hpcaas-lsf10-rhel88-v9", "type": "string", "secure": false, "description": "Name of the custom image that you want to use to create virtual server instances in your IBM Cloud account to deploy the IBM Cloud HPC cluster management nodes. By default, the solution uses a RHEL88 base image with additional software packages mentioned [here](https://cloud.ibm.com/docs/ibm-spectrum-lsf#create-custom-image). If you would like to include your application-specific binary files, follow the instructions in [ Planning for custom images ](https://cloud.ibm.com/docs/vpc?topic=vpc-planning-custom-images) to create your own custom image and use that to build the IBM Cloud HPC cluster through this offering." @@ -207,7 +207,7 @@ "value": "hpcaas-lsf10-rhel88-compute-v5", "type": "string", "secure": false, - "description": "Name of the custom image that you want to use to create virtual server instances in your IBM Cloud account to deploy the IBM Cloud HPC cluster dynamic compute nodes. By default, the solution uses a RHEL 8-6 OS image with additional software packages mentioned [here](https://cloud.ibm.com/docs/ibm-spectrum-lsf#create-custom-image). The solution also offers, Ubuntu 22-04 OS base image (hpcaas-lsf10-ubuntu2204-compute-v4). If you would like to include your application-specific binary files, follow the instructions in [ Planning for custom images ](https://cloud.ibm.com/docs/vpc?topic=vpc-planning-custom-images) to create your own custom image and use that to build the IBM Cloud HPC cluster through this offering." + "description": "Name of the custom image that you want to use to create virtual server instances in your IBM Cloud account to deploy the IBM Cloud HPC cluster dynamic compute nodes. By default, the solution uses a RHEL 8-6 OS image with additional software packages mentioned [here](https://cloud.ibm.com/docs/ibm-spectrum-lsf#create-custom-image). The solution also offers, Ubuntu 22-04 OS base image (hpcaas-lsf10-ubuntu2204-compute-v5). If you would like to include your application-specific binary files, follow the instructions in [ Planning for custom images ](https://cloud.ibm.com/docs/vpc?topic=vpc-planning-custom-images) to create your own custom image and use that to build the IBM Cloud HPC cluster through this offering." 
}, { @@ -215,7 +215,7 @@ "value": "hpcaas-lsf10-rhel88-compute-v5", "type": "string", "secure": false, - "description": "Name of the custom image that you want to use to create virtual server instances in your IBM Cloud account to deploy the IBM Cloud HPC cluster login node. By default, the solution uses a RHEL 8-6 OS image with additional software packages mentioned [here](https://cloud.ibm.com/docs/ibm-spectrum-lsf#create-custom-image). The solution also offers, Ubuntu 22-04 OS base image (hpcaas-lsf10-ubuntu2204-compute-v4). If you would like to include your application-specific binary files, follow the instructions in [ Planning for custom images ](https://cloud.ibm.com/docs/vpc?topic=vpc-planning-custom-images) to create your own custom image and use that to build the IBM Cloud HPC cluster through this offering." + "description": "Name of the custom image that you want to use to create virtual server instances in your IBM Cloud account to deploy the IBM Cloud HPC cluster login node. By default, the solution uses a RHEL 8-6 OS image with additional software packages mentioned [here](https://cloud.ibm.com/docs/ibm-spectrum-lsf#create-custom-image). The solution also offers, Ubuntu 22-04 OS base image (hpcaas-lsf10-ubuntu2204-compute-v5). If you would like to include your application-specific binary files, follow the instructions in [ Planning for custom images ](https://cloud.ibm.com/docs/vpc?topic=vpc-planning-custom-images) to create your own custom image and use that to build the IBM Cloud HPC cluster through this offering." }, { "name": "login_node_instance_type", @@ -377,7 +377,7 @@ }, { "name": "ldap_vsi_osimage_name", - "value": "ibm-ubuntu-22-04-3-minimal-amd64-1", + "value": "ibm-ubuntu-22-04-4-minimal-amd64-3", "type": "string", "secure": false, "description": "Image name to be used for provisioning the LDAP instances." @@ -456,28 +456,28 @@ "value": "__NULL__", "type": "string", "secure": false, - "description" : "Bastion instance name. If none given then new bastion will be created." + "description" : "Provide the name of the bastion instance. If none given then new bastion will be created." }, { "name": "bastion_instance_public_ip", "value": "__NULL__", "type": "string", "secure": false, - "description" : "Bastion instance public ip address." + "description" : "Provide the public ip address of the bastion instance to establish the remote connection." }, { "name": "bastion_security_group_id", "value": "__NULL__", "type": "string", "secure": false, - "description" : "Bastion security group id." + "description" : "Provide the security group ID of the bastion server. This security group ID will be added as an allowlist rule on the HPC cluster nodes to establish an SSH connection through the bastion node." }, { "name": "bastion_ssh_private_key", "value": "__NULL__", "type": "string", "secure": false, - "description" : "Bastion SSH private key path, which will be used to login to bastion host." + "description" : "Provide the private SSH key (named id_rsa) used during the creation and configuration of the bastion server to securely authenticate and connect to the bastion server. This allows access to internal network resources from a secure entry point. Note: The corresponding public SSH key (named id_rsa.pub) must already be available in the ~/.ssh/authorized_keys file on the bastion host to establish authentication." 
} ] } diff --git a/solutions/hpc/datasource.tf b/solutions/hpc/datasource.tf index fac7dc67..a8cd1fcd 100644 --- a/solutions/hpc/datasource.tf +++ b/solutions/hpc/datasource.tf @@ -39,11 +39,6 @@ data "ibm_is_vpc" "vpc" { depends_on = [module.landing_zone.vpc_name, data.ibm_is_vpc.existing_vpc] } -data "ibm_is_vpc_address_prefixes" "existing_vpc" { - #count = var.vpc_name != "" ? 1 : 0 - vpc = data.ibm_is_vpc.vpc.id -} - data "ibm_is_subnet" "existing_subnet" { # Lookup for this Subnet resources only if var.cluster_subnet_ids is not empty count = (length(var.cluster_subnet_ids) == 1 && var.vpc_name != null) ? length(var.cluster_subnet_ids) : 0 diff --git a/solutions/hpc/input_validation.tf b/solutions/hpc/input_validation.tf index c253320f..3c65c981 100644 --- a/solutions/hpc/input_validation.tf +++ b/solutions/hpc/input_validation.tf @@ -6,31 +6,7 @@ # This file contains the complete information on all the validations performed from the code during the generate plan process # Validations are performed to make sure, the appropriate error messages are displayed to user in-order to provide required input parameter -# Module for the private cluster_subnet and login subnet cidr validation. -module "ipvalidation_cluster_subnet" { - count = length(var.vpc_cluster_private_subnets_cidr_blocks) - source = "../../modules/custom/subnet_cidr_check" - subnet_cidr = var.vpc_cluster_private_subnets_cidr_blocks[count.index] - vpc_address_prefix = [local.prefixes_in_given_zone_1][count.index] -} - -module "ipvalidation_login_subnet" { - source = "../../modules/custom/subnet_cidr_check" - subnet_cidr = var.vpc_cluster_login_private_subnets_cidr_blocks[0] - vpc_address_prefix = local.prefixes_in_given_zone_login -} - locals { - # Copy address prefixes and CIDR of given zone into a new tuple - prefixes_in_given_zone_login = [ - for prefix in data.ibm_is_vpc_address_prefixes.existing_vpc[*].address_prefixes[0] : - prefix.cidr if prefix.zone[0].name == var.zones[0]] - - # To get the address prefix of zone1 - prefixes_in_given_zone_1 = [ - for prefix in data.ibm_is_vpc_address_prefixes.existing_vpc[*].address_prefixes[0] : - prefix.cidr if var.zones[0] == prefix.zone[0].name] - # validation for the boot volume encryption toggling. validate_enable_customer_managed_encryption = anytrue([alltrue([var.kms_key_name != null, var.kms_instance_name != null]), (var.kms_key_name == null), (var.key_management != "key_protect")]) validate_enable_customer_managed_encryption_msg = "Please make sure you are passing the kms_instance_name if you are passing kms_key_name." diff --git a/solutions/hpc/variables.tf b/solutions/hpc/variables.tf index a783d715..941dd2df 100644 --- a/solutions/hpc/variables.tf +++ b/solutions/hpc/variables.tf @@ -176,7 +176,7 @@ variable "login_node_instance_type" { } variable "management_image_name" { type = string - default = "hpcaas-lsf10-rhel88-v8" + default = "hpcaas-lsf10-rhel88-v9" description = "Name of the custom image that you want to use to create virtual server instances in your IBM Cloud account to deploy the IBM Cloud HPC cluster management nodes. By default, the solution uses a RHEL88 base image with additional software packages mentioned [here](https://cloud.ibm.com/docs/ibm-spectrum-lsf#create-custom-image). 
If you would like to include your application-specific binary files, follow the instructions in [ Planning for custom images ](https://cloud.ibm.com/docs/vpc?topic=vpc-planning-custom-images) to create your own custom image and use that to build the IBM Cloud HPC cluster through this offering." } @@ -184,13 +184,13 @@ variable "management_image_name" { variable "compute_image_name" { type = string default = "hpcaas-lsf10-rhel88-compute-v5" - description = "Name of the custom image that you want to use to create virtual server instances in your IBM Cloud account to deploy the IBM Cloud HPC cluster dynamic compute nodes. By default, the solution uses a RHEL 8-8 base OS image with additional software packages mentioned [here](https://cloud.ibm.com/docs/ibm-spectrum-lsf#create-custom-image). The solution also offers, Ubuntu 22-04 OS base image (hpcaas-lsf10-ubuntu2204-compute-v4). If you would like to include your application-specific binary files, follow the instructions in [ Planning for custom images ](https://cloud.ibm.com/docs/vpc?topic=vpc-planning-custom-images) to create your own custom image and use that to build the IBM Cloud HPC cluster through this offering." + description = "Name of the custom image that you want to use to create virtual server instances in your IBM Cloud account to deploy the IBM Cloud HPC cluster dynamic compute nodes. By default, the solution uses a RHEL 8-8 base OS image with additional software packages mentioned [here](https://cloud.ibm.com/docs/ibm-spectrum-lsf#create-custom-image). The solution also offers, Ubuntu 22-04 OS base image (hpcaas-lsf10-ubuntu2204-compute-v5). If you would like to include your application-specific binary files, follow the instructions in [ Planning for custom images ](https://cloud.ibm.com/docs/vpc?topic=vpc-planning-custom-images) to create your own custom image and use that to build the IBM Cloud HPC cluster through this offering." } variable "login_image_name" { type = string default = "hpcaas-lsf10-rhel88-compute-v5" - description = "Name of the custom image that you want to use to create virtual server instances in your IBM Cloud account to deploy the IBM Cloud HPC cluster login node. By default, the solution uses a RHEL 8-8 OS image with additional software packages mentioned [here](https://cloud.ibm.com/docs/ibm-spectrum-lsf#create-custom-image). The solution also offers, Ubuntu 22-04 OS base image (hpcaas-lsf10-ubuntu2204-compute-v4). If you would like to include your application-specific binary files, follow the instructions in [ Planning for custom images ](https://cloud.ibm.com/docs/vpc?topic=vpc-planning-custom-images) to create your own custom image and use that to build the IBM Cloud HPC cluster through this offering." + description = "Name of the custom image that you want to use to create virtual server instances in your IBM Cloud account to deploy the IBM Cloud HPC cluster login node. By default, the solution uses a RHEL 8-8 OS image with additional software packages mentioned [here](https://cloud.ibm.com/docs/ibm-spectrum-lsf#create-custom-image). The solution also offers, Ubuntu 22-04 OS base image (hpcaas-lsf10-ubuntu2204-compute-v5). If you would like to include your application-specific binary files, follow the instructions in [ Planning for custom images ](https://cloud.ibm.com/docs/vpc?topic=vpc-planning-custom-images) to create your own custom image and use that to build the IBM Cloud HPC cluster through this offering." 
} variable "management_node_instance_type" { @@ -496,7 +496,7 @@ variable "ldap_vsi_profile" { variable "ldap_vsi_osimage_name" { type = string - default = "ibm-ubuntu-22-04-3-minimal-amd64-1" + default = "ibm-ubuntu-22-04-4-minimal-amd64-3" description = "Image name to be used for provisioning the LDAP instances. By default ldap server are created on Ubuntu based OS flavour." } @@ -560,24 +560,24 @@ variable "TF_VALIDATION_SCRIPT_FILES" { variable "bastion_instance_name" { type = string default = null - description = "Bastion instance name. If none given then new bastion will be created." + description = "Provide the name of the bastion instance. If none given then new bastion will be created." } variable "bastion_instance_public_ip" { type = string default = null - description = "Bastion instance public ip address." + description = "Provide the public ip address of the bastion instance to establish the remote connection." } variable "bastion_security_group_id" { type = string default = null - description = "Bastion security group id." + description = "Provide the security group ID of the bastion server. This security group ID will be added as an allowlist rule on the HPC cluster nodes to establish an SSH connection through the bastion node." } variable "bastion_ssh_private_key" { type = string sensitive = true default = null - description = "Bastion SSH private key path, which will be used to login to bastion host." + description = "Provide the private SSH key (named id_rsa) used during the creation and configuration of the bastion server to securely authenticate and connect to the bastion server. This allows access to internal network resources from a secure entry point. Note: The corresponding public SSH key (named id_rsa.pub) must already be available in the ~/.ssh/authorized_keys file on the bastion host to establish authentication." } diff --git a/tests/lsf/lsf_cluster_test_utils.go b/tests/lsf/lsf_cluster_test_utils.go index c766ff31..13dd96a6 100644 --- a/tests/lsf/lsf_cluster_test_utils.go +++ b/tests/lsf/lsf_cluster_test_utils.go @@ -218,10 +218,6 @@ func VerifyLoginNodeConfig( checkMasterNameErr := LSFCheckMasterName(t, sshLoginClient, expectedMasterName, logger) utils.LogVerificationResult(t, checkMasterNameErr, "check Master name on login node", logger) - // Verify Reservation ID - ReservationIDErr := HPCCheckReservationID(t, sshLoginClient, expectedReservationID, logger) - utils.LogVerificationResult(t, ReservationIDErr, "check Reservation ID on login node", logger) - // MTU check for login nodes mtuCheckErr := LSFMTUCheck(t, sshLoginClient, []string{loginNodeIP}, logger) utils.LogVerificationResult(t, mtuCheckErr, "MTU check on login node", logger) @@ -243,8 +239,12 @@ func VerifyLoginNodeConfig( utils.LogVerificationResult(t, fileMountErr, "File mount check on login node", logger) //Run job - jobErr := LSFRunJobs(t, sshLoginClient, jobCommand, logger) + jobErr := LSFRunJobs(t, sshLoginClient, LOGIN_NODE_EXECUTION_PATH+jobCommand, logger) //Added the executable path utils.LogVerificationResult(t, jobErr, "check Run job on login node", logger) + + // Verify LSF commands + lsfCmdErr := VerifyLSFCommands(t, sshLoginClient, "login", logger) + utils.LogVerificationResult(t, lsfCmdErr, "Check the 'lsf' command on the login node", logger) } // VerifyTestTerraformOutputs is a function that verifies the Terraform outputs for a test scenario. 
@@ -337,14 +337,14 @@ func VerifyManagementNodeLDAPConfig( fileMountErr := HPCCheckFileMountAsLDAPUser(t, sshLdapClient, "management", logger) utils.LogVerificationResult(t, fileMountErr, "check file mount as an LDAP user on the management node", logger) + // Verify LSF commands on management node as LDAP user + lsfCmdErr := VerifyLSFCommandsAsLDAPUser(t, sshLdapClient, ldapUserName, "management", logger) + utils.LogVerificationResult(t, lsfCmdErr, "Check the 'lsf' command as an LDAP user on the management node", logger) + // Run job jobErr := LSFRunJobsAsLDAPUser(t, sshLdapClient, jobCommand, ldapUserName, logger) utils.LogVerificationResult(t, jobErr, "check Run job as an LDAP user on the management node", logger) - // Verify LSF commands on management node as LDAP user - lsfCmdErr := VerifyLSFCommands(t, sshLdapClient, ldapUserName, logger) - utils.LogVerificationResult(t, lsfCmdErr, "Check the 'lsf' command as an LDAP user on the management node", logger) - // Loop through management node IPs and perform checks for i := 0; i < len(managementNodeIPList); i++ { sshLdapClientUser, connectionErr := utils.ConnectToHostAsLDAPUser(LSF_PUBLIC_HOST_NAME, bastionIP, managementNodeIPList[i], ldapUserName, ldapPassword) @@ -391,11 +391,11 @@ func VerifyLoginNodeLDAPConfig( utils.LogVerificationResult(t, fileMountErr, "check file mount as an LDAP user on the login node", logger) // Run job - jobErr := LSFRunJobsAsLDAPUser(t, sshLdapClient, jobCommand, ldapUserName, logger) + jobErr := LSFRunJobsAsLDAPUser(t, sshLdapClient, LOGIN_NODE_EXECUTION_PATH+jobCommand, ldapUserName, logger) utils.LogVerificationResult(t, jobErr, "check Run job as an LDAP user on the login node", logger) // Verify LSF commands on login node as LDAP user - lsfCmdErr := VerifyLSFCommands(t, sshLdapClient, ldapUserName, logger) + lsfCmdErr := VerifyLSFCommandsAsLDAPUser(t, sshLdapClient, ldapUserName, "login", logger) utils.LogVerificationResult(t, lsfCmdErr, "Check the 'lsf' command as an LDAP user on the login node", logger) } @@ -432,7 +432,7 @@ func VerifyComputeNodeLDAPConfig( utils.LogVerificationResult(t, fileMountErr, "check file mount as an LDAP user on the compute node", logger) // Verify LSF commands - lsfCmdErr := VerifyLSFCommands(t, sshLdapClient, ldapUserName, logger) + lsfCmdErr := VerifyLSFCommandsAsLDAPUser(t, sshLdapClient, ldapUserName, "compute", logger) utils.LogVerificationResult(t, lsfCmdErr, "Check the 'lsf' command as an LDAP user on the compute node", logger) // SSH connection to other compute nodes @@ -464,11 +464,11 @@ func VerifyPTRRecordsForManagementAndLoginNodes(t *testing.T, sClient *ssh.Clien } -// CreateServiceInstanceandKmsKey creates a service instance on IBM Cloud and a KMS key within that instance. +// CreateServiceInstanceAndKmsKey creates a service instance on IBM Cloud and a KMS key within that instance. // It logs into IBM Cloud using the provided API key, region, and resource group, then creates the service instance // and the KMS key with the specified names. It logs the results of each operation. // Returns:error - An error if any operation fails, otherwise nil. 
-func CreateServiceInstanceandKmsKey(t *testing.T, apiKey, expectedZone, expectedResourceGroup, kmsInstanceName, kmsKeyName string, logger *utils.AggregatedLogger) error { +func CreateServiceInstanceAndKmsKey(t *testing.T, apiKey, expectedZone, expectedResourceGroup, kmsInstanceName, kmsKeyName string, logger *utils.AggregatedLogger) error { // Create the service instance and return its GUID _, createInstanceErr := CreateServiceInstanceAndReturnGUID(t, apiKey, expectedZone, expectedResourceGroup, kmsInstanceName, logger) // Log the verification result for creating the service instance @@ -497,3 +497,69 @@ func DeleteServiceInstanceAndAssociatedKeys(t *testing.T, apiKey, expectedZone, // Log the verification result for deleting the service instance and associated KMS key utils.LogVerificationResult(t, deleteInstanceAndKey, "Delete Service Instance and associated KMS Key", logger) } + +// VerifyLSFDNS performs a DNS configuration check on a list of nodes using LSFDNSCheck function. +// It logs the verification result. +func VerifyLSFDNS(t *testing.T, sClient *ssh.Client, ipsList []string, domainName string, logger *utils.AggregatedLogger) { + dnsCheckErr := LSFDNSCheck(t, sClient, ipsList, domainName, logger) + utils.LogVerificationResult(t, dnsCheckErr, "dns check", logger) +} + +// VerifyCreateNewLdapUserAndManagementNodeLDAPConfig creates a new LDAP user, verifies the LDAP configuration on the +// management node by connecting via SSH, running jobs, and verifying LSF commands. It connects to the management node +// as the new LDAP user and runs specified commands to ensure the new user is properly configured. +// It logs into the LDAP server using the provided SSH client, admin password, domain name, and user information, then +// verifies the configuration on the management node. +// Returns an error if any step fails +func VerifyCreateNewLdapUserAndManagementNodeLDAPConfig( + t *testing.T, + sldapClient *ssh.Client, + bastionIP string, + ldapServerIP string, + managementNodeIPList []string, + jobCommand string, + ldapAdminPassword string, + ldapDomainName string, + ldapUserName string, + ldapUserPassword string, + newLdapUserName string, + logger *utils.AggregatedLogger, +) { + + // Add a new LDAP user + if err := HPCAddNewLDAPUser(t, sldapClient, ldapAdminPassword, ldapDomainName, ldapUserName, newLdapUserName, logger); err != nil { + utils.LogVerificationResult(t, err, "add new LDAP user", logger) + return + } + + // Connect to the management node via SSH as the new LDAP user + sshLdapClientUser, err := utils.ConnectToHostAsLDAPUser(LSF_PUBLIC_HOST_NAME, bastionIP, managementNodeIPList[0], newLdapUserName, ldapUserPassword) + if err != nil { + utils.LogVerificationResult(t, err, "connect to the management node via SSH as the new LDAP user", logger) + return + } + defer sshLdapClientUser.Close() + + // Run job as the new LDAP user + if err := LSFRunJobsAsLDAPUser(t, sshLdapClientUser, jobCommand, newLdapUserName, logger); err != nil { + utils.LogVerificationResult(t, err, "run job as the new LDAP user on the management node", logger) + } + + // Verify LSF commands on the management node as the new LDAP user + if err := VerifyLSFCommandsAsLDAPUser(t, sshLdapClientUser, newLdapUserName, "management", logger); err != nil { + utils.LogVerificationResult(t, err, "Check the 'lsf' command as the new LDAP user on the management node", logger) + } + +} + +// ValidateCosServiceInstanceAndVpcFlowLogs checks both the COS service instance and the VPC flow logs. +// It logs the verification result. 
+func ValidateCosServiceInstanceAndVpcFlowLogs(t *testing.T, apiKey, expectedZone, expectedResourceGroup, clusterPrefix string, logger *utils.AggregatedLogger) { + // Verify the COS service instance details + cosErr := VerifyCosServiceInstance(t, apiKey, expectedZone, expectedResourceGroup, clusterPrefix, logger) + utils.LogVerificationResult(t, cosErr, "COS check", logger) + + // Verify the VPC flow log details + flowLogsErr := ValidateFlowLogs(t, apiKey, expectedZone, expectedResourceGroup, clusterPrefix, logger) + utils.LogVerificationResult(t, flowLogsErr, "VPC flow logs check", logger) +} diff --git a/tests/lsf/lsf_cluster_test_validation.go b/tests/lsf/lsf_cluster_test_validation.go index a0d6b73f..a26c135a 100644 --- a/tests/lsf/lsf_cluster_test_validation.go +++ b/tests/lsf/lsf_cluster_test_validation.go @@ -62,6 +62,9 @@ func ValidateClusterConfiguration(t *testing.T, options *testhelper.TestOptions, // Verify SSH key VerifySSHKey(t, sshClient, bastionIP, LSF_PUBLIC_HOST_NAME, LSF_PRIVATE_HOST_NAME, "management", managementNodeIPList, testLogger) + //VerifyLSFDNS on management nodes + VerifyLSFDNS(t, sshClient, managementNodeIPList, expectedDnsDomainName, testLogger) + // Perform failover and failback FailoverAndFailback(t, sshClient, JOB_COMMAND_MED, testLogger) @@ -91,6 +94,9 @@ func ValidateClusterConfiguration(t *testing.T, options *testhelper.TestOptions, // Verify SSH key VerifySSHKey(t, sshClient, bastionIP, LSF_PUBLIC_HOST_NAME, LSF_PRIVATE_HOST_NAME, "compute", computeNodeIPList, testLogger) + //VerifyLSFDNS on compute nodes + VerifyLSFDNS(t, sshClient, computeNodeIPList, expectedDnsDomainName, testLogger) + // Connect to the login node via SSH and handle connection errors sshLoginNodeClient, connectionErr := utils.ConnectToHost(LSF_PUBLIC_HOST_NAME, bastionIP, LSF_PRIVATE_HOST_NAME, loginNodeIP) require.NoError(t, connectionErr, "Failed to connect to the login node via SSH: %v", connectionErr) @@ -109,6 +115,9 @@ func ValidateClusterConfiguration(t *testing.T, options *testhelper.TestOptions, // Verify PTR records VerifyPTRRecordsForManagementAndLoginNodes(t, sshClient, LSF_PUBLIC_HOST_NAME, bastionIP, LSF_PRIVATE_HOST_NAME, managementNodeIPList, loginNodeIP, expectedDnsDomainName, testLogger) + //VerifyLSFDNS on login node + VerifyLSFDNS(t, sshClient, []string{loginNodeIP}, expectedDnsDomainName, testLogger) + // Verify file share encryption VerifyFileShareEncryption(t, os.Getenv("TF_VAR_ibmcloud_api_key"), utils.GetRegion(expectedZone), expectedResourceGroup, expectedMasterName, expectedKeyManagement, testLogger) testLogger.Info(t, t.Name()+" Validation ended") @@ -165,6 +174,9 @@ func ValidateClusterConfigurationWithAPPCenter(t *testing.T, options *testhelper // Verify SSH key VerifySSHKey(t, sshClient, bastionIP, LSF_PUBLIC_HOST_NAME, LSF_PRIVATE_HOST_NAME, "management", managementNodeIPList, testLogger) + //VerifyLSFDNS on management nodes + VerifyLSFDNS(t, sshClient, managementNodeIPList, expectedDnsDomainName, testLogger) + // Perform failover and failback FailoverAndFailback(t, sshClient, JOB_COMMAND_MED, testLogger) @@ -194,6 +206,9 @@ func ValidateClusterConfigurationWithAPPCenter(t *testing.T, options *testhelper // Verify SSH key VerifySSHKey(t, sshClient, bastionIP, LSF_PUBLIC_HOST_NAME, LSF_PRIVATE_HOST_NAME, "compute", computeNodeIPList, testLogger) + //VerifyLSFDNS on compute nodes + VerifyLSFDNS(t, sshClient, computeNodeIPList, expectedDnsDomainName, testLogger) + // Connect to the login node via SSH and handle connection errors sshLoginNodeClient, 
connectionErr := utils.ConnectToHost(LSF_PUBLIC_HOST_NAME, bastionIP, LSF_PRIVATE_HOST_NAME, loginNodeIP) require.NoError(t, connectionErr, "Failed to connect to the login node via SSH: %v", connectionErr) @@ -218,6 +233,9 @@ func ValidateClusterConfigurationWithAPPCenter(t *testing.T, options *testhelper // Verify PTR records VerifyPTRRecordsForManagementAndLoginNodes(t, sshClient, LSF_PUBLIC_HOST_NAME, bastionIP, LSF_PRIVATE_HOST_NAME, managementNodeIPList, loginNodeIP, expectedDnsDomainName, testLogger) + //VerifyLSFDNS on login node + VerifyLSFDNS(t, sshClient, []string{loginNodeIP}, expectedDnsDomainName, testLogger) + // Verify file share encryption VerifyFileShareEncryption(t, os.Getenv("TF_VAR_ibmcloud_api_key"), utils.GetRegion(expectedZone), expectedResourceGroup, expectedMasterName, expectedKeyManagement, testLogger) @@ -313,9 +331,9 @@ func ValidateLDAPClusterConfiguration(t *testing.T, options *testhelper.TestOpti expectedResourceGroup := options.TerraformVars["resource_group"].(string) expectedKeyManagement := options.TerraformVars["key_management"].(string) expectedLdapDomain := options.TerraformVars["ldap_basedns"].(string) - expectedLdapAdminPassword := options.TerraformVars["ldap_admin_password"].(string) - expectedLdapUserName := options.TerraformVars["ldap_user_name"].(string) - expectedLdapUserPassword := options.TerraformVars["ldap_user_password"].(string) + ldapAdminPassword := options.TerraformVars["ldap_admin_password"].(string) + ldapUserName := options.TerraformVars["ldap_user_name"].(string) + ldapUserPassword := options.TerraformVars["ldap_user_password"].(string) expectedDnsDomainName, ok := options.TerraformVars["dns_domain_name"].(map[string]string)["compute"] assert.False(t, !ok, "Key 'compute' does not exist in dns_domain_name map or dns_domain_name is not of type map[string]string") @@ -399,20 +417,23 @@ func ValidateLDAPClusterConfiguration(t *testing.T, options *testhelper.TestOpti require.NoError(t, connectionErr, "Failed to connect to the ldap server via SSH: %v", connectionErr) // Check ldap server status - CheckLDAPServerStatus(t, sshLdapClient, expectedLdapAdminPassword, expectedLdapDomain, expectedLdapUserName, testLogger) + CheckLDAPServerStatus(t, sshLdapClient, ldapAdminPassword, expectedLdapDomain, ldapUserName, testLogger) // Verify management node ldap config - VerifyManagementNodeLDAPConfig(t, sshClient, bastionIP, LdapServerIP, managementNodeIPList, JOB_COMMAND_LOW, expectedLdapDomain, expectedLdapUserName, expectedLdapUserPassword, testLogger) + VerifyManagementNodeLDAPConfig(t, sshClient, bastionIP, LdapServerIP, managementNodeIPList, JOB_COMMAND_LOW, expectedLdapDomain, ldapUserName, ldapUserPassword, testLogger) + + // Verify compute node ldap config + VerifyComputeNodeLDAPConfig(t, bastionIP, LdapServerIP, computeNodeIPList, expectedLdapDomain, ldapUserName, ldapUserPassword, testLogger) // Verify login node ldap config - VerifyLoginNodeLDAPConfig(t, sshClient, bastionIP, loginNodeIP, LdapServerIP, JOB_COMMAND_LOW, expectedLdapDomain, expectedLdapUserName, expectedLdapUserPassword, testLogger) + VerifyLoginNodeLDAPConfig(t, sshClient, bastionIP, loginNodeIP, LdapServerIP, JOB_COMMAND_LOW, expectedLdapDomain, ldapUserName, ldapUserPassword, testLogger) + + // Verify able to create LDAP User on LDAP Server and can able to perform LSF actions using new user + VerifyCreateNewLdapUserAndManagementNodeLDAPConfig(t, sshLdapClient, bastionIP, LdapServerIP, managementNodeIPList, JOB_COMMAND_LOW, ldapAdminPassword, expectedLdapDomain, 
ldapUserName, ldapUserPassword, "user2", testLogger) // Verify PTR records VerifyPTRRecordsForManagementAndLoginNodes(t, sshClient, LSF_PUBLIC_HOST_NAME, bastionIP, LSF_PRIVATE_HOST_NAME, managementNodeIPList, loginNodeIP, expectedDnsDomainName, testLogger) - // Verify compute node ldap config - VerifyComputeNodeLDAPConfig(t, bastionIP, LdapServerIP, computeNodeIPList, expectedLdapDomain, expectedLdapUserName, expectedLdapUserPassword, testLogger) - testLogger.Info(t, t.Name()+" Validation ended") } @@ -436,9 +457,9 @@ func ValidatePACANDLDAPClusterConfiguration(t *testing.T, options *testhelper.Te expectedResourceGroup := options.TerraformVars["resource_group"].(string) expectedKeyManagement := options.TerraformVars["key_management"].(string) expectedLdapDomain := options.TerraformVars["ldap_basedns"].(string) - expectedLdapAdminPassword := options.TerraformVars["ldap_admin_password"].(string) - expectedLdapUserName := options.TerraformVars["ldap_user_name"].(string) - expectedLdapUserPassword := options.TerraformVars["ldap_user_password"].(string) + ldapAdminPassword := options.TerraformVars["ldap_admin_password"].(string) + ldapUserName := options.TerraformVars["ldap_user_name"].(string) + ldapUserPassword := options.TerraformVars["ldap_user_password"].(string) expectedDnsDomainName, ok := options.TerraformVars["dns_domain_name"].(map[string]string)["compute"] assert.False(t, !ok, "Key 'compute' does not exist in dns_domain_name map or dns_domain_name is not of type map[string]string") @@ -528,20 +549,23 @@ func ValidatePACANDLDAPClusterConfiguration(t *testing.T, options *testhelper.Te require.NoError(t, connectionErr, "Failed to connect to the ldap server via SSH: %v", connectionErr) // Check ldap server status - CheckLDAPServerStatus(t, sshLdapClient, expectedLdapAdminPassword, expectedLdapDomain, expectedLdapUserName, testLogger) + CheckLDAPServerStatus(t, sshLdapClient, ldapAdminPassword, expectedLdapDomain, ldapUserName, testLogger) // Verify management node ldap config - VerifyManagementNodeLDAPConfig(t, sshClient, bastionIP, LdapServerIP, managementNodeIPList, JOB_COMMAND_LOW, expectedLdapDomain, expectedLdapUserName, expectedLdapUserPassword, testLogger) + VerifyManagementNodeLDAPConfig(t, sshClient, bastionIP, LdapServerIP, managementNodeIPList, JOB_COMMAND_LOW, expectedLdapDomain, ldapUserName, ldapUserPassword, testLogger) + + // Verify compute node ldap config + VerifyComputeNodeLDAPConfig(t, bastionIP, LdapServerIP, computeNodeIPList, expectedLdapDomain, ldapUserName, ldapUserPassword, testLogger) // Verify login node ldap config - VerifyLoginNodeLDAPConfig(t, sshClient, bastionIP, loginNodeIP, LdapServerIP, JOB_COMMAND_LOW, expectedLdapDomain, expectedLdapUserName, expectedLdapUserPassword, testLogger) + VerifyLoginNodeLDAPConfig(t, sshClient, bastionIP, loginNodeIP, LdapServerIP, JOB_COMMAND_LOW, expectedLdapDomain, ldapUserName, ldapUserPassword, testLogger) + + // Verify able to create LDAP User on LDAP Server and can able to perform LSF actions using new user + VerifyCreateNewLdapUserAndManagementNodeLDAPConfig(t, sshLdapClient, bastionIP, LdapServerIP, managementNodeIPList, JOB_COMMAND_LOW, ldapAdminPassword, expectedLdapDomain, ldapUserName, ldapUserPassword, "user2", testLogger) // Verify PTR records VerifyPTRRecordsForManagementAndLoginNodes(t, sshClient, LSF_PUBLIC_HOST_NAME, bastionIP, LSF_PRIVATE_HOST_NAME, managementNodeIPList, loginNodeIP, expectedDnsDomainName, testLogger) - // Verify compute node ldap config - VerifyComputeNodeLDAPConfig(t, bastionIP, 
LdapServerIP, computeNodeIPList, expectedLdapDomain, expectedLdapUserName, expectedLdapUserPassword, testLogger) - testLogger.Info(t, t.Name()+" Validation ended") } @@ -641,3 +665,79 @@ func ValidateClusterConfigurationWithAPPCenterForExistingEnv( // Log the end of validation testLogger.Info(t, t.Name()+" Validation ended") } + +// ValidateBasicClusterConfigurationWithVPCFlowLogsAndCos validates the basic cluster configuration +// including VPC flow logs and COS service instance. +// It performs validation tasks on essential aspects of the cluster setup, +// such as management node, compute nodes, and login node configurations. +// Additionally, it ensures proper connectivity and functionality. +// This function doesn't return any value but logs errors and validation steps during the process. +func ValidateBasicClusterConfigurationWithVPCFlowLogsAndCos(t *testing.T, options *testhelper.TestOptions, testLogger *utils.AggregatedLogger) { + // Retrieve cluster information from Terraform variables + expectedClusterID := options.TerraformVars["cluster_id"].(string) + expectedReservationID := options.TerraformVars["reservation_id"].(string) + expectedMasterName := options.TerraformVars["cluster_prefix"].(string) + expectedResourceGroup := options.TerraformVars["resource_group"].(string) + expectedKeyManagement := options.TerraformVars["key_management"].(string) + expectedZone := options.TerraformVars["zones"].([]string)[0] + + expectedHyperthreadingEnabled, _ := strconv.ParseBool(options.TerraformVars["hyperthreading_enabled"].(string)) + + JOB_COMMAND_LOW := GetJobCommand(expectedZone, "low") + + // Run the test and handle errors + output, err := options.RunTest() + require.NoError(t, err, "Error running consistency test: %v", err) + require.NotNil(t, output, "Expected non-nil output, but got nil") + + // Log successful cluster creation + testLogger.Info(t, t.Name()+" Cluster created successfully") + + // Retrieve server IPs and handle errors + bastionIP, managementNodeIPList, loginNodeIP, ipRetrievalError := utils.GetServerIPs(t, options, testLogger) + require.NoError(t, ipRetrievalError, "Error occurred while getting server IPs: %v", ipRetrievalError) + + testLogger.Info(t, t.Name()+" Validation started ......") + + // Connect to the master node via SSH and handle connection errors + sshClient, connectionErr := utils.ConnectToHost(LSF_PUBLIC_HOST_NAME, bastionIP, LSF_PRIVATE_HOST_NAME, managementNodeIPList[0]) + require.Nil(t, connectionErr, "Failed to connect to the master via SSH: %v", connectionErr) + defer sshClient.Close() + + testLogger.Info(t, "SSH connection to the master successful") + t.Log("Validation in progress.
Please wait...") + + // Verify management node configuration + VerifyManagementNodeConfig(t, sshClient, expectedClusterID, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, managementNodeIPList, JOB_COMMAND_LOW, EXPECTED_LSF_VERSION, testLogger) + + defer sshClient.Close() + // Wait for dynamic node disappearance + defer func() { + if err := LSFWaitForDynamicNodeDisappearance(t, sshClient, testLogger); err != nil { + t.Errorf("Error in LSFWaitForDynamicNodeDisappearance: %v", err) + } + }() + + // Get dynamic compute node IPs and handle errors + computeNodeIPList, computeIPErr := LSFGETDynamicComputeNodeIPs(t, sshClient, testLogger) + require.Nil(t, computeIPErr, "Error getting dynamic compute node IPs: %v", computeIPErr) + + // Verify compute node configuration + VerifyComputetNodeConfig(t, sshClient, expectedHyperthreadingEnabled, computeNodeIPList, testLogger) + + // Connect to the login node via SSH and handle connection errors + sshLoginNodeClient, connectionErr := utils.ConnectToHost(LSF_PUBLIC_HOST_NAME, bastionIP, LSF_PRIVATE_HOST_NAME, loginNodeIP) + require.NoError(t, connectionErr, "Failed to connect to the login node via SSH: %v", connectionErr) + defer sshLoginNodeClient.Close() + + // Verify login node configuration + VerifyLoginNodeConfig(t, sshLoginNodeClient, expectedClusterID, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, loginNodeIP, JOB_COMMAND_LOW, EXPECTED_LSF_VERSION, testLogger) + + // Verify file share encryption + VerifyFileShareEncryption(t, os.Getenv("TF_VAR_ibmcloud_api_key"), utils.GetRegion(expectedZone), expectedResourceGroup, expectedMasterName, expectedKeyManagement, testLogger) + + // Validate COS service instance and VPC flow logs + ValidateCosServiceInstanceAndVpcFlowLogs(t, os.Getenv("TF_VAR_ibmcloud_api_key"), utils.GetRegion(expectedZone), expectedResourceGroup, expectedMasterName, testLogger) + + testLogger.Info(t, t.Name()+" Validation ended") +} diff --git a/tests/lsf/lsf_cluster_utils.go b/tests/lsf/lsf_cluster_utils.go index 096b74b1..72417d56 100644 --- a/tests/lsf/lsf_cluster_utils.go +++ b/tests/lsf/lsf_cluster_utils.go @@ -110,7 +110,7 @@ func LSFIPRouteCheck(t *testing.T, sClient *ssh.Client, ipsList []string, logger func LSFCheckClusterID(t *testing.T, sClient *ssh.Client, expectedClusterID string, logger *utils.AggregatedLogger) error { // Execute the 'lsid' command to get the cluster ID - command := "lsid" + command := "source /opt/ibm/lsf/conf/profile.lsf; lsid" output, err := utils.RunCommandInSSHSession(sClient, command) if err != nil { return fmt.Errorf("failed to execute 'lsid' command: %w", err) @@ -133,7 +133,7 @@ func LSFCheckClusterID(t *testing.T, sClient *ssh.Client, expectedClusterID stri // Returns an error if the checks fail. 
func LSFCheckMasterName(t *testing.T, sClient *ssh.Client, expectedMasterName string, logger *utils.AggregatedLogger) error { // Execute the 'lsid' command to get the cluster ID - command := "lsid" + command := "source /opt/ibm/lsf/conf/profile.lsf; lsid" output, err := utils.RunCommandInSSHSession(sClient, command) if err != nil { return fmt.Errorf("failed to execute 'lsid' command: %w", err) @@ -201,6 +201,8 @@ func LSFRestartDaemons(t *testing.T, sClient *ssh.Client, logger *utils.Aggregat return fmt.Errorf("failed to run 'lsf_daemons restart' command: %w", err) } + logger.Info(t, string(out)) + time.Sleep(defaultSleepDuration) // Check if the restart was successful @@ -226,11 +228,10 @@ func LSFRestartDaemons(t *testing.T, sClient *ssh.Client, logger *utils.Aggregat return nil } -// LSFControlBctrld performs start or stop operation on the bctrld daemon on the specified machine. -// The function returns an error if any step fails or if an invalid value (other than 'start' or 'stop') is provided. +// LSFControlBctrld performs start or stop operations on the bctrld daemon on the specified machine. +// It returns an error if any step fails or if an invalid value (other than 'start' or 'stop') is provided. // It executes the 'bctrld' command with the specified operation and waits for the daemon to start or stop. func LSFControlBctrld(t *testing.T, sClient *ssh.Client, startOrStop string, logger *utils.AggregatedLogger) error { - // Make startOrStop case-insensitive startOrStop = strings.ToLower(startOrStop) @@ -239,31 +240,48 @@ func LSFControlBctrld(t *testing.T, sClient *ssh.Client, startOrStop string, log return fmt.Errorf("invalid operation type. Please specify 'start' or 'stop'") } - // Execute the 'bctrld' command to start or stop the sbd daemon - command := fmt.Sprintf("bctrld %s sbd", startOrStop) - _, bctrldErr := utils.RunCommandInSSHSession(sClient, command) - if bctrldErr != nil { - return fmt.Errorf("failed to run '%s' command: %w", command, bctrldErr) + var command string + + // Construct the command based on the operation type + if startOrStop == "stop" { + command = "bctrld stop sbd" + } else { + command = "sudo su -l root -c 'systemctl restart lsfd'" + } + + // Execute the command + if _, err := utils.RunCommandInSSHSession(sClient, command); err != nil { + return fmt.Errorf("failed to run '%s' command: %w", command, err) } // Sleep for a specified duration to allow time for the daemon to start or stop - time.Sleep(20 * time.Second) + if startOrStop == "stop" { + time.Sleep(30 * time.Second) + } else { + time.Sleep(120 * time.Second) + } // Check the status of the daemon using the 'bhosts -w' command on the remote SSH server - cmd := "bhosts -w" - out, err := utils.RunCommandInSSHSession(sClient, cmd) + statusCmd := "bhosts -w" + out, err := utils.RunCommandInSSHSession(sClient, statusCmd) if err != nil { - return fmt.Errorf("failed to run 'bhosts' command on machine IP: %w", err) + return fmt.Errorf("failed to run 'bhosts' command: %w", err) } // Count the number of unreachable nodes unreachCount := strings.Count(string(out), "unreach") // Check the output based on the startOrStop parameter - if (startOrStop == "start" && unreachCount != 0) || (startOrStop == "stop" && unreachCount != 1) { + expectedUnreachCount := 0 + if startOrStop == "stop" { + expectedUnreachCount = 1 + } + + if unreachCount != expectedUnreachCount { // If the unreachable node count does not match the expected count, return an error return fmt.Errorf("failed to %s the sbd daemon on the management 
node", startOrStop) } + // Log success if no errors occurred logger.Info(t, fmt.Sprintf("Daemon %s successfully", startOrStop)) return nil @@ -391,7 +409,7 @@ func LSFRunJobs(t *testing.T, sClient *ssh.Client, jobCmd string, logger *utils. for time.Since(startTime) < jobMaxTimeout { // Run 'bjobs -a' command on the remote SSH server - command := "bjobs -a" + command := LOGIN_NODE_EXECUTION_PATH + "bjobs -a" // Run the 'bjobs' command to get information about all jobs jobStatus, err := utils.RunCommandInSSHSession(sClient, command) @@ -838,7 +856,7 @@ func LSFCheckSSHKeyForComputeNodes(t *testing.T, sClient *ssh.Client, computeNod func CheckLSFVersion(t *testing.T, sClient *ssh.Client, expectedVersion string, logger *utils.AggregatedLogger) error { // Execute the 'lsid' command to get the cluster ID - command := "lsid" + command := LOGIN_NODE_EXECUTION_PATH + "lsid" output, err := utils.RunCommandInSSHSession(sClient, command) if err != nil { @@ -1288,7 +1306,7 @@ func LSFRunJobsAsLDAPUser(t *testing.T, sClient *ssh.Client, jobCmd, ldapUser st for time.Since(startTime) < jobMaxTimeout { // Run 'bjobs -a' command on the remote SSH server - command := "bjobs -a" + command := LOGIN_NODE_EXECUTION_PATH + "bjobs -a" // Run the 'bjobs' command to get information about all jobs jobStatus, err := utils.RunCommandInSSHSession(sClient, command) @@ -1421,27 +1439,80 @@ func verifyDirectoriesAsLdapUser(t *testing.T, sClient *ssh.Client, hostname str } // VerifyLSFCommands verifies the LSF commands on the remote machine. -func VerifyLSFCommands(t *testing.T, sClient *ssh.Client, userName string, logger *utils.AggregatedLogger) error { +// It checks the commands' execution based on the node type. +func VerifyLSFCommands(t *testing.T, sClient *ssh.Client, nodeType string, logger *utils.AggregatedLogger) error { // Define commands to be executed commands := []string{ - "whoami", - "bhosts -w", + "lsid", "bjobs -a", "bhosts -w", + "bqueues", } + nodeType = strings.TrimSpace(strings.ToLower(nodeType)) + // Iterate over commands for _, command := range commands { + var output string + var err error + // Execute command on SSH session - output, err := utils.RunCommandInSSHSession(sClient, command) + switch { + case strings.Contains(nodeType, "compute"): + output, err = utils.RunCommandInSSHSession(sClient, COMPUTE_NODE_EXECUTION_PATH+command) + case strings.Contains(nodeType, "login"): + output, err = utils.RunCommandInSSHSession(sClient, LOGIN_NODE_EXECUTION_PATH+command) + default: + output, err = utils.RunCommandInSSHSession(sClient, command) + } + + if err != nil { + return fmt.Errorf("failed to execute command '%s' via SSH: %v", command, err) + } + + if strings.TrimSpace(output) == "" { + return fmt.Errorf("output for command '%s' is empty", command) + } + } + + return nil +} + +// VerifyLSFCommandsAsLDAPUser verifies the LSF commands on the remote machine. +// It checks the commands' execution as the specified LDAP user. 
+func VerifyLSFCommandsAsLDAPUser(t *testing.T, sClient *ssh.Client, userName, nodeType string, logger *utils.AggregatedLogger) error { + // Define commands to be executed + commands := []string{ + "whoami", + "lsid", + "bhosts -w", + "lshosts", + } + + nodeType = strings.TrimSpace(strings.ToLower(nodeType)) + + // Iterate over commands + for _, command := range commands { + var output string + var err error + + // Execute command on SSH session + if strings.Contains(nodeType, "compute") { + output, err = utils.RunCommandInSSHSession(sClient, COMPUTE_NODE_EXECUTION_PATH+command) + } else if strings.Contains(nodeType, "login") { + output, err = utils.RunCommandInSSHSession(sClient, LOGIN_NODE_EXECUTION_PATH+command) + } else { + output, err = utils.RunCommandInSSHSession(sClient, command) + } + if err != nil { return fmt.Errorf("failed to execute command '%s' via SSH: %v", command, err) } + if command == "whoami" { if !utils.VerifyDataContains(t, strings.TrimSpace(output), userName, logger) { return fmt.Errorf("unexpected user: expected '%s', got '%s'", userName, strings.TrimSpace(output)) } - // Check if the output is not empty } else if strings.TrimSpace(output) == "" { return fmt.Errorf("output for command '%s' is empty", command) } @@ -1748,3 +1819,192 @@ func CreateKey(t *testing.T, apiKey, region, resourceGroup, instanceName, keyNam logger.Info(t, fmt.Sprintf("Key '%s' created successfully in service instance '%s'", keyName, serviceInstanceID)) return nil } + +// LSFDNSCheck checks the DNS configuration on a list of nodes to ensure it contains the expected domain. +// It supports both Ubuntu and RHEL-based systems by executing the appropriate DNS check command. +// The function logs the results and returns an error if the DNS configuration is not as expected. +// Returns an error if the DNS configuration is not as expected or if any command execution fails. 
+func LSFDNSCheck(t *testing.T, sClient *ssh.Client, ipsList []string, domain string, logger *utils.AggregatedLogger) error { + // Commands to check DNS on different OS types + rhelDNSCheckCmd := "cat /etc/resolv.conf" + ubuntuDNSCheckCmd := "resolvectl status" + + // Check if the node list is empty + if len(ipsList) == 0 { + return fmt.Errorf("ERROR: ips cannot be empty") + } + + // Loop through each IP in the list + for _, ip := range ipsList { + var dnsCmd string + + // Get the OS name of the compute node + osName, osNameErr := GetOSNameOfNode(t, sClient, ip, logger) + if osNameErr != nil { + return osNameErr + } + + // Determine the appropriate command to check DNS based on the OS + switch strings.ToLower(osName) { + case "ubuntu": + dnsCmd = ubuntuDNSCheckCmd + default: + dnsCmd = rhelDNSCheckCmd + } + + // Build the SSH command to check DNS on the node + command := fmt.Sprintf("ssh %s %s", ip, dnsCmd) + + // Execute the command and get the output + output, err := utils.RunCommandInSSHSession(sClient, command) + if err != nil { + return fmt.Errorf("failed to execute '%s' command on (%s) node: %v", dnsCmd, ip, err) + } + + // Check if the output contains the domain name + if strings.Contains(strings.ToLower(osName), "rhel") { + if !utils.VerifyDataContains(t, output, domain, logger) && utils.VerifyDataContains(t, output, "Generated by NetworkManager", logger) { + return fmt.Errorf("DNS check failed on (%s) node and found:\n%s", ip, output) + } + } else { // For other OS types, currently only Ubuntu + if !utils.VerifyDataContains(t, output, domain, logger) { + return fmt.Errorf("DNS check failed on (%s) node and found:\n%s", ip, output) + } + } + + // Log a success message + logger.Info(t, fmt.Sprintf("DNS is correctly set for (%s) node", ip)) + } + + return nil +} + +// HPCAddNewLDAPUser adds a new LDAP user by modifying an existing user's configuration and running necessary commands. +// It reads the existing LDAP user configuration, replaces the existing user information with the new LDAP user +// information, creates a new LDIF file on the LDAP server, and then runs LDAP commands to add the new user. Finally, it +// verifies the addition of the new LDAP user by searching the LDAP server. 
+// Returns an error if any step fails +func HPCAddNewLDAPUser(t *testing.T, sClient *ssh.Client, ldapAdminPassword, ldapDomain, ldapUser, newLdapUser string, logger *utils.AggregatedLogger) error { + // Define the command to read the existing LDAP user configuration + getLDAPUserConf := "cat /opt/users.ldif" + actual, err := utils.RunCommandInSSHSession(sClient, getLDAPUserConf) + if err != nil { + return fmt.Errorf("failed to execute command '%s' via SSH: %v", getLDAPUserConf, err) + } + + // Replace the existing LDAP user name with the new LDAP user name + ldifContent := strings.ReplaceAll(actual, ldapUser, newLdapUser) + + // Create the new LDIF file on the LDAP server + _, fileCreationErr := utils.ToCreateFileWithContent(t, sClient, ".", "user2.ldif", ldifContent, logger) + if fileCreationErr != nil { + return fmt.Errorf("failed to create file on LDAP server: %w", fileCreationErr) + } + + // Parse the LDAP domain for reuse + domainParts := strings.Split(ldapDomain, ".") + if len(domainParts) != 2 { + return fmt.Errorf("invalid LDAP domain format: %s", ldapDomain) + } + dc1, dc2 := domainParts[0], domainParts[1] + + // Define the command to add the new LDAP user using the ldapadd command + ldapAddCmd := fmt.Sprintf( + "ldapadd -x -D cn=admin,dc=%s,dc=%s -w %s -f user2.ldif", + dc1, dc2, ldapAdminPassword, + ) + ldapAddOutput, err := utils.RunCommandInSSHSession(sClient, ldapAddCmd) + if err != nil { + return fmt.Errorf("failed to execute command '%s' via SSH: %v", ldapAddCmd, err) + } + + // Verify the new LDAP user appears in the ldapadd command output + if !utils.VerifyDataContains(t, ldapAddOutput, "uid="+newLdapUser, logger) { + return fmt.Errorf("LDAP user %s not found in add command output", newLdapUser) + } + + // Define the command to search for the new LDAP user to verify the addition + ldapSearchCmd := fmt.Sprintf( + "ldapsearch -x -D \"cn=admin,dc=%s,dc=%s\" -w %s -b \"ou=people,dc=%s,dc=%s\" -s sub \"(objectClass=*)\"", + dc1, dc2, ldapAdminPassword, dc1, dc2, + ) + ldapSearchOutput, err := utils.RunCommandInSSHSession(sClient, ldapSearchCmd) + if err != nil { + return fmt.Errorf("failed to execute command '%s' via SSH: %v", ldapSearchCmd, err) + } + + // Verify the new LDAP user exists in the search results + if !utils.VerifyDataContains(t, ldapSearchOutput, "uid: "+newLdapUser, logger) { + return fmt.Errorf("LDAP user %s not found in search results", newLdapUser) + } + + logger.Info(t, fmt.Sprintf("New LDAP user %s created successfully", newLdapUser)) + return nil +} + +// VerifyCosServiceInstance verifies that the Cloud Object Storage (COS) service instance details +// are correctly set in the specified resource group and cluster prefix.
+// Returns: An error if the verification fails, otherwise nil
+func VerifyCosServiceInstance(t *testing.T, apiKey, region, resourceGroup, clusterPrefix string, logger *utils.AggregatedLogger) error {
+
+	// If the resource group is "null", set it to a custom resource group with the format "clusterPrefix-workload-rg"
+	if strings.Contains(resourceGroup, "null") {
+		resourceGroup = fmt.Sprintf("%s-workload-rg", clusterPrefix)
+	}
+
+	// Log in to IBM Cloud using the API key and VPC region
+	if err := utils.LoginIntoIBMCloudUsingCLI(t, apiKey, region, resourceGroup); err != nil {
+		return fmt.Errorf("failed to log in to IBM Cloud: %w", err)
+	}
+
+	// Construct the command to check for the COS service instance
+	resourceCosServiceInstanceCmd := fmt.Sprintf("ibmcloud resource service-instances --service-name cloud-object-storage | grep %s-hpc-cos", clusterPrefix)
+	cosServiceInstanceCmd := exec.Command("bash", "-c", resourceCosServiceInstanceCmd)
+	output, err := cosServiceInstanceCmd.CombinedOutput()
+	if err != nil {
+		return fmt.Errorf("failed to execute command to check COS service instance: %w", err)
+	}
+
+	logger.Info(t, "COS details: "+string(output))
+
+	// Check if the COS service instance contains the cluster prefix and is active
+	if !utils.VerifyDataContains(t, string(output), clusterPrefix, logger) {
+		return fmt.Errorf("COS service instance with prefix %s not found", clusterPrefix)
+	}
+
+	if !utils.VerifyDataContains(t, string(output), "active", logger) {
+		return fmt.Errorf("COS service instance with prefix %s is not active", clusterPrefix)
+	}
+
+	logger.Info(t, "COS service instance verified as expected")
+	return nil
+}
+
+// ValidateFlowLogs verifies that the VPC flow logs are created successfully
+// and correctly set for the specified resource group and cluster prefix.
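+//
+// The collector name is derived from the cluster prefix; for example, with clusterPrefix
+// "cicd-1a2b" (an illustrative value) the helper runs `ibmcloud is flow-logs cicd-1a2b-hpc-vpc`
+// and expects the collector name to appear in the output.
+//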
+// Returns: An error if the verification fails, otherwise nil
+func ValidateFlowLogs(t *testing.T, apiKey, region, resourceGroup, clusterPrefix string, logger *utils.AggregatedLogger) error {
+
+	// If the resource group is "null", set it to a custom resource group with the format "clusterPrefix-workload-rg"
+	if strings.Contains(resourceGroup, "null") {
+		resourceGroup = fmt.Sprintf("%s-workload-rg", clusterPrefix)
+	}
+	// Log in to IBM Cloud using the API key and region
+	if err := utils.LoginIntoIBMCloudUsingCLI(t, apiKey, region, resourceGroup); err != nil {
+		return fmt.Errorf("failed to log in to IBM Cloud: %w", err)
+	}
+	flowLogName := fmt.Sprintf("%s-hpc-vpc", clusterPrefix)
+	// Fetch the flow log details
+	retrieveFlowLogs := fmt.Sprintf("ibmcloud is flow-logs %s", flowLogName)
+	cmdRetrieveFlowLogs := exec.Command("bash", "-c", retrieveFlowLogs)
+	flowLogsOutput, err := cmdRetrieveFlowLogs.CombinedOutput()
+	if err != nil {
+		return fmt.Errorf("failed to retrieve flow logs: %w", err)
+	}
+	if !utils.VerifyDataContains(t, string(flowLogsOutput), flowLogName, logger) {
+		return fmt.Errorf("flow logs retrieval failed: %s", string(flowLogsOutput))
+	}
+
+	logger.Info(t, fmt.Sprintf("Flow logs '%s' retrieved successfully", flowLogName))
+	return nil
+}
diff --git a/tests/lsf/lsf_constants.go b/tests/lsf/lsf_constants.go
index 9c1f926d..2cfd4059 100644
--- a/tests/lsf/lsf_constants.go
+++ b/tests/lsf/lsf_constants.go
@@ -9,18 +9,20 @@ const (
 	HYPERTHREADTING_FALSE = false
 	LSF_DEFAULT_RESOURCE_GROUP = "Default"
 	LSF_CUSTOM_RESOURCE_GROUP_VALUE_AS_NULL = "null"
-	EXPECTED_LSF_VERSION = "10.1.0.14"
-	JOB_COMMAND_LOW_MEM = `bsub -J myjob[1-2] -R "select[family=mx2] rusage[mem=10G]" sleep 60`
-	JOB_COMMAND_MED_MEM = `bsub -J myjob[1-2] -R "select[family=mx2] rusage[mem=30G]" sleep 60`
-	JOB_COMMAND_HIGH_MEM = `bsub -J myjob[1-2] -R "select[family=mx2] rusage[mem=90G]" sleep 60`
-	JOB_COMMAND_LOW_MEM_SOUTH = `bsub -J myjob[1-2] -R "select[family=mx3d] rusage[mem=10G]" sleep 60`
-	JOB_COMMAND_MED_MEM_SOUTH = `bsub -J myjob[1-2] -R "select[family=mx3d] rusage[mem=30G]" sleep 60`
-	JOB_COMMAND_HIGH_MEM_SOUTH = `bsub -J myjob[1-2] -R "select[family=mx3d] rusage[mem=90G]" sleep 60`
-	JOB_COMMAND_LOW_MEM_WITH_MORE_SLEEP = `bsub -J myjob[1-2] -R "select[family=mx2] rusage[mem=30G]" sleep 60`
+	LOGIN_NODE_EXECUTION_PATH = "source /opt/ibm/lsf/conf/profile.lsf;"
+	COMPUTE_NODE_EXECUTION_PATH = "source /opt/ibm/lsf_worker/conf/profile.lsf;"
+	JOB_COMMAND_LOW_MEM = `bsub -J myjob[1-2] -R "select[family=mx2] rusage[mem=10G]" sleep 90`
+	JOB_COMMAND_MED_MEM = `bsub -J myjob[1-2] -R "select[family=mx2] rusage[mem=30G]" sleep 90`
+	JOB_COMMAND_HIGH_MEM = `bsub -J myjob[1-2] -R "select[family=mx2] rusage[mem=90G]" sleep 90`
+	JOB_COMMAND_LOW_MEM_SOUTH = `bsub -J myjob[1-2] -R "select[family=mx3d] rusage[mem=10G]" sleep 90`
+	JOB_COMMAND_MED_MEM_SOUTH = `bsub -J myjob[1-2] -R "select[family=mx3d] rusage[mem=30G]" sleep 90`
+	JOB_COMMAND_HIGH_MEM_SOUTH = `bsub -J myjob[1-2] -R "select[family=mx3d] rusage[mem=90G]" sleep 90`
+	JOB_COMMAND_LOW_MEM_WITH_MORE_SLEEP = `bsub -J myjob[1-2] -R "select[family=mx2] rusage[mem=30G]" sleep 90`
 )
 
 var (
 	LSF_CUSTOM_RESOURCE_GROUP_OTHER_THAN_DEFAULT = "WES_TEST"
 	KMS_KEY_INSTANCE_NAME = "cicd-key-instance"
 	KMS_KEY_NAME = "cicd-key-name"
+	EXPECTED_LSF_VERSION = "10.1.0.14"
 )
diff --git a/tests/other_test.go b/tests/other_test.go
index cd4e269c..81640ba4 100644
--- a/tests/other_test.go
+++ b/tests/other_test.go
@@ -1,6 +1,7 @@
 package tests
 
 import (
+	"fmt"
 	"os"
"path/filepath" "strings" @@ -348,7 +349,7 @@ func TestRunUsingExistingKMS(t *testing.T) { envVars := GetEnvVars() // Create service instance and KMS key using IBMCloud CLI - err := lsf.CreateServiceInstanceandKmsKey(t, os.Getenv("TF_VAR_ibmcloud_api_key"), utils.GetRegion(envVars.Zone), envVars.DefaultResourceGroup, kmsInstanceName, lsf.KMS_KEY_NAME, testLogger) + err := lsf.CreateServiceInstanceAndKmsKey(t, os.Getenv("TF_VAR_ibmcloud_api_key"), utils.GetRegion(envVars.Zone), envVars.DefaultResourceGroup, kmsInstanceName, lsf.KMS_KEY_NAME, testLogger) require.NoError(t, err, "Service instance and KMS key creation failed") testLogger.Info(t, "Service instance and KMS key created successfully "+t.Name()) @@ -372,6 +373,50 @@ func TestRunUsingExistingKMS(t *testing.T) { lsf.ValidateBasicClusterConfiguration(t, options, testLogger) } +// TestRunUsingExistingKMSInstanceIDAndWithOutKey validates cluster creation using an existing KMS. +func TestRunUsingExistingKMSInstanceIDAndWithoutKey(t *testing.T) { + // Parallelize the test to run concurrently with others + t.Parallel() + + // Setup test suite + setupTestSuite(t) + + testLogger.Info(t, "Cluster creation process initiated for "+t.Name()) + + // Service instance name + randomString := utils.GenerateRandomString() + kmsInstanceName := "cicd-" + randomString + + // HPC cluster prefix + hpcClusterPrefix := utils.GenerateRandomString() + + // Retrieve cluster information from environment variables + envVars := GetEnvVars() + + // Create service instance and KMS key using IBMCloud CLI + err := lsf.CreateServiceInstanceAndKmsKey(t, os.Getenv("TF_VAR_ibmcloud_api_key"), utils.GetRegion(envVars.Zone), envVars.DefaultResourceGroup, kmsInstanceName, lsf.KMS_KEY_NAME, testLogger) + require.NoError(t, err, "Service instance and KMS key creation failed") + + testLogger.Info(t, "Service instance and KMS key created successfully "+t.Name()) + + // Create test options, set up test environment + options, err := setupOptions(t, hpcClusterPrefix, terraformDir, envVars.DefaultResourceGroup, ignoreDestroys) + require.NoError(t, err, "Error setting up test options: %v", err) + + // Set Terraform variables + options.TerraformVars["key_management"] = "key_protect" + options.TerraformVars["kms_instance_name"] = kmsInstanceName + + // Skip test teardown for further inspection + options.SkipTestTearDown = true + + // Ensure the service instance and KMS key are deleted after the test + defer lsf.DeleteServiceInstanceAndAssociatedKeys(t, os.Getenv("TF_VAR_ibmcloud_api_key"), utils.GetRegion(envVars.Zone), envVars.DefaultResourceGroup, kmsInstanceName, testLogger) + defer options.TestTearDown() + + lsf.ValidateBasicClusterConfiguration(t, options, testLogger) +} + // TestRunLDAPAndPac validates cluster creation with both Application Center (PAC) and LDAP enabled. 
 func TestRunLDAPAndPac(t *testing.T) {
 	// Parallelize the test to run concurrently with others
@@ -450,12 +495,13 @@ func TestRunCreateVpc(t *testing.T) {
 	vpcName := outputs["vpc_name"].(string)
 	bastionsubnetId, computesubnetIds := utils.GetSubnetIds(outputs)
 
-	RunHpcExistingVpcSubnetId(t, vpcName, bastionsubnetId, computesubnetIds)
 	RunHpcExistingVpcCidr(t, vpcName)
+	RunHpcExistingVpcSubnetIdCustomNullDnsNull(t, vpcName, bastionsubnetId, computesubnetIds)
 }
 
 // RunHpcExistingVpcCidr with Cidr blocks
 func RunHpcExistingVpcCidr(t *testing.T, vpcName string) {
+	fmt.Println("********* Started Executing RunHpcExistingVpcCidr ********* ")
 	// Setup test suite
 	setupTestSuite(t)
 
@@ -483,10 +529,12 @@ func RunHpcExistingVpcCidr(t *testing.T, vpcName string) {
 	defer options.TestTearDown()
 
 	lsf.ValidateClusterConfiguration(t, options, testLogger)
+	fmt.Println("********* Ended Executing RunHpcExistingVpcCidr ********* ")
 }
 
-// RunHpcExistingVpcSubnetId with compute and login subnet id's
-func RunHpcExistingVpcSubnetId(t *testing.T, vpcName string, bastionsubnetId string, computesubnetIds string) {
+// RunHpcExistingVpcSubnetIdCustomNullDnsNull with compute and login subnet IDs; both custom_resolver and dns_instance are null
+func RunHpcExistingVpcSubnetIdCustomNullDnsNull(t *testing.T, vpcName string, bastionsubnetId string, computesubnetIds string) {
+	fmt.Println("********* Started Executing RunHpcExistingVpcSubnetIdCustomNullDnsNull ********* ")
 	// Setup test suite
 	setupTestSuite(t)
 
@@ -510,6 +558,7 @@ func RunHpcExistingVpcSubnetId(t *testing.T, vpcName string, bastionsubnetId str
 	defer options.TestTearDown()
 
 	lsf.ValidateClusterConfiguration(t, options, testLogger)
+	fmt.Println("********* Ended Executing RunHpcExistingVpcSubnetIdCustomNullDnsNull ********* ")
 }
 
 // TestRunCreateVpcWithCustomDns brand new VPC with DNS
@@ -548,14 +597,14 @@ func TestRunVpcWithCustomDns(t *testing.T) {
 	instanceId, customResolverId := utils.GetDnsCustomResolverIds(outputs)
 	bastionsubnetId, computesubnetIds := utils.GetSubnetIds(outputs)
 
-	RunHpcExistingVpcCustomDnsExist(t, vpcName, bastionsubnetId, computesubnetIds, instanceId, customResolverId)
-	RunHpcExistingVpcCustomExistDnsNew(t, vpcName, bastionsubnetId, computesubnetIds, customResolverId)
-	RunHpcNewVpcCustomNullExistDns(t, instanceId)
-	RunHpcNewVpcExistCustomDnsNull(t, customResolverId)
+	RunHpcExistingVpcBothCustomDnsExist(t, vpcName, bastionsubnetId, computesubnetIds, instanceId, customResolverId)
+	RunHpcExistingVpcCustomExistDnsNull(t, vpcName, bastionsubnetId, computesubnetIds, customResolverId)
+	RunHpcExistingVpcCustomNullDnsExist(t, instanceId)
 }
 
-// RunHpcExistingVpcCustomDns with existing custom_reslover_id and dns_instance_id
-func RunHpcExistingVpcCustomDnsExist(t *testing.T, vpcName string, bastionsubnetId string, computesubnetIds string, instanceId string, customResolverId string) {
+// RunHpcExistingVpcBothCustomDnsExist with existing custom_resolver_id and dns_instance_id
+func RunHpcExistingVpcBothCustomDnsExist(t *testing.T, vpcName string, bastionsubnetId string, computesubnetIds string, instanceId string, customResolverId string) {
+	fmt.Println("********* Started Executing RunHpcExistingVpcBothCustomDnsExist ********* ")
 	// Setup test suite
 	setupTestSuite(t)
 
@@ -582,10 +631,12 @@ func RunHpcExistingVpcCustomDnsExist(t *testing.T, vpcName string, bastionsubnet
 	defer options.TestTearDown()
 
 	lsf.ValidateClusterConfiguration(t, options, testLogger)
+	fmt.Println("********* Ended Executing RunHpcExistingVpcBothCustomDnsExist ********* ")
 }
 
-//
RunHpcExistingVpcCustomExistDnsNew with existing custom_reslover_id and new dns_instance_id -func RunHpcExistingVpcCustomExistDnsNew(t *testing.T, vpcName string, bastionsubnetId string, computesubnetIds string, customResolverId string) { +// RunHpcExistingVpcCustomExistDnsNull with existing custom_resolver_id and new dns_instance_id +func RunHpcExistingVpcCustomExistDnsNull(t *testing.T, vpcName string, bastionsubnetId string, computesubnetIds string, customResolverId string) { + fmt.Println("********* Started Executing RunHpcExistingVpcCustomExistDnsNull ********* ") // Setup test suite setupTestSuite(t) @@ -611,10 +662,12 @@ func RunHpcExistingVpcCustomExistDnsNew(t *testing.T, vpcName string, bastionsub defer options.TestTearDown() lsf.ValidateClusterConfiguration(t, options, testLogger) + fmt.Println("********* Ended Executing RunHpcExistingVpcCustomExistDnsNull ********* ") } -// RunHpcNewVpcCustomNullExistDns with custom_reslover_id null and existing dns_instance_id -func RunHpcNewVpcCustomNullExistDns(t *testing.T, instanceId string) { +// RunHpcExistingVpcCustomNullDnsExist with custom_resolver_id null and existing dns_instance_id +func RunHpcExistingVpcCustomNullDnsExist(t *testing.T, instanceId string) { + fmt.Println("********* Started Executing RunHpcExistingVpcCustomNullDnsExist ********* ") // Setup test suite setupTestSuite(t) @@ -637,32 +690,7 @@ func RunHpcNewVpcCustomNullExistDns(t *testing.T, instanceId string) { defer options.TestTearDown() lsf.ValidateClusterConfiguration(t, options, testLogger) -} - -// RunHpcNewVpcExistCustomDnsNull with existing custom_reslover_id and dns_instance_id null -func RunHpcNewVpcExistCustomDnsNull(t *testing.T, customResolverId string) { - // Setup test suite - setupTestSuite(t) - - testLogger.Info(t, "Cluster creation process initiated for "+t.Name()) - - // HPC cluster prefix - hpcClusterPrefix := utils.GenerateRandomString() - - // Retrieve cluster information from environment variables - envVars := GetEnvVars() - - // Create test options - options, err := setupOptions(t, hpcClusterPrefix, terraformDir, envVars.DefaultResourceGroup, ignoreDestroys) - options.TerraformVars["dns_instance_id"] = customResolverId - - require.NoError(t, err, "Error setting up test options: %v", err) - - // Skip test teardown for further inspection - options.SkipTestTearDown = true - defer options.TestTearDown() - - lsf.ValidateClusterConfiguration(t, options, testLogger) + fmt.Println("********* Ended Executing RunHpcExistingVpcCustomNullDnsExist ********* ") } // TestRunWithoutMandatory tests Terraform's behavior when mandatory variables are missing by checking for specific error messages. 
@@ -1108,11 +1136,11 @@ func TestRunInvalidDomainName(t *testing.T) { }, }) - // Apply the Terraform configuration + // Plan the Terraform configuration _, err = terraform.InitAndPlanE(t, terraformOptions) - // Check if an error occurred during apply - assert.Error(t, err, "Expected an error during apply") + // Check if an error occurred during plan + assert.Error(t, err, "Expected an error during plan") if err != nil { // Check if the error message contains specific keywords indicating domain name issues @@ -1151,7 +1179,7 @@ func TestRunKMSInstanceNameAndKMSKeyNameWithInvalidValue(t *testing.T) { envVars := GetEnvVars() // Create service instance and KMS key using IBMCloud CLI - err := lsf.CreateServiceInstanceandKmsKey(t, os.Getenv("TF_VAR_ibmcloud_api_key"), utils.GetRegion(envVars.Zone), envVars.DefaultResourceGroup, kmsInstanceName, lsf.KMS_KEY_NAME, testLogger) + err := lsf.CreateServiceInstanceAndKmsKey(t, os.Getenv("TF_VAR_ibmcloud_api_key"), utils.GetRegion(envVars.Zone), envVars.DefaultResourceGroup, kmsInstanceName, lsf.KMS_KEY_NAME, testLogger) require.NoError(t, err, "Failed to create service instance and KMS key") // Ensure the service instance and KMS key are deleted after the test @@ -1316,7 +1344,7 @@ func TestRunExistSubnetIDVpcNameAsNull(t *testing.T) { }, }) - // Apply the Terraform configuration + // Plan the Terraform configuration _, err = terraform.InitAndPlanE(t, terraformOptions) // Check if an error occurred during plan @@ -1379,11 +1407,11 @@ func TestRunInvalidSshKeysAndRemoteAllowedIP(t *testing.T) { }, }) - // Apply the Terraform configuration + // Plan the Terraform configuration _, err = terraform.InitAndPlanE(t, terraformOptions) - // Check if an error occurred during apply - assert.Error(t, err, "Expected an error during apply") + // Check if an error occurred during plan + assert.Error(t, err, "Expected an error during plan") if err != nil { // Check if the error message contains specific keywords indicating domain name issues @@ -1401,3 +1429,97 @@ func TestRunInvalidSshKeysAndRemoteAllowedIP(t *testing.T) { testLogger.FAIL(t, "Expected error did not occur on Invalid ssh keys and remote allowed IP") } } + +// TestRunCosAndVpcFlowLogs validates cluster creation with vpc flow logs and cos enabled. +func TestRunCosAndVpcFlowLogs(t *testing.T) { + // Parallelize the test to run concurrently with others + t.Parallel() + + // Setup test suite + setupTestSuite(t) + + testLogger.Info(t, "Cluster creation process initiated for "+t.Name()) + + // HPC cluster prefix + hpcClusterPrefix := utils.GenerateRandomString() + + // Retrieve cluster information from environment variables + envVars := GetEnvVars() + + // Create test options, set up test environment + options, err := setupOptions(t, hpcClusterPrefix, terraformDir, envVars.DefaultResourceGroup, ignoreDestroys) + require.NoError(t, err, "Error setting up test options: %v", err) + + // Set Terraform variables + options.TerraformVars["enable_cos_integration"] = true + options.TerraformVars["enable_vpc_flow_logs"] = true + + // Skip test teardown for further inspection + options.SkipTestTearDown = true + defer options.TestTearDown() + + lsf.ValidateBasicClusterConfigurationWithVPCFlowLogsAndCos(t, options, testLogger) +} + +// TestRunInvalidSubnetCIDR validates cluster creation with invalid subnet CIDR ranges. 
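+// The CIDR values used below ("1.1.1.1/20" and "2.2.2.2/20") have host bits set; for instance,
+// net.ParseCIDR("1.1.1.1/20") normalises the network to 1.1.0.0/20, so the VPC subnet API is
+// expected to reject the raw values with a 'validcidr' validation error, which this test asserts.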
+func TestRunInvalidSubnetCIDR(t *testing.T) { + // Parallelize the test to run concurrently with others + t.Parallel() + + // Setup test suite + setupTestSuite(t) + + testLogger.Info(t, "Cluster creation process initiated for "+t.Name()) + + // HPC cluster prefix + hpcClusterPrefix := utils.GenerateTimestampedClusterPrefix(utils.GenerateRandomString()) + + // Retrieve cluster information from environment variables + envVars := GetEnvVars() + + // Get the absolute path of solutions/hpc + abs, err := filepath.Abs("solutions/hpc") + require.NoError(t, err, "Unable to get absolute path") + + terrPath := strings.ReplaceAll(abs, "tests/", "") + + // Define Terraform options + terraformOptions := terraform.WithDefaultRetryableErrors(t, &terraform.Options{ + TerraformDir: terrPath, + Vars: map[string]interface{}{ + "cluster_prefix": hpcClusterPrefix, + "bastion_ssh_keys": utils.SplitAndTrim(envVars.SSHKey, ","), + "compute_ssh_keys": utils.SplitAndTrim(envVars.SSHKey, ","), + "zones": utils.SplitAndTrim(envVars.Zone, ","), + "remote_allowed_ips": utils.SplitAndTrim(envVars.RemoteAllowedIPs, ","), + "cluster_id": envVars.ClusterID, + "reservation_id": envVars.ReservationID, + "vpc_cluster_private_subnets_cidr_blocks": utils.SplitAndTrim("1.1.1.1/20", ","), + "vpc_cluster_login_private_subnets_cidr_blocks": utils.SplitAndTrim("2.2.2.2/20", ","), + }, + }) + + // Apply the Terraform configuration + _, err = terraform.InitAndApplyE(t, terraformOptions) + + // Check if an error occurred during apply + assert.Error(t, err, "Expected an error during apply") + + if err != nil { + // Check if the error message contains specific keywords indicating Subnet CIDR block issues + result := utils.VerifyDataContains(t, err.Error(), "Invalid json payload provided: Key: 'SubnetTemplateOneOf.SubnetTemplate.CIDRBlock' Error:Field validation for 'CIDRBlock' failed on the 'validcidr' tag", testLogger) + assert.True(t, result) + if result { + testLogger.PASS(t, "Validation succeeded: Invalid Subnet CIDR range") + } else { + testLogger.FAIL(t, "Validation failed: Invalid Subnet CIDR range") + } + } else { + // Log an error if the expected error did not occur + t.Error("Expected error did not occur") + testLogger.FAIL(t, "Expected error did not occur on Invalid Subnet CIDR range") + } + + // Cleanup resources + defer terraform.Destroy(t, terraformOptions) +} diff --git a/tests/pr_test.go b/tests/pr_test.go index e8af6faa..94282308 100644 --- a/tests/pr_test.go +++ b/tests/pr_test.go @@ -36,6 +36,7 @@ var ignoreDestroys = []string{ "module.landing_zone_vsi.module.hpc.module.landing_zone_vsi.module.wait_management_candidate_vsi_booted.null_resource.remote_exec[0]", "module.landing_zone_vsi.module.hpc.module.landing_zone_vsi.module.wait_management_vsi_booted.null_resource.remote_exec[0]", "module.landing_zone_vsi.module.do_management_vsi_configuration.null_resource.remote_exec_script_cp_files[1]", + "module.landing_zone_vsi.module.do_management_vsi_configuration.null_resource.remote_exec_script_new_file[0]", } // EnvVars stores environment variable values. 
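Note: TestRunCosAndVpcFlowLogs calls lsf.ValidateBasicClusterConfigurationWithVPCFlowLogsAndCos, whose body is not part of this diff. A minimal sketch of how such a helper could chain the two new checks, assuming it receives the run's API key, region, resource group, and cluster prefix (the name and plumbing below are illustrative, not the repository's actual implementation):

	// validateVPCFlowLogsAndCosSketch is a hypothetical wrapper around the two new helpers.
	func validateVPCFlowLogsAndCosSketch(t *testing.T, apiKey, region, resourceGroup, clusterPrefix string, logger *utils.AggregatedLogger) error {
		// Confirm the "<prefix>-hpc-cos" instance exists and is active.
		if err := VerifyCosServiceInstance(t, apiKey, region, resourceGroup, clusterPrefix, logger); err != nil {
			return fmt.Errorf("COS validation failed: %w", err)
		}
		// Confirm the "<prefix>-hpc-vpc" flow log collector was created.
		if err := ValidateFlowLogs(t, apiKey, region, resourceGroup, clusterPrefix, logger); err != nil {
			return fmt.Errorf("VPC flow log validation failed: %w", err)
		}
		return nil
	}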
diff --git a/tests/test_config.yml b/tests/test_config.yml index a28b5e2c..cf66852c 100644 --- a/tests/test_config.yml +++ b/tests/test_config.yml @@ -7,7 +7,7 @@ remote_allowed_ips: ssh_key: geretain-hpc login_node_instance_type: bx2-2x8 login_image_name: hpcaas-lsf10-rhel88-compute-v5 -management_image_name: hpcaas-lsf10-rhel88-v8 +management_image_name: hpcaas-lsf10-rhel88-v9 compute_image_name: hpcaas-lsf10-rhel88-compute-v5 management_node_instance_type: bx2-2x8 management_node_count: 2
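Note: a minimal sketch of how a test could load the updated image names from tests/test_config.yml. The struct, its field set, and the gopkg.in/yaml.v3 dependency are assumptions for illustration only, not the repository's actual config loader:

	package tests

	import (
		"os"

		"gopkg.in/yaml.v3"
	)

	// testConfigSketch mirrors only the fields referenced here.
	type testConfigSketch struct {
		ManagementImageName string `yaml:"management_image_name"`
		ComputeImageName    string `yaml:"compute_image_name"`
		LoginImageName      string `yaml:"login_image_name"`
	}

	func loadTestConfigSketch(path string) (*testConfigSketch, error) {
		data, err := os.ReadFile(path)
		if err != nil {
			return nil, err
		}
		var cfg testConfigSketch
		if err := yaml.Unmarshal(data, &cfg); err != nil {
			return nil, err
		}
		// With this change applied, cfg.ManagementImageName is "hpcaas-lsf10-rhel88-v9".
		return &cfg, nil
	}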