diff --git a/.tekton/README.md b/.tekton/README.md index 28f13be3..905dc2b0 100644 --- a/.tekton/README.md +++ b/.tekton/README.md @@ -36,7 +36,7 @@ https://cloud.ibm.com/devops/getting-started?env_id=ibm:yp:eu-de 3. cluster_prefix 4. zone 5. resource_group -6. cluster_id +6. cluster_name 7. reservation_id For additional assistance, contact the project maintainers. diff --git a/.tekton/hpcaas/hpcaas-pr-pipeline/hpcaas-pipeline-git-pr-status.yaml b/.tekton/hpcaas/hpcaas-pr-pipeline/hpcaas-pipeline-git-pr-status.yaml index 8fbfaed1..0bb19d65 100644 --- a/.tekton/hpcaas/hpcaas-pr-pipeline/hpcaas-pipeline-git-pr-status.yaml +++ b/.tekton/hpcaas/hpcaas-pr-pipeline/hpcaas-pipeline-git-pr-status.yaml @@ -60,7 +60,7 @@ spec: - name: management_image_name description: Name of the custom image that you want to use to create virtual server instances in your IBM Cloud account to deploy the IBM Cloud HPC cluster management nodes. By default, the solution uses a RHEL88 base image with additional software packages mentioned [here](https://cloud.ibm.com/docs/ibm-spectrum-lsf#create-custom-image). If you would like to include your application-specific binary files, follow the instructions in [ Planning for custom images ](https://cloud.ibm.com/docs/vpc?topic=vpc-planning-custom-images) to create your own custom image and use that to build the IBM Cloud HPC cluster through this offering. default: "" - - name: cluster_id + - name: cluster_name description: Ensure that you have received the cluster ID from IBM technical sales. A unique identifer for HPC cluster used by IBM Cloud HPC to differentiate different HPC clusters within the same reservation. This can be up to 39 alphanumeric characters including the underscore (_), the hyphen (-), and the period (.) characters. You cannot change the cluster ID after deployment. default: "" - name: reservation_id @@ -223,8 +223,8 @@ spec: value: $(params.login_image_name) - name: management_image_name value: $(params.management_image_name) - - name: cluster_id - value: $(params.cluster_id) + - name: cluster_name + value: $(params.cluster_name) - name: reservation_id value: $(params.reservation_id) - name: pr-revision @@ -279,8 +279,8 @@ spec: value: $(params.login_image_name) - name: management_image_name value: $(params.management_image_name) - - name: cluster_id - value: $(params.cluster_id) + - name: cluster_name + value: $(params.cluster_name) - name: reservation_id value: $(params.reservation_id) - name: pr-revision diff --git a/.tekton/hpcaas/hpcaas-pr-pipeline/listener-git-pr-status.yaml b/.tekton/hpcaas/hpcaas-pr-pipeline/listener-git-pr-status.yaml index b83bcb49..382702fb 100644 --- a/.tekton/hpcaas/hpcaas-pr-pipeline/listener-git-pr-status.yaml +++ b/.tekton/hpcaas/hpcaas-pr-pipeline/listener-git-pr-status.yaml @@ -55,7 +55,7 @@ spec: - name: management_image_name description: Name of the custom image that you want to use to create virtual server instances in your IBM Cloud account to deploy the IBM Cloud HPC cluster management nodes. By default, the solution uses a RHEL88 base image with additional software packages mentioned [here](https://cloud.ibm.com/docs/ibm-spectrum-lsf#create-custom-image). If you would like to include your application-specific binary files, follow the instructions in [ Planning for custom images ](https://cloud.ibm.com/docs/vpc?topic=vpc-planning-custom-images) to create your own custom image and use that to build the IBM Cloud HPC cluster through this offering. 
default: "" - - name: cluster_id + - name: cluster_name description: Ensure that you have received the cluster ID from IBM technical sales. A unique identifer for HPC cluster used by IBM Cloud HPC to differentiate different HPC clusters within the same reservation. This can be up to 39 alphanumeric characters including the underscore (_), the hyphen (-), and the period (.) characters. You cannot change the cluster ID after deployment. default: "" - name: reservation_id @@ -169,8 +169,8 @@ spec: value: $(params.login_image_name) - name: management_image_name value: $(params.management_image_name) - - name: cluster_id - value: $(params.cluster_id) + - name: cluster_name + value: $(params.cluster_name) - name: reservation_id value: $(params.reservation_id) - name: us_east_zone diff --git a/.tekton/hpcaas/hpcaas-regression-pipeline/hpcaas-pipeline-git-trigger.yaml b/.tekton/hpcaas/hpcaas-regression-pipeline/hpcaas-pipeline-git-trigger.yaml index 56a69cd2..5118a2da 100644 --- a/.tekton/hpcaas/hpcaas-regression-pipeline/hpcaas-pipeline-git-trigger.yaml +++ b/.tekton/hpcaas/hpcaas-regression-pipeline/hpcaas-pipeline-git-trigger.yaml @@ -62,7 +62,7 @@ spec: - name: management_image_name description: Name of the custom image that you want to use to create virtual server instances in your IBM Cloud account to deploy the IBM Cloud HPC cluster management nodes. By default, the solution uses a RHEL88 base image with additional software packages mentioned [here](https://cloud.ibm.com/docs/ibm-spectrum-lsf#create-custom-image). If you would like to include your application-specific binary files, follow the instructions in [ Planning for custom images ](https://cloud.ibm.com/docs/vpc?topic=vpc-planning-custom-images) to create your own custom image and use that to build the IBM Cloud HPC cluster through this offering. default: "" - - name: cluster_id + - name: cluster_name description: Ensure that you have received the cluster ID from IBM technical sales. A unique identifer for HPC cluster used by IBM Cloud HPC to differentiate different HPC clusters within the same reservation. This can be up to 39 alphanumeric characters including the underscore (_), the hyphen (-), and the period (.) characters. You cannot change the cluster ID after deployment. 
default: "" - name: reservation_id @@ -214,8 +214,8 @@ spec: value: $(params.login_image_name) - name: management_image_name value: $(params.management_image_name) - - name: cluster_id - value: $(params.cluster_id) + - name: cluster_name + value: $(params.cluster_name) - name: reservation_id value: $(params.reservation_id) - name: us_east_zone @@ -288,8 +288,8 @@ spec: value: $(params.login_image_name) - name: management_image_name value: $(params.management_image_name) - - name: cluster_id - value: $(params.cluster_id) + - name: cluster_name + value: $(params.cluster_name) - name: reservation_id value: $(params.reservation_id) - name: us_east_zone @@ -362,8 +362,8 @@ spec: value: $(params.login_image_name) - name: management_image_name value: $(params.management_image_name) - - name: cluster_id - value: $(params.cluster_id) + - name: cluster_name + value: $(params.cluster_name) - name: reservation_id value: $(params.reservation_id) - name: us_east_zone @@ -436,8 +436,8 @@ spec: value: $(params.login_image_name) - name: management_image_name value: $(params.management_image_name) - - name: cluster_id - value: $(params.cluster_id) + - name: cluster_name + value: $(params.cluster_name) - name: reservation_id value: $(params.reservation_id) - name: us_east_zone diff --git a/.tekton/hpcaas/hpcaas-regression-pipeline/listener-git-trigger.yaml b/.tekton/hpcaas/hpcaas-regression-pipeline/listener-git-trigger.yaml index 13053d2b..5b4ab0ad 100644 --- a/.tekton/hpcaas/hpcaas-regression-pipeline/listener-git-trigger.yaml +++ b/.tekton/hpcaas/hpcaas-regression-pipeline/listener-git-trigger.yaml @@ -62,7 +62,7 @@ spec: - name: management_image_name description: Name of the custom image that you want to use to create virtual server instances in your IBM Cloud account to deploy the IBM Cloud HPC cluster management nodes. By default, the solution uses a RHEL88 base image with additional software packages mentioned [here](https://cloud.ibm.com/docs/ibm-spectrum-lsf#create-custom-image). If you would like to include your application-specific binary files, follow the instructions in [ Planning for custom images ](https://cloud.ibm.com/docs/vpc?topic=vpc-planning-custom-images) to create your own custom image and use that to build the IBM Cloud HPC cluster through this offering. default: "" - - name: cluster_id + - name: cluster_name description: Ensure that you have received the cluster ID from IBM technical sales. A unique identifer for HPC cluster used by IBM Cloud HPC to differentiate different HPC clusters within the same reservation. This can be up to 39 alphanumeric characters including the underscore (_), the hyphen (-), and the period (.) characters. You cannot change the cluster ID after deployment. 
default: "" - name: reservation_id @@ -182,8 +182,8 @@ spec: value: $(params.login_image_name) - name: management_image_name value: $(params.management_image_name) - - name: cluster_id - value: $(params.cluster_id) + - name: cluster_name + value: $(params.cluster_name) - name: reservation_id value: $(params.reservation_id) - name: us_east_zone @@ -259,8 +259,8 @@ spec: value: $(params.management_image_name) - name: reservation_id value: $(params.reservation_id) - - name: cluster_id - value: $(params.cluster_id) + - name: cluster_name + value: $(params.cluster_name) - name: us_east_zone value: $(params.us_east_zone) - name: us_east_cluster_id @@ -350,7 +350,7 @@ spec: value: $(event.ref) - name: resource_group value: $(event.ref) - - name: cluster_id + - name: cluster_name value: $(event.ref) - name: compute_image_name_rhel value: $(event.ref) diff --git a/.tekton/hpcaas/hpcaas_task/hpcaas-task-infra-rhel.yaml b/.tekton/hpcaas/hpcaas_task/hpcaas-task-infra-rhel.yaml index 1d1a7d56..68af49c8 100644 --- a/.tekton/hpcaas/hpcaas_task/hpcaas-task-infra-rhel.yaml +++ b/.tekton/hpcaas/hpcaas_task/hpcaas-task-infra-rhel.yaml @@ -53,7 +53,7 @@ spec: - name: login_image_name description: Name of the custom image that you want to use to create virtual server instances in your IBM Cloud account to deploy the IBM Cloud HPC cluster login node. By default, the solution uses a RHEL 8-6 OS image with additional software packages mentioned [here](https://cloud.ibm.com/docs/hpc-spectrum-LSF#create-custom-image). The solution also offers, Ubuntu 22-04 OS base image (hpcaas-lsf10-ubuntu2204-compute-v2). If you would like to include your application-specific binary files, follow the instructions in [ Planning for custom images ](https://cloud.ibm.com/docs/vpc?topic=vpc-planning-custom-images) to create your own custom image and use that to build the IBM Cloud HPC cluster through this offering. default: "" - - name: cluster_id + - name: cluster_name description: Ensure that you have received the cluster ID from IBM technical sales. A unique identifer for HPC cluster used by IBM Cloud HPC to differentiate different HPC clusters within the same reservation. This can be up to 39 alphanumeric characters including the underscore (_), the hyphen (-), and the period (.) characters. You cannot change the cluster ID after deployment. default: "" - name: reservation_id @@ -127,6 +127,10 @@ spec: name: $(params.continuous-delivery-context-secret) key: $(params.ibmcloud-apikey-secret-key) optional: true + - name: BUILD_NUMBER + valueFrom: + fieldRef: + fieldPath: metadata.annotations['devops.cloud.ibm.com/build-number'] - name: PIPELINE_DEBUG value: $(params.pipeline-debug) - name: REVISION @@ -141,8 +145,8 @@ spec: value: $(params.compute_image_name_rhel) - name: login_image_name value: $(params.login_image_name) - - name: cluster_id - value: $(params.cluster_id) + - name: cluster_name + value: $(params.cluster_name) - name: reservation_id value: $(params.reservation_id) - name: us_east_zone diff --git a/.tekton/hpcaas/hpcaas_task/hpcaas-task-negative.yaml b/.tekton/hpcaas/hpcaas_task/hpcaas-task-negative.yaml index c7ec96e3..2bf4508d 100644 --- a/.tekton/hpcaas/hpcaas_task/hpcaas-task-negative.yaml +++ b/.tekton/hpcaas/hpcaas_task/hpcaas-task-negative.yaml @@ -53,7 +53,7 @@ spec: - name: login_image_name description: Name of the custom image that you want to use to create virtual server instances in your IBM Cloud account to deploy the IBM Cloud HPC cluster login node. 
By default, the solution uses a RHEL 8-6 OS image with additional software packages mentioned [here](https://cloud.ibm.com/docs/hpc-spectrum-LSF#create-custom-image). The solution also offers, Ubuntu 22-04 OS base image (hpcaas-lsf10-ubuntu2204-compute-v2). If you would like to include your application-specific binary files, follow the instructions in [ Planning for custom images ](https://cloud.ibm.com/docs/vpc?topic=vpc-planning-custom-images) to create your own custom image and use that to build the IBM Cloud HPC cluster through this offering. default: "" - - name: cluster_id + - name: cluster_name description: Ensure that you have received the cluster ID from IBM technical sales. A unique identifer for HPC cluster used by IBM Cloud HPC to differentiate different HPC clusters within the same reservation. This can be up to 39 alphanumeric characters including the underscore (_), the hyphen (-), and the period (.) characters. You cannot change the cluster ID after deployment. default: "" - name: reservation_id @@ -127,6 +127,10 @@ spec: name: $(params.continuous-delivery-context-secret) key: $(params.ibmcloud-apikey-secret-key) optional: true + - name: BUILD_NUMBER + valueFrom: + fieldRef: + fieldPath: metadata.annotations['devops.cloud.ibm.com/build-number'] - name: PIPELINE_DEBUG value: $(params.pipeline-debug) - name: REVISION @@ -141,8 +145,8 @@ spec: value: $(params.compute_image_name_rhel) - name: login_image_name value: $(params.login_image_name) - - name: cluster_id - value: $(params.cluster_id) + - name: cluster_name + value: $(params.cluster_name) - name: reservation_id value: $(params.reservation_id) - name: us_east_zone diff --git a/.tekton/hpcaas/hpcaas_task/hpcaas-task-pr-rhel.yaml b/.tekton/hpcaas/hpcaas_task/hpcaas-task-pr-rhel.yaml index 302e0623..e1f1eaed 100644 --- a/.tekton/hpcaas/hpcaas_task/hpcaas-task-pr-rhel.yaml +++ b/.tekton/hpcaas/hpcaas_task/hpcaas-task-pr-rhel.yaml @@ -51,7 +51,7 @@ spec: - name: login_image_name description: Name of the custom image that you want to use to create virtual server instances in your IBM Cloud account to deploy the IBM Cloud HPC cluster login node. By default, the solution uses a RHEL 8-6 OS image with additional software packages mentioned [here](https://cloud.ibm.com/docs/hpc-spectrum-LSF#create-custom-image). The solution also offers, Ubuntu 22-04 OS base image (hpcaas-lsf10-ubuntu2204-compute-v2). If you would like to include your application-specific binary files, follow the instructions in [ Planning for custom images ](https://cloud.ibm.com/docs/vpc?topic=vpc-planning-custom-images) to create your own custom image and use that to build the IBM Cloud HPC cluster through this offering. default: "" - - name: cluster_id + - name: cluster_name description: Ensure that you have received the cluster ID from IBM technical sales. A unique identifer for HPC cluster used by IBM Cloud HPC to differentiate different HPC clusters within the same reservation. This can be up to 39 alphanumeric characters including the underscore (_), the hyphen (-), and the period (.) characters. You cannot change the cluster ID after deployment. 
default: "" - name: reservation_id @@ -101,6 +101,10 @@ spec: name: $(params.continuous-delivery-context-secret) key: $(params.ibmcloud-apikey-secret-key) optional: true + - name: BUILD_NUMBER + valueFrom: + fieldRef: + fieldPath: metadata.annotations['devops.cloud.ibm.com/build-number'] - name: PIPELINE_DEBUG value: $(params.pipeline-debug) - name: PR_REVISION @@ -120,8 +124,8 @@ spec: value: $(params.compute_image_name_rhel) - name: login_image_name value: $(params.login_image_name) - - name: cluster_id - value: $(params.cluster_id) + - name: cluster_name + value: $(params.cluster_name) - name: reservation_id value: $(params.reservation_id) - name: cos_region diff --git a/.tekton/hpcaas/hpcaas_task/hpcaas-task-pr-ubuntu.yaml b/.tekton/hpcaas/hpcaas_task/hpcaas-task-pr-ubuntu.yaml index 3b7c5d52..07d70594 100644 --- a/.tekton/hpcaas/hpcaas_task/hpcaas-task-pr-ubuntu.yaml +++ b/.tekton/hpcaas/hpcaas_task/hpcaas-task-pr-ubuntu.yaml @@ -51,7 +51,7 @@ spec: - name: login_image_name description: Name of the custom image that you want to use to create virtual server instances in your IBM Cloud account to deploy the IBM Cloud HPC cluster login node. By default, the solution uses a RHEL 8-6 OS image with additional software packages mentioned [here](https://cloud.ibm.com/docs/hpc-spectrum-LSF#create-custom-image). The solution also offers, Ubuntu 22-04 OS base image (hpcaas-lsf10-ubuntu2204-compute-v2). If you would like to include your application-specific binary files, follow the instructions in [ Planning for custom images ](https://cloud.ibm.com/docs/vpc?topic=vpc-planning-custom-images) to create your own custom image and use that to build the IBM Cloud HPC cluster through this offering. default: "" - - name: cluster_id + - name: cluster_name description: Ensure that you have received the cluster ID from IBM technical sales. A unique identifer for HPC cluster used by IBM Cloud HPC to differentiate different HPC clusters within the same reservation. This can be up to 39 alphanumeric characters including the underscore (_), the hyphen (-), and the period (.) characters. You cannot change the cluster ID after deployment. default: "" - name: reservation_id @@ -101,6 +101,10 @@ spec: name: $(params.continuous-delivery-context-secret) key: $(params.ibmcloud-apikey-secret-key) optional: true + - name: BUILD_NUMBER + valueFrom: + fieldRef: + fieldPath: metadata.annotations['devops.cloud.ibm.com/build-number'] - name: PIPELINE_DEBUG value: $(params.pipeline-debug) - name: PR_REVISION @@ -120,8 +124,8 @@ spec: value: $(params.compute_image_name_ubuntu) - name: login_image_name value: $(params.login_image_name) - - name: cluster_id - value: $(params.cluster_id) + - name: cluster_name + value: $(params.cluster_name) - name: reservation_id value: $(params.reservation_id) - name: cos_region diff --git a/.tekton/hpcaas/hpcaas_task/hpcaas-task-region.yaml b/.tekton/hpcaas/hpcaas_task/hpcaas-task-region.yaml index 55025fee..2512fee9 100644 --- a/.tekton/hpcaas/hpcaas_task/hpcaas-task-region.yaml +++ b/.tekton/hpcaas/hpcaas_task/hpcaas-task-region.yaml @@ -53,7 +53,7 @@ spec: - name: login_image_name description: Name of the custom image that you want to use to create virtual server instances in your IBM Cloud account to deploy the IBM Cloud HPC cluster login node. By default, the solution uses a RHEL 8-6 OS image with additional software packages mentioned [here](https://cloud.ibm.com/docs/hpc-spectrum-LSF#create-custom-image). 
The solution also offers, Ubuntu 22-04 OS base image (hpcaas-lsf10-ubuntu2204-compute-v2). If you would like to include your application-specific binary files, follow the instructions in [ Planning for custom images ](https://cloud.ibm.com/docs/vpc?topic=vpc-planning-custom-images) to create your own custom image and use that to build the IBM Cloud HPC cluster through this offering. default: "" - - name: cluster_id + - name: cluster_name description: Ensure that you have received the cluster ID from IBM technical sales. A unique identifer for HPC cluster used by IBM Cloud HPC to differentiate different HPC clusters within the same reservation. This can be up to 39 alphanumeric characters including the underscore (_), the hyphen (-), and the period (.) characters. You cannot change the cluster ID after deployment. default: "" - name: reservation_id @@ -127,6 +127,10 @@ spec: name: $(params.continuous-delivery-context-secret) key: $(params.ibmcloud-apikey-secret-key) optional: true + - name: BUILD_NUMBER + valueFrom: + fieldRef: + fieldPath: metadata.annotations['devops.cloud.ibm.com/build-number'] - name: PIPELINE_DEBUG value: $(params.pipeline-debug) - name: REVISION @@ -141,8 +145,8 @@ spec: value: $(params.compute_image_name_rhel) - name: login_image_name value: $(params.login_image_name) - - name: cluster_id - value: $(params.cluster_id) + - name: cluster_name + value: $(params.cluster_name) - name: reservation_id value: $(params.reservation_id) - name: us_east_zone diff --git a/.tekton/hpcaas/hpcaas_task/hpcaas-test-infra-ubuntu.yaml b/.tekton/hpcaas/hpcaas_task/hpcaas-test-infra-ubuntu.yaml index 0df74d7e..f5b3c333 100644 --- a/.tekton/hpcaas/hpcaas_task/hpcaas-test-infra-ubuntu.yaml +++ b/.tekton/hpcaas/hpcaas_task/hpcaas-test-infra-ubuntu.yaml @@ -53,7 +53,7 @@ spec: - name: login_image_name description: Name of the custom image that you want to use to create virtual server instances in your IBM Cloud account to deploy the IBM Cloud HPC cluster login node. By default, the solution uses a RHEL 8-6 OS image with additional software packages mentioned [here](https://cloud.ibm.com/docs/hpc-spectrum-LSF#create-custom-image). The solution also offers, Ubuntu 22-04 OS base image (hpcaas-lsf10-ubuntu2204-compute-v2). If you would like to include your application-specific binary files, follow the instructions in [ Planning for custom images ](https://cloud.ibm.com/docs/vpc?topic=vpc-planning-custom-images) to create your own custom image and use that to build the IBM Cloud HPC cluster through this offering. default: "" - - name: cluster_id + - name: cluster_name description: Ensure that you have received the cluster ID from IBM technical sales. A unique identifer for HPC cluster used by IBM Cloud HPC to differentiate different HPC clusters within the same reservation. This can be up to 39 alphanumeric characters including the underscore (_), the hyphen (-), and the period (.) characters. You cannot change the cluster ID after deployment. 
default: "" - name: reservation_id @@ -127,6 +127,10 @@ spec: name: $(params.continuous-delivery-context-secret) key: $(params.ibmcloud-apikey-secret-key) optional: true + - name: BUILD_NUMBER + valueFrom: + fieldRef: + fieldPath: metadata.annotations['devops.cloud.ibm.com/build-number'] - name: PIPELINE_DEBUG value: $(params.pipeline-debug) - name: REVISION @@ -141,8 +145,8 @@ spec: value: $(params.compute_image_name_ubuntu) - name: login_image_name value: $(params.login_image_name) - - name: cluster_id - value: $(params.cluster_id) + - name: cluster_name + value: $(params.cluster_name) - name: reservation_id value: $(params.reservation_id) - name: us_east_zone diff --git a/.tekton/lsf/lsf-regression-pipeline/listener-git-trigger.yaml b/.tekton/lsf/lsf-regression-pipeline/listener-git-trigger.yaml index e6c56eb8..96f226af 100644 --- a/.tekton/lsf/lsf-regression-pipeline/listener-git-trigger.yaml +++ b/.tekton/lsf/lsf-regression-pipeline/listener-git-trigger.yaml @@ -83,6 +83,9 @@ spec: - name: ibm_customer_number description: Comma-separated list of the IBM Customer Number(s) (ICN) that is used for the Bring Your Own License (BYOL) entitlement check. For more information on how to find your ICN, see [What is my IBM Customer Number (ICN)?](https://www.ibm.com/support/pages/what-my-ibm-customer-number-icn).. default: "" + - name: pac_ha_exist_certificate + description: PAC HA Existing Certificate + default: "" resourcetemplates: - apiVersion: v1 kind: PersistentVolumeClaim @@ -149,6 +152,8 @@ spec: value: $(params.git_user_name) - name: git_user_email value: $(params.git_user_email) + - name: pac_ha_exist_certificate + value: $(params.pac_ha_exist_certificate) workspaces: - name: pipeline-ws persistentVolumeClaim: @@ -196,6 +201,8 @@ spec: value: $(params.solution) - name: ibm_customer_number value: $(params.ibm_customer_number) + - name: pac_ha_exist_certificate + value: $(params.pac_ha_exist_certificate) --- apiVersion: tekton.dev/v1beta1 kind: EventListener @@ -261,6 +268,8 @@ spec: value: $(event.ref) - name: ibm_customer_number value: $(event.ref) + - name: pac_ha_exist_certificate + value: $(event.ref) --- apiVersion: tekton.dev/v1beta1 kind: EventListener diff --git a/.tekton/lsf/lsf-regression-pipeline/lsf-pipeline-git-trigger.yaml b/.tekton/lsf/lsf-regression-pipeline/lsf-pipeline-git-trigger.yaml index 6070c1b5..607faa3e 100644 --- a/.tekton/lsf/lsf-regression-pipeline/lsf-pipeline-git-trigger.yaml +++ b/.tekton/lsf/lsf-regression-pipeline/lsf-pipeline-git-trigger.yaml @@ -74,6 +74,9 @@ spec: - name: ibm_customer_number description: Comma-separated list of the IBM Customer Number(s) (ICN) that is used for the Bring Your Own License (BYOL) entitlement check. For more information on how to find your ICN, see [What is my IBM Customer Number (ICN)?](https://www.ibm.com/support/pages/what-my-ibm-customer-number-icn).. 
default: "" + - name: pac_ha_exist_certificate + description: PAC HA Existing Certificate + default: "" workspaces: - name: pipeline-ws tasks: @@ -272,6 +275,56 @@ spec: value: $(params.solution) - name: ibm_customer_number value: $(params.ibm_customer_number) + - name: wes-lsf-da-rhel-4 + runAfter: [git-clone, pre-requisites-install, ssh-key-creation] + taskRef: + name: wes-lsf-da-rhel-4 + workspaces: + - name: workspace + workspace: pipeline-ws + params: + - name: repository + value: $(params.repository) + - name: git_access_token + value: $(params.git_access_token) + - name: pipeline-debug + value: $(params.pipeline-debug) + - name: zone + value: $(params.zone) + - name: resource_group + value: $(params.resource_group) + - name: compute_image_name_rhel + value: $(params.compute_image_name_rhel) + - name: compute_image_name_ubuntu + value: $(params.compute_image_name_ubuntu) + - name: login_image_name + value: $(params.login_image_name) + - name: management_image_name + value: $(params.management_image_name) + - name: revision + value: $(params.revision) + - name: cos_region + value: $(params.cos_region) + - name: cos_bucket + value: $(params.cos_bucket) + - name: cos_instance_crn + value: $(params.cos_instance_crn) + - name: cos_api_key + value: $(params.cos_api_key) + - name: hpc_custom_reports_repo + value: $(params.hpc_custom_reports_repo) + - name: hpc_custom_reports_branch + value: $(params.hpc_custom_reports_branch) + - name: git_user_name + value: $(params.git_user_name) + - name: git_user_email + value: $(params.git_user_email) + - name: solution + value: $(params.solution) + - name: ibm_customer_number + value: $(params.ibm_customer_number) + - name: pac_ha_exist_certificate + value: $(params.pac_ha_exist_certificate) # - name: wes-lsf-da-ubuntu # runAfter: [git-clone, pre-requisites-install, ssh-key-creation] # taskRef: @@ -437,6 +490,7 @@ spec: wes-lsf-da-rhel-1, wes-lsf-da-rhel-2, wes-lsf-da-rhel-3, + wes-lsf-da-rhel-4, wes-lsf-da-region, wes-lsf-da-negative, ] @@ -466,6 +520,7 @@ spec: wes-lsf-da-rhel-1, wes-lsf-da-rhel-2, wes-lsf-da-rhel-3, + wes-lsf-da-rhel-4, wes-lsf-da-region, wes-lsf-da-negative, ] @@ -566,6 +621,40 @@ spec: VALIDATION_LOG_FILE_NAME="lsf-rhel-suite-8.log" source .tekton/scripts/issue_track.sh display_validation_log "${VALIDATION_LOG_FILE_NAME}" + + - name: validation-lsf-rhel-suite-9 + onError: continue + image: icr.io/continuous-delivery/pipeline/pipeline-base-ubi:latest + workingDir: "/artifacts" + command: ["/bin/bash", "-c"] + args: + - | + #!/bin/bash + VALIDATION_LOG_FILE_NAME="lsf-rhel-suite-9.log" + source .tekton/scripts/issue_track.sh + display_validation_log "${VALIDATION_LOG_FILE_NAME}" + - name: validation-lsf-rhel-suite-10 + onError: continue + image: icr.io/continuous-delivery/pipeline/pipeline-base-ubi:latest + workingDir: "/artifacts" + command: ["/bin/bash", "-c"] + args: + - | + #!/bin/bash + VALIDATION_LOG_FILE_NAME="lsf-rhel-suite-10.log" + source .tekton/scripts/issue_track.sh + display_validation_log "${VALIDATION_LOG_FILE_NAME}" + - name: validation-lsf-rhel-suite-11 + onError: continue + image: icr.io/continuous-delivery/pipeline/pipeline-base-ubi:latest + workingDir: "/artifacts" + command: ["/bin/bash", "-c"] + args: + - | + #!/bin/bash + VALIDATION_LOG_FILE_NAME="lsf-rhel-suite-11.log" + source .tekton/scripts/issue_track.sh + display_validation_log "${VALIDATION_LOG_FILE_NAME}" # - name: validation-lsf-ubuntu-suite # onError: continue # image: icr.io/continuous-delivery/pipeline/pipeline-base-ubi:latest @@ -621,12 +710,35 
@@ spec: VALIDATION_LOG_FILE_NAME="lsf-negative-suite-3.log" source .tekton/scripts/issue_track.sh display_validation_log "${VALIDATION_LOG_FILE_NAME}" + - name: validation-lsf-negative-suite-4 + onError: continue + image: icr.io/continuous-delivery/pipeline/pipeline-base-ubi:latest + workingDir: "/artifacts" + command: ["/bin/bash", "-c"] + args: + - | + #!/bin/bash + VALIDATION_LOG_FILE_NAME="lsf-negative-suite-4.log" + source .tekton/scripts/issue_track.sh + display_validation_log "${VALIDATION_LOG_FILE_NAME}" + - name: validation-lsf-negative-suite-5 + onError: continue + image: icr.io/continuous-delivery/pipeline/pipeline-base-ubi:latest + workingDir: "/artifacts" + command: ["/bin/bash", "-c"] + args: + - | + #!/bin/bash + VALIDATION_LOG_FILE_NAME="lsf-negative-suite-5.log" + source .tekton/scripts/issue_track.sh + display_validation_log "${VALIDATION_LOG_FILE_NAME}" - name: display-lsf-infra-logs runAfter: [ wes-lsf-da-rhel-1, wes-lsf-da-rhel-2, wes-lsf-da-rhel-3, + wes-lsf-da-rhel-4, wes-lsf-da-region, wes-lsf-da-negative, ] @@ -727,6 +839,39 @@ spec: LOG_FILE_NAME="lsf-rhel-suite-8.json" source .tekton/scripts/issue_track.sh issue_track "${LOG_FILE_NAME}" + - name: display-infra-log-rhel-suite-9 + onError: continue + image: icr.io/continuous-delivery/pipeline/pipeline-base-ubi:latest + workingDir: "/artifacts" + command: ["/bin/bash", "-c"] + args: + - | + #!/bin/bash + LOG_FILE_NAME="lsf-rhel-suite-9.json" + source .tekton/scripts/issue_track.sh + issue_track "${LOG_FILE_NAME}" + - name: display-infra-log-rhel-suite-10 + onError: continue + image: icr.io/continuous-delivery/pipeline/pipeline-base-ubi:latest + workingDir: "/artifacts" + command: ["/bin/bash", "-c"] + args: + - | + #!/bin/bash + LOG_FILE_NAME="lsf-rhel-suite-10.json" + source .tekton/scripts/issue_track.sh + issue_track "${LOG_FILE_NAME}" + - name: display-infra-log-rhel-suite-11 + onError: continue + image: icr.io/continuous-delivery/pipeline/pipeline-base-ubi:latest + workingDir: "/artifacts" + command: ["/bin/bash", "-c"] + args: + - | + #!/bin/bash + LOG_FILE_NAME="lsf-rhel-suite-11.json" + source .tekton/scripts/issue_track.sh + issue_track "${LOG_FILE_NAME}" # - name: display-infra-log-ubuntu-suite # image: icr.io/continuous-delivery/pipeline/pipeline-base-ubi:latest # workingDir: "/artifacts" @@ -781,6 +926,28 @@ spec: LOG_FILE_NAME="lsf-negative-suite-3.json" source .tekton/scripts/issue_track.sh issue_track "${LOG_FILE_NAME}" "negative_suite" + - name: display-infra-log-negative-suite-4 + image: icr.io/continuous-delivery/pipeline/pipeline-base-ubi:latest + onError: continue + workingDir: "/artifacts" + command: ["/bin/bash", "-c"] + args: + - | + #!/bin/bash + LOG_FILE_NAME="lsf-negative-suite-4.json" + source .tekton/scripts/issue_track.sh + issue_track "${LOG_FILE_NAME}" "negative_suite" + - name: display-infra-log-negative-suite-5 + image: icr.io/continuous-delivery/pipeline/pipeline-base-ubi:latest + onError: continue + workingDir: "/artifacts" + command: ["/bin/bash", "-c"] + args: + - | + #!/bin/bash + LOG_FILE_NAME="lsf-negative-suite-5.json" + source .tekton/scripts/issue_track.sh + issue_track "${LOG_FILE_NAME}" "negative_suite" - name: error-check-on-lsf-infra-logs runAfter: [display-validation-logs, display-lsf-infra-logs] workspaces: diff --git a/.tekton/lsf/lsf_task/lsf-task-infra-rhel-1.yaml b/.tekton/lsf/lsf_task/lsf-task-infra-rhel-1.yaml index fed419a9..3654a5cf 100644 --- a/.tekton/lsf/lsf_task/lsf-task-infra-rhel-1.yaml +++ b/.tekton/lsf/lsf_task/lsf-task-infra-rhel-1.yaml @@ -85,6 
+85,10 @@ spec: name: $(params.continuous-delivery-context-secret) key: $(params.ibmcloud-apikey-secret-key) optional: true + - name: BUILD_NUMBER + valueFrom: + fieldRef: + fieldPath: metadata.annotations['devops.cloud.ibm.com/build-number'] - name: PIPELINE_DEBUG value: $(params.pipeline-debug) - name: REVISION diff --git a/.tekton/lsf/lsf_task/lsf-task-infra-rhel-2.yaml b/.tekton/lsf/lsf_task/lsf-task-infra-rhel-2.yaml index 5075e513..048dd1dd 100644 --- a/.tekton/lsf/lsf_task/lsf-task-infra-rhel-2.yaml +++ b/.tekton/lsf/lsf_task/lsf-task-infra-rhel-2.yaml @@ -85,6 +85,10 @@ spec: name: $(params.continuous-delivery-context-secret) key: $(params.ibmcloud-apikey-secret-key) optional: true + - name: BUILD_NUMBER + valueFrom: + fieldRef: + fieldPath: metadata.annotations['devops.cloud.ibm.com/build-number'] - name: PIPELINE_DEBUG value: $(params.pipeline-debug) - name: REVISION diff --git a/.tekton/lsf/lsf_task/lsf-task-infra-rhel-3.yaml b/.tekton/lsf/lsf_task/lsf-task-infra-rhel-3.yaml index c9e9cab9..5ec6e567 100644 --- a/.tekton/lsf/lsf_task/lsf-task-infra-rhel-3.yaml +++ b/.tekton/lsf/lsf_task/lsf-task-infra-rhel-3.yaml @@ -85,6 +85,10 @@ spec: name: $(params.continuous-delivery-context-secret) key: $(params.ibmcloud-apikey-secret-key) optional: true + - name: BUILD_NUMBER + valueFrom: + fieldRef: + fieldPath: metadata.annotations['devops.cloud.ibm.com/build-number'] - name: PIPELINE_DEBUG value: $(params.pipeline-debug) - name: REVISION diff --git a/.tekton/lsf/lsf_task/lsf-task-infra-rhel-4.yaml b/.tekton/lsf/lsf_task/lsf-task-infra-rhel-4.yaml new file mode 100644 index 00000000..dc8c535e --- /dev/null +++ b/.tekton/lsf/lsf_task/lsf-task-infra-rhel-4.yaml @@ -0,0 +1,190 @@ +--- +apiVersion: tekton.dev/v1beta1 +kind: Task +metadata: + name: wes-lsf-da-rhel-4 +spec: + params: + - name: ibmcloud-api + description: the ibmcloud api + default: https://cloud.ibm.com + - name: continuous-delivery-context-secret + description: name of the secret containing the continuous delivery pipeline context secrets + default: secure-properties + - name: ibmcloud-apikey-secret-key + description: field in the secret that contains the api key used to login to ibmcloud + default: ibmcloud_api_key + - name: pipeline-debug + description: Pipeline debug mode. Value can be 0 or 1. Default to 0 + default: "0" + - name: revision + description: | + the git revision/commit to update the git HEAD to. + Default is to mean only use the branch + default: "" + - name: directory-name + default: "." + - name: repository + description: the git repo url + - name: git_access_token + description: the token to access the git repository for the clone operations + default: "" + - name: zone + default: "" + description: The IBM Cloud zone name within the selected region where the IBM Cloud HPC cluster should be deployed and requires a single zone input value. Supported zones are eu-de-2 and eu-de-3 for eu-de, us-east-1 and us-east-3 for us-east, and us-south-1 for us-south. The management nodes, file storage shares, and compute nodes will be deployed in the same zone.[Learn more](https://cloud.ibm.com/docs/vpc?topic=vpc-creating-a-vpc-in-a-different-region#get-zones-using-the-cli). + - name: resource_group + description: Resource group name from your IBM Cloud account where the VPC resources should be deployed. Note. If the resource group value is set as null, automation creates two different RG with the name (workload-rg and service-rg). 
For additional information on resource groups, see [Managing resource groups](https://cloud.ibm.com/docs/account?topic=account-rgs). + default: Default + - name: compute_image_name_rhel + description: Name of the custom image that you want to use to create virtual server instances in your IBM Cloud account to deploy the IBM Cloud HPC cluster dynamic compute nodes. By default, the solution uses a RHEL 8-6 OS image with additional software packages mentioned [here](https://cloud.ibm.com/docs/hpc-spectrum-LSF#create-custom-image). The solution also offers, Ubuntu 22-04 OS base image (hpcaas-lsf10-ubuntu2204-compute-v1). If you would like to include your application-specific binary files, follow the instructions in [ Planning for custom images ](https://cloud.ibm.com/docs/vpc?topic=vpc-planning-custom-images) to create your own custom image and use that to build the IBM Cloud HPC cluster through this offering. + default: "" + - name: login_image_name + description: Name of the custom image that you want to use to create virtual server instances in your IBM Cloud account to deploy the IBM Cloud HPC cluster login node. By default, the solution uses a RHEL 8-6 OS image with additional software packages mentioned [here](https://cloud.ibm.com/docs/hpc-spectrum-LSF#create-custom-image). The solution also offers, Ubuntu 22-04 OS base image (hpcaas-lsf10-ubuntu2204-compute-v2). If you would like to include your application-specific binary files, follow the instructions in [ Planning for custom images ](https://cloud.ibm.com/docs/vpc?topic=vpc-planning-custom-images) to create your own custom image and use that to build the IBM Cloud HPC cluster through this offering. + default: "" + - name: cos_region + description: The cos region name. + default: "" + - name: cos_bucket + description: The cos bucket name. + default: "" + - name: cos_instance_crn + description: The cos instance crn. + default: "" + - name: cos_api_key + description: The cos account api key. + default: "" + - name: hpc_custom_reports_repo + description: The HPC custom reports storage repository. + default: "" + - name: hpc_custom_reports_branch + description: The HPC custom reports storage repository branch. + default: "main" + - name: git_user_name + description: The git user name. + default: "" + - name: git_user_email + description: The git user email. + default: "" + - name: management_image_name + description: Name of the custom image that you want to use to create virtual server instances in your IBM Cloud account to deploy the IBM Cloud HPC cluster management nodes. By default, the solution uses a RHEL88 base image with additional software packages mentioned [here](https://cloud.ibm.com/docs/ibm-spectrum-lsf#create-custom-image). If you would like to include your application-specific binary files, follow the instructions in [ Planning for custom images ](https://cloud.ibm.com/docs/vpc?topic=vpc-planning-custom-images) to create your own custom image and use that to build the IBM Cloud HPC cluster through this offering. + default: "" + - name: solution + description: Provide the value for the solution that is needed for the support of lsf and HPC. + default: "lsf" + - name: ibm_customer_number + description: Comma-separated list of the IBM Customer Number(s) (ICN) that is used for the Bring Your Own License (BYOL) entitlement check. For more information on how to find your ICN, see [What is my IBM Customer Number (ICN)?](https://www.ibm.com/support/pages/what-my-ibm-customer-number-icn).. 
+ default: "" + - name: pac_ha_exist_certificate + description: PAC HA Existing Certificate + default: "" + workspaces: + - name: workspace + mountPath: /artifacts + stepTemplate: + env: + - name: API_KEY + valueFrom: + secretKeyRef: + name: $(params.continuous-delivery-context-secret) + key: $(params.ibmcloud-apikey-secret-key) + optional: true + - name: BUILD_NUMBER + valueFrom: + fieldRef: + fieldPath: metadata.annotations['devops.cloud.ibm.com/build-number'] + - name: PIPELINE_DEBUG + value: $(params.pipeline-debug) + - name: REVISION + value: $(params.revision) + - name: zone + value: $(params.zone) + - name: resource_group + value: $(params.resource_group) + - name: compute_image_name_rhel + value: $(params.compute_image_name_rhel) + - name: login_image_name + value: $(params.login_image_name) + - name: cos_region + value: $(params.cos_region) + - name: cos_bucket + value: $(params.cos_bucket) + - name: cos_instance_crn + value: $(params.cos_instance_crn) + - name: cos_api_key + value: $(params.cos_api_key) + - name: hpc_custom_reports_repo + value: $(params.hpc_custom_reports_repo) + - name: hpc_custom_reports_branch + value: $(params.hpc_custom_reports_branch) + - name: git_user_name + value: $(params.git_user_name) + - name: git_user_email + value: $(params.git_user_email) + - name: git_access_token + value: $(params.git_access_token) + - name: solution + value: $(params.solution) + - name: ibm_customer_number + value: $(params.ibm_customer_number) + - name: management_image_name + value: $(params.management_image_name) + - name: pac_ha_exist_certificate + value: $(params.pac_ha_exist_certificate) + steps: + - name: rhel-suite-7 + onError: continue + image: icr.io/continuous-delivery/pipeline/pipeline-base-ubi:latest + workingDir: "/artifacts" + imagePullPolicy: Always + command: ["/bin/bash", "-c"] + args: + - | + #!/bin/bash + + if [[ "${PIPELINE_DEBUG}" == "true" ]]; then + pwd + env + trap env EXIT + set -x + fi + + source .tekton/scripts/suites.sh + lsf_rhel_suite_7 + - name: rhel-suite-8 + onError: continue + image: icr.io/continuous-delivery/pipeline/pipeline-base-ubi:latest + workingDir: "/artifacts" + imagePullPolicy: Always + command: ["/bin/bash", "-c"] + args: + - | + #!/bin/bash + + if [[ "${PIPELINE_DEBUG}" == "true" ]]; then + pwd + env + trap env EXIT + set -x + fi + + source .tekton/scripts/suites.sh + lsf_rhel_suite_8 + - name: rhel-suite-9 + onError: continue + image: icr.io/continuous-delivery/pipeline/pipeline-base-ubi:latest + workingDir: "/artifacts" + imagePullPolicy: Always + command: ["/bin/bash", "-c"] + args: + - | + #!/bin/bash + + if [[ "${PIPELINE_DEBUG}" == "true" ]]; then + pwd + env + trap env EXIT + set -x + fi + + source .tekton/scripts/suites.sh + lsf_rhel_suite_9 diff --git a/.tekton/lsf/lsf_task/lsf-task-negative.yaml b/.tekton/lsf/lsf_task/lsf-task-negative.yaml index 479b0387..329129b9 100644 --- a/.tekton/lsf/lsf_task/lsf-task-negative.yaml +++ b/.tekton/lsf/lsf_task/lsf-task-negative.yaml @@ -85,6 +85,10 @@ spec: name: $(params.continuous-delivery-context-secret) key: $(params.ibmcloud-apikey-secret-key) optional: true + - name: BUILD_NUMBER + valueFrom: + fieldRef: + fieldPath: metadata.annotations['devops.cloud.ibm.com/build-number'] - name: PIPELINE_DEBUG value: $(params.pipeline-debug) - name: REVISION @@ -182,7 +186,47 @@ spec: source .tekton/scripts/suites.sh echo "${hpc_custom_reports_repo}" lsf_negative_suite_3 - - name: rhel-suite-7 + - name: negative-suite-4 + onError: continue + image: 
icr.io/continuous-delivery/pipeline/pipeline-base-ubi:latest + workingDir: "/artifacts" + imagePullPolicy: Always + command: ["/bin/bash", "-c"] + args: + - | + #!/bin/bash + + if [[ "${PIPELINE_DEBUG}" == "true" ]]; then + pwd + env + trap env EXIT + set -x + fi + + source .tekton/scripts/suites.sh + echo "${hpc_custom_reports_repo}" + lsf_negative_suite_4 + - name: negative-suite-5 + onError: continue + image: icr.io/continuous-delivery/pipeline/pipeline-base-ubi:latest + workingDir: "/artifacts" + imagePullPolicy: Always + command: ["/bin/bash", "-c"] + args: + - | + #!/bin/bash + + if [[ "${PIPELINE_DEBUG}" == "true" ]]; then + pwd + env + trap env EXIT + set -x + fi + + source .tekton/scripts/suites.sh + echo "${hpc_custom_reports_repo}" + lsf_negative_suite_5 + - name: rhel-suite-11 onError: continue image: icr.io/continuous-delivery/pipeline/pipeline-base-ubi:latest workingDir: "/artifacts" @@ -200,4 +244,4 @@ spec: fi source .tekton/scripts/suites.sh - lsf_rhel_suite_7 + lsf_rhel_suite_11 diff --git a/.tekton/lsf/lsf_task/lsf-task-pr-rhel.yaml b/.tekton/lsf/lsf_task/lsf-task-pr-rhel.yaml index 919f15a3..8b7f34c7 100644 --- a/.tekton/lsf/lsf_task/lsf-task-pr-rhel.yaml +++ b/.tekton/lsf/lsf_task/lsf-task-pr-rhel.yaml @@ -86,6 +86,10 @@ spec: name: $(params.continuous-delivery-context-secret) key: $(params.ibmcloud-apikey-secret-key) optional: true + - name: BUILD_NUMBER + valueFrom: + fieldRef: + fieldPath: metadata.annotations['devops.cloud.ibm.com/build-number'] - name: PIPELINE_DEBUG value: $(params.pipeline-debug) - name: PR_REVISION diff --git a/.tekton/lsf/lsf_task/lsf-task-region.yaml b/.tekton/lsf/lsf_task/lsf-task-region.yaml index 7d849c4d..0341b909 100644 --- a/.tekton/lsf/lsf_task/lsf-task-region.yaml +++ b/.tekton/lsf/lsf_task/lsf-task-region.yaml @@ -85,6 +85,10 @@ spec: name: $(params.continuous-delivery-context-secret) key: $(params.ibmcloud-apikey-secret-key) optional: true + - name: BUILD_NUMBER + valueFrom: + fieldRef: + fieldPath: metadata.annotations['devops.cloud.ibm.com/build-number'] - name: PIPELINE_DEBUG value: $(params.pipeline-debug) - name: REVISION @@ -141,7 +145,7 @@ spec: source .tekton/scripts/suites.sh lsf_regions_suite - - name: rhel-suite-8 + - name: rhel-suite-10 onError: continue image: icr.io/continuous-delivery/pipeline/pipeline-base-ubi:latest workingDir: "/artifacts" @@ -159,4 +163,4 @@ spec: fi source .tekton/scripts/suites.sh - lsf_rhel_suite_8 + lsf_rhel_suite_10 diff --git a/.tekton/scripts/issue_track.sh b/.tekton/scripts/issue_track.sh index ed96853c..26e68255 100644 --- a/.tekton/scripts/issue_track.sh +++ b/.tekton/scripts/issue_track.sh @@ -7,7 +7,7 @@ error_check_on_all_file() { for file in "$DIRECTORY"/$pattern; do if [ -f "$file" ]; then if [[ "${file}" == *"negative"* ]]; then - infra_validation_negative_log_fail_check=$(eval "grep -E -w 'FAIL' $file") + infra_validation_negative_log_fail_check=$(eval "grep -v 'Terraform upgrade output:' $file" | grep -E -w 'FAIL') if [[ "$infra_validation_negative_log_fail_check" ]]; then results+=("true") if [[ "${infra_or_validation}" == "infra" ]]; then @@ -17,7 +17,7 @@ error_check_on_all_file() { fi fi else - infra_validation_log_error_check=$(eval "grep -E -w 'FAIL|Error|ERROR' $file") + infra_validation_log_error_check=$(eval "grep -v 'Terraform upgrade output:' $file" | grep -E -w 'FAIL|Error|ERROR') if [[ "$infra_validation_log_error_check" ]]; then results+=("true") if [[ "${infra_or_validation}" == "infra" ]]; then @@ -45,7 +45,7 @@ issue_track() { DIRECTORY="/artifacts/tests" if [ -d 
"$DIRECTORY" ]; then if [[ "${LOG_FILE_NAME}" == *"negative"* ]]; then - negative_log_error_check=$(eval "grep -E -w 'FAIL' $DIRECTORY/$LOG_FILE_NAME") + negative_log_error_check=$(eval "grep -v 'Terraform upgrade output:' $DIRECTORY/$LOG_FILE_NAME" | grep 'FAIL') if [[ "$negative_log_error_check" ]]; then echo "${negative_log_error_check}" echo "Found FAIL in plan/apply log. Please check log : ${LOG_FILE_NAME}" @@ -53,7 +53,7 @@ issue_track() { fi else # Track error/fail from the suites log file - log_error_check=$(eval "grep -E -w 'FAIL|Error|ERROR' $DIRECTORY/$LOG_FILE_NAME") + log_error_check=$(eval "grep -v 'Terraform upgrade output:' $DIRECTORY/$LOG_FILE_NAME" | grep -E -w 'FAIL|Error|ERROR') if [[ "$log_error_check" ]]; then echo "${log_error_check}" echo "Found Error/FAIL/ERROR in plan/apply log. Please check log : ${LOG_FILE_NAME}" @@ -102,8 +102,13 @@ display_validation_log() { echo "################################# DISPLAY ERROR ##################################" echo "##################################################################################" echo "##################################################################################" + if [[ "${LOG_FILE_NAME}" == *"negative"* ]]; then + validation_log_error_check=$(eval "grep -v 'Terraform upgrade output:' $DIRECTORY/logs/$LOG_FILE_NAME" | grep -E -w 'FAIL') + else + validation_log_error_check=$(eval "grep -v 'Terraform upgrade output:' $DIRECTORY/logs/$LOG_FILE_NAME" | grep -E -w 'FAIL|Error|ERROR') + fi + # Display if any error in validation log - validation_log_error_check=$(eval "grep -E -w 'FAIL|Error|ERROR' $DIRECTORY/logs/$LOG_FILE_NAME") if [[ "$validation_log_error_check" ]]; then echo "********************** ERROR CHECK in ${LOG_FILE_NAME} VALIDATION OUTPUT LOG **********************" echo "$validation_log_error_check" diff --git a/.tekton/scripts/push_reports.sh b/.tekton/scripts/push_reports.sh index 42f27faf..05827f63 100644 --- a/.tekton/scripts/push_reports.sh +++ b/.tekton/scripts/push_reports.sh @@ -6,10 +6,11 @@ push_reports() { PR_OR_REGRESSION=$3 suite=$4 CHECK_SOLUTION=$5 + BUILD_NUMBER=$6 HTML_FILE_NAME=$(echo "$LOG_FILE" | cut -d'.' -f1) mkdir -p "$DIRECTORY"/push_reports cd "$DIRECTORY"/push_reports || exit - COMMIT_ID="$(git log --format="%H" -n 1)" + # COMMIT_ID="$(git log --format="%H" -n 1)" time_stamp=$(date +%d-%m-%Y) if [[ "${hpc_custom_reports_repo:?}" != *.git ]]; then echo "Adding .git suffix to Repository URL" @@ -22,10 +23,10 @@ push_reports() { git clone -b "${hpc_custom_reports_branch:?}" "${REPOSITORY_CLONE}" "${suite}" cd "${suite}" || exit if [[ "$CHECK_SOLUTION" == "hpcaas" ]]; then - folder_name="hpcaas/${time_stamp}/$PR_OR_REGRESSION/${COMMIT_ID}" + folder_name="hpcaas/${time_stamp}/$PR_OR_REGRESSION/${BUILD_NUMBER}" fi if [[ "$CHECK_SOLUTION" == "lsf" ]]; then - folder_name="lsf/${time_stamp}/$PR_OR_REGRESSION/${COMMIT_ID}" + folder_name="lsf/${time_stamp}/$PR_OR_REGRESSION/${BUILD_NUMBER}" fi mkdir -p "${folder_name}" git pull origin "${hpc_custom_reports_branch:?}" @@ -33,7 +34,7 @@ push_reports() { git config --global user.name "${git_user_name:?}" git config --global user.email "${git_user_email:?}" git add . 
- git commit -m "report-upload-$COMMIT_ID" + git commit -m "tekton-build-number-$BUILD_NUMBER" git push origin "${hpc_custom_reports_branch:?}" -f echo "********************* GitHub Pages Link ************************" echo "Please click the below link to see the ${suite} results" diff --git a/.tekton/scripts/suites.sh b/.tekton/scripts/suites.sh index d8898d4c..175b25f6 100644 --- a/.tekton/scripts/suites.sh +++ b/.tekton/scripts/suites.sh @@ -24,13 +24,13 @@ common_suite() { # get ssh-key created based on pr-id get_pr_ssh_key "${PR_REVISION}" "${CHECK_SOLUTION}" SSH_KEY=${CICD_SSH_KEY:?} COMPUTE_IMAGE_NAME=${compute_image_name:?} LOGIN_NODE_IMAGE_NAME=${login_image_name:?} MANAGEMENT_IMAGE_NAME=${management_image_name:?} \ - ZONE=${zone:?} RESERVATION_ID=${reservation_id:?} CLUSTER_ID=${cluster_id:?} RESOURCE_GROUP=${resource_group:?} \ + ZONE=${zone:?} RESERVATION_ID=${reservation_id:?} CLUSTER_NAME=${cluster_name:?} DEFAULT_EXISTING_RESOURCE_GROUP=${resource_group:?} \ go test -v -timeout 9000m -run "${test_cases}" | tee -a "$LOG_FILE" # Upload log/test_output files to cos bucket cos_upload "PR" "${CHECK_SOLUTION}" "${DIRECTORY}" # push custom reports to custom-reports repository - push_reports "${LOG_FILE}" "${DIRECTORY}" "PR" "${suite}" "${CHECK_SOLUTION}" + push_reports "${LOG_FILE}" "${DIRECTORY}" "PR" "${suite}" "${CHECK_SOLUTION}" "${BUILD_NUMBER}" # Checking any error/issue from log file for pr issue_track "${LOG_FILE}" "PR" @@ -40,13 +40,13 @@ common_suite() { # get ssh-key created based on pr-id get_pr_ssh_key "${PR_REVISION}" "${CHECK_SOLUTION}" SSH_KEY=${CICD_SSH_KEY:?} COMPUTE_IMAGE_NAME=${compute_image_name:?} LOGIN_NODE_IMAGE_NAME=${login_image_name:?} MANAGEMENT_IMAGE_NAME=${management_image_name:?} \ - ZONE=${zone:?} SOLUTION=${solution:?} IBM_CUSTOMER_NUMBER=${ibm_customer_number:?} RESOURCE_GROUP=${resource_group:?} \ + ZONE=${zone:?} SOLUTION=${solution:?} IBM_CUSTOMER_NUMBER=${ibm_customer_number:?} DEFAULT_EXISTING_RESOURCE_GROUP=${resource_group:?} \ go test -v -timeout 9000m -run "${test_cases}" | tee -a "$LOG_FILE" # Upload log/test_output files to cos bucket cos_upload "PR" "${CHECK_SOLUTION}" "${DIRECTORY}" # push custom reports to custom-reports repository - push_reports "${LOG_FILE}" "${DIRECTORY}" "PR" "${suite}" "${CHECK_SOLUTION}" + push_reports "${LOG_FILE}" "${DIRECTORY}" "PR" "${suite}" "${CHECK_SOLUTION}" "${BUILD_NUMBER}" # Checking any error/issue from log file for pr issue_track "${LOG_FILE}" "PR" @@ -62,13 +62,13 @@ common_suite() { EU_DE_ZONE=${eu_de_zone:?} EU_DE_CLUSTER_ID=${eu_de_cluster_id:?} EU_DE_RESERVATION_ID=${eu_de_reservation_id:?} \ EU_DE_RESERVATION_ID=${eu_de_reservation_id:?} COMPUTE_IMAGE_NAME=${compute_image_name:?} \ LOGIN_NODE_IMAGE_NAME=${login_image_name:?} ZONE=${zone:?} RESERVATION_ID=${reservation_id:?} \ - CLUSTER_ID=${cluster_id:?} RESOURCE_GROUP=${resource_group:?} MANAGEMENT_IMAGE_NAME=${management_image_name:?} \ + CLUSTER_NAME=${cluster_name:?} DEFAULT_EXISTING_RESOURCE_GROUP=${resource_group:?} MANAGEMENT_IMAGE_NAME=${management_image_name:?} \ go test -v -timeout 9000m -run "${test_cases}" | tee -a "$LOG_FILE" # Upload log/test_output files to cos bucket cos_upload "REGRESSION" "${CHECK_SOLUTION}" "${DIRECTORY}" "${VALIDATION_LOG_FILE_NAME}" # push custom reports to custom-reports repository - push_reports "${LOG_FILE}" "${DIRECTORY}" "REGRESSION" "${suite}" "${CHECK_SOLUTION}" + push_reports "${LOG_FILE}" "${DIRECTORY}" "REGRESSION" "${suite}" "${CHECK_SOLUTION}" "${BUILD_NUMBER}" # Checking any error/issue from 
log file for commit/push issue_track "${LOG_FILE}" @@ -78,13 +78,13 @@ common_suite() { # get ssh-key created based on commit-id get_commit_ssh_key "${REVISION}" "${CHECK_SOLUTION}" SSH_KEY=${CICD_SSH_KEY:?} COMPUTE_IMAGE_NAME=${compute_image_name:?} LOGIN_NODE_IMAGE_NAME=${login_image_name:?} MANAGEMENT_IMAGE_NAME=${management_image_name:?} \ - ZONE=${zone:?} SOLUTION=${solution:?} IBM_CUSTOMER_NUMBER=${ibm_customer_number:?} RESOURCE_GROUP=${resource_group:?} \ + ZONE=${zone:?} SOLUTION=${solution:?} IBM_CUSTOMER_NUMBER=${ibm_customer_number:?} DEFAULT_EXISTING_RESOURCE_GROUP=${resource_group:?} \ go test -v -timeout 9000m -run "${test_cases}" | tee -a "$LOG_FILE" # Upload log/test_output files to cos bucket cos_upload "REGRESSION" "${CHECK_SOLUTION}" "${DIRECTORY}" "${VALIDATION_LOG_FILE_NAME}" # push custom reports to custom-reports repository - push_reports "${LOG_FILE}" "${DIRECTORY}" "REGRESSION" "${suite}" "${CHECK_SOLUTION}" + push_reports "${LOG_FILE}" "${DIRECTORY}" "REGRESSION" "${suite}" "${CHECK_SOLUTION}" "${BUILD_NUMBER}" # Checking any error/issue from log file for commit/push issue_track "${LOG_FILE}" @@ -193,7 +193,7 @@ hpcaas_regions_suite() { hpcaas_negative_suite() { suite=hpcaas-negative-suite solution=hpcaas - test_cases="TestRunHPCWithoutMandatory,TestRunHPCInvalidReservationID,TestRunInvalidLDAPServerIP,TestRunInvalidLDAPUsernamePassword,TestRunInvalidAPPCenterPassword,TestRunInvalidDomainName,TestRunKMSInstanceNameAndKMSKeyNameWithInvalidValue,TestRunExistSubnetIDVpcNameAsNull,TestRunInvalidSshKeysAndRemoteAllowedIP,TestRunHPCInvalidReservationIDAndContractID" + test_cases="TestRunHPCWithoutMandatory,TestRunHPCInvalidReservationID,TestRunInvalidLDAPServerIP,TestRunInvalidAPPCenterPassword,TestRunInvalidDomainName,TestRunKMSInstanceNameAndKMSKeyNameWithInvalidValue,TestRunExistSubnetIDVpcNameAsNull,TestRunInvalidSshKeysAndRemoteAllowedIP,TestRunHPCInvalidReservationIDAndContractID" new_line="${test_cases//,/$'\n'}" echo "************** Going to run ${suite} ${new_line} **************" common_suite "${test_cases}" "${suite}" "${compute_image_name_rhel:?}" "${solution:?}" @@ -286,7 +286,7 @@ lsf_rhel_suite_6() { lsf_rhel_suite_7() { suite=lsf-rhel-suite-7 solution=lsf - test_cases="TestRunCIDRsAsNonDefault,TestRunObservability" + test_cases="TestRunCIDRsAsNonDefault" new_line="${test_cases//,/$'\n'}" echo "************** Going to run ${suite} ${new_line} **************" common_suite "${test_cases}" "${suite}" "${compute_image_name_rhel:?}" "${solution:?}" @@ -301,6 +301,37 @@ lsf_rhel_suite_8() { echo "************** Going to run ${suite} ${new_line} **************" common_suite "${test_cases}" "${suite}" "${compute_image_name_rhel:?}" "${solution:?}" } + +# commit based suite on lsf-rhel-suite-9 +lsf_rhel_suite_9() { + suite=lsf-rhel-suite-9 + solution=lsf + export APP_CENTER_EXISTING_CERTIFICATE_INSTANCE=${pac_ha_exist_certificate:?} + test_cases="TestRunPacHa,TestRunLSFLogs" + new_line="${test_cases//,/$'\n'}" + echo "************** Going to run ${suite} ${new_line} **************" + common_suite "${test_cases}" "${suite}" "${compute_image_name_rhel:?}" "${solution:?}" +} + +# commit based suite on lsf-rhel-suite-10 +lsf_rhel_suite_10() { + suite=lsf-rhel-suite-10 + solution=lsf + test_cases="TestRunObservabilityCloudLogsManagementAndComputeEnabled,TestRunObservabilityCloudLogsManagementEnabled,TestRunObservabilityCloudLogsManagementAndComputeDisabled" + new_line="${test_cases//,/$'\n'}" + echo "************** Going to run ${suite} ${new_line} **************" +
common_suite "${test_cases}" "${suite}" "${compute_image_name_rhel:?}" "${solution:?}" +} + +# commit based suite on lsf-rhel-suite-11 +lsf_rhel_suite_11() { + suite=lsf-rhel-suite-11 + solution=lsf + test_cases="TestRunObservabilityMonitoringForManagementEnabledAndComputeDisabled,TestRunObservabilityMonitoringForManagementAndComputeEnabled,TestRunObservabilityMonitoringForManagementAndComputeDisabled" + new_line="${test_cases//,/$'\n'}" + echo "************** Going to run ${suite} ${new_line} **************" + common_suite "${test_cases}" "${suite}" "${compute_image_name_rhel:?}" "${solution:?}" +} # # commit based suite on ubuntu-suite-1 # lsf_ubuntu_suite_1() { # suite=lsf-ubuntu-suite-1 @@ -342,7 +373,7 @@ lsf_regions_suite() { common_suite "${test_cases}" "${suite}" "${compute_image_name_rhel:?}" "${solution:?}" } -# negative based suite on negative-suite +# negative based suite on negative-suite-1 lsf_negative_suite_1() { suite=lsf-negative-suite-1 solution=lsf @@ -352,21 +383,41 @@ lsf_negative_suite_1() { common_suite "${test_cases}" "${suite}" "${compute_image_name_rhel:?}" "${solution:?}" } -# negative based suite on negative-suite +# negative based suite on negative-suite-2 lsf_negative_suite_2() { suite=lsf-negative-suite-2 solution=lsf - test_cases="TestRunKMSInstanceNameAndKMSKeyNameWithInvalidValue,TestRunExistSubnetIDVpcNameAsNull,TestRunInvalidSshKeysAndRemoteAllowedIP,TestRunInvalidDedicatedHostConfigurationWithZeroWorkerNodes" + test_cases="TestRunKMSInstanceNameAndKMSKeyNameWithInvalidValue,TestRunExistSubnetIDVpcNameAsNull,TestRunInvalidSshKeysAndRemoteAllowedIP" new_line="${test_cases//,/$'\n'}" echo "************** Going to run ${suite} ${new_line} **************" common_suite "${test_cases}" "${suite}" "${compute_image_name_rhel:?}" "${solution:?}" } -# negative based suite on negative-suite +# negative based suite on negative-suite-3 lsf_negative_suite_3() { suite=lsf-negative-suite-3 solution=lsf - test_cases="TestRunInvalidLDAPUsernamePassword,TestRunInvalidAPPCenterPassword,TestRunLSFWithoutMandatory,TestRunInvalidMinWorkerNodeCountGreaterThanMax" + test_cases="TestRunInvalidSubnetCIDR,TestRunInvalidAPPCenterPassword,TestRunLSFWithoutMandatory" + new_line="${test_cases//,/$'\n'}" + echo "************** Going to run ${suite} ${new_line} **************" + common_suite "${test_cases}" "${suite}" "${compute_image_name_rhel:?}" "${solution:?}" +} + +# negative based suite on negative-suite-4 +lsf_negative_suite_4() { + suite=lsf-negative-suite-4 + solution=lsf + test_cases="TestRunInvalidDedicatedHostConfigurationWithZeroWorkerNodes,TestRunInvalidMinWorkerNodeCountGreaterThanMax" + new_line="${test_cases//,/$'\n'}" + echo "************** Going to run ${suite} ${new_line} **************" + common_suite "${test_cases}" "${suite}" "${compute_image_name_rhel:?}" "${solution:?}" +} + +# negative based suite on negative-suite-5 +lsf_negative_suite_5() { + suite=lsf-negative-suite-5 + solution=lsf + test_cases="TestRunInvalidLDAPUsernamePassword,TestRunInvalidLDAPServerCert" new_line="${test_cases//,/$'\n'}" echo "************** Going to run ${suite} ${new_line} **************" common_suite "${test_cases}" "${suite}" "${compute_image_name_rhel:?}" "${solution:?}" diff --git a/modules/dedicated_host/variables.tf b/modules/dedicated_host/variables.tf index 6e209f0f..839a96be 100644 --- a/modules/dedicated_host/variables.tf +++ b/modules/dedicated_host/variables.tf @@ -33,17 +33,14 @@ variable "zone" { variable "family" { description = "Family defines the purpose of the 
dedicated host, The dedicated host family can be defined from balanced,compute or memory. Refer [Understanding DH Profile family](https://cloud.ibm.com/docs/vpc?topic=vpc-dh-profiles&interface=ui) for more details" type = string - default = "balanced" } variable "class" { description = "Profile class of the dedicated host, this has to be defined based on the VSI usage. Refer [Understanding DH Class](https://cloud.ibm.com/docs/vpc?topic=vpc-dh-profiles&interface=ui) for more details" type = string - default = "bx2" } variable "profile" { description = "Profile for the dedicated hosts(size and resources). Refer [Understanding DH Profile](https://cloud.ibm.com/docs/vpc?topic=vpc-dh-profiles&interface=ui) for more details" type = string - default = "bx2-host-152x608" } diff --git a/tests/README.md b/tests/README.md index 87f0ba2c..770b39c8 100644 --- a/tests/README.md +++ b/tests/README.md @@ -128,12 +128,12 @@ To override default values, pass the required parameters when executing the comm #### Example for HPC: ```sh -SOLUTION=hpc SSH_KEY=your_ssh_key ZONE=your_zone EXISTING_RESOURCE_GROUP=your_existing_resource_group RESERVATION_ID=your_reservation_id KMS_INSTANCE_ID=your_kms_instance_id KMS_KEY_NAME=your_kms_key_name IMAGE_NAME=your_image_name CLUSTER_NAME=your_cluster_name DEFAULT_EXITING_RESOURCE_GROUP=your_default_existing_resource_group NON_DEFAULT_EXITING_RESOURCE_GROUP=your_non_default_existing_resource_group LOGIN_NODE_INSTANCE_TYPE=your_login_node_instance_type MANAGEMENT_IMAGE_NAME=your_management_image_name COMPUTE_IMAGE_NAME=your_compute_image_name MANAGEMENT_NODE_INSTANCE_TYPE=your_management_node_instance_type MANAGEMENT_NODE_COUNT=your_management_node_count ENABLE_VPC_FLOW_LOGS=true KEY_MANAGEMENT=enabled KMS_INSTANCE_NAME=your_kms_instance_name HYPERTHREADING_ENABLED=true SSH_FILE_PATH=your_ssh_file_path EXISTING_CERTIFICATE_INSTANCE=existing_certificate_instance go test -v -timeout=900m -parallel=4 -run "TestRunBasic"| tee -a $LOG_FILE_NAME +SOLUTION=hpc SSH_KEY=your_ssh_key ZONE=your_zone EXISTING_RESOURCE_GROUP=your_existing_resource_group RESERVATION_ID=your_reservation_id KMS_INSTANCE_ID=your_kms_instance_id KMS_KEY_NAME=your_kms_key_name IMAGE_NAME=your_image_name CLUSTER_NAME=your_cluster_name DEFAULT_EXITING_RESOURCE_GROUP=your_default_existing_resource_group NON_DEFAULT_EXITING_RESOURCE_GROUP=your_non_default_existing_resource_group LOGIN_NODE_INSTANCE_TYPE=your_login_node_instance_type MANAGEMENT_IMAGE_NAME=your_management_image_name COMPUTE_IMAGE_NAME=your_compute_image_name MANAGEMENT_NODE_INSTANCE_TYPE=your_management_node_instance_type MANAGEMENT_NODE_COUNT=your_management_node_count ENABLE_VPC_FLOW_LOGS=true KEY_MANAGEMENT=enabled KMS_INSTANCE_NAME=your_kms_instance_name HYPERTHREADING_ENABLED=true SSH_FILE_PATH=your_ssh_file_path APP_CENTER_EXISTING_CERTIFICATE_INSTANCE=your_app_center_existing_certificate_instance go test -v -timeout=900m -parallel=4 -run "TestRunBasic"| tee -a $LOG_FILE_NAME ``` #### Example for LSF: ```sh -SOLUTION=lsf SSH_KEY=your_ssh_key ZONE=your_zone EXISTING_RESOURCE_GROUP=your_default_existing_resource_group IBM_CUSTOMER_NUMBER=your_customer_number WORKER_NODE_MAX_COUNT=your_worker_node_max_count WORKER_NODE_INSTANCE_TYPE=your_worker_node_instance_type KMS_INSTANCE_ID=your_kms_instance_id KMS_KEY_NAME=your_kms_key_name IMAGE_NAME=your_image_name CLUSTER_NAME=your_cluster_name DEFAULT_EXISTING_RESOURCE_GROUP=your_default_existing_resource_group NON_DEFAULT_EXISTING_RESOURCE_GROUP=your_non_default_existing_resource_group 
LOGIN_NODE_INSTANCE_TYPE=your_login_node_instance_type MANAGEMENT_IMAGE_NAME=your_management_image_name COMPUTE_IMAGE_NAME=your_compute_image_name MANAGEMENT_NODE_INSTANCE_TYPE=your_management_node_instance_type MANAGEMENT_NODE_COUNT=your_management_node_count ENABLE_VPC_FLOW_LOGS=true KEY_MANAGEMENT=enabled KMS_INSTANCE_NAME=your_kms_instance_name HYPERTHREADING_ENABLED=true SSH_FILE_PATH=your_ssh_file_path EXISTING_CERTIFICATE_INSTANCE=existing_certificate_instance go test -v -timeout=900m -parallel=4 -run "TestRunBasic" | tee -a $LOG_FILE_NAME +SOLUTION=lsf SSH_KEY=your_ssh_key ZONE=your_zone EXISTING_RESOURCE_GROUP=your_default_existing_resource_group IBM_CUSTOMER_NUMBER=your_customer_number WORKER_NODE_MAX_COUNT=your_worker_node_max_count WORKER_NODE_INSTANCE_TYPE=your_worker_node_instance_type KMS_INSTANCE_ID=your_kms_instance_id KMS_KEY_NAME=your_kms_key_name IMAGE_NAME=your_image_name CLUSTER_NAME=your_cluster_name DEFAULT_EXISTING_RESOURCE_GROUP=your_default_existing_resource_group NON_DEFAULT_EXISTING_RESOURCE_GROUP=your_non_default_existing_resource_group LOGIN_NODE_INSTANCE_TYPE=your_login_node_instance_type MANAGEMENT_IMAGE_NAME=your_management_image_name COMPUTE_IMAGE_NAME=your_compute_image_name MANAGEMENT_NODE_INSTANCE_TYPE=your_management_node_instance_type MANAGEMENT_NODE_COUNT=your_management_node_count ENABLE_VPC_FLOW_LOGS=true KEY_MANAGEMENT=enabled KMS_INSTANCE_NAME=your_kms_instance_name HYPERTHREADING_ENABLED=true SSH_FILE_PATH=your_ssh_file_path APP_CENTER_EXISTING_CERTIFICATE_INSTANCE=your_app_center_existing_certificate_instance go test -v -timeout=900m -parallel=4 -run "TestRunBasic" | tee -a $LOG_FILE_NAME ``` ### Notes: @@ -255,7 +255,7 @@ For additional help, contact the project maintainers. - **LSFCheckManagementNodeCount**: Verify the count of management nodes. - **HPCCheckContractID**: Check the contract ID for HPC. - **LSFCheckMasterName**: Verify the master node name. -- **LSFCheckClusterID**: Check the cluster ID. +- **LSFCheckClusterName**: Check the cluster Name. - **LSFIPRouteCheck**: Verify IP routing in LSF. - **LSFMTUCheck**: Check the MTU settings. - **IsDynamicNodeAvailable**: Check if a dynamic node is available. 
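For orientation, the long override lists in the examples above can be pared back for a quick smoke run: the `ValidateRequiredEnvironmentVariables` helper updated later in this change only enforces `SSH_FILE_PATH`, `SSH_KEY`, `CLUSTER_NAME`, `ZONE`, and `RESERVATION_ID`. A minimal sketch under that assumption is shown below (all values are placeholders; anything left unset presumably falls back to the defaults in the test configuration, e.g. `hpc_config.yml`):

```sh
# Minimal sketch: only the variables enforced by ValidateRequiredEnvironmentVariables.
# Placeholder values; unset options presumably fall back to the test configuration defaults.
SOLUTION=hpc SSH_KEY=your_ssh_key ZONE=your_zone CLUSTER_NAME=your_cluster_name \
RESERVATION_ID=your_reservation_id SSH_FILE_PATH=your_ssh_file_path \
go test -v -timeout=900m -run "TestRunBasic" | tee -a $LOG_FILE_NAME
```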
diff --git a/tests/hpc_config.yml b/tests/hpc_config.yml index 8c1da628..ab2abddd 100644 --- a/tests/hpc_config.yml +++ b/tests/hpc_config.yml @@ -2,7 +2,7 @@ solution: hpc default_resource_group: Default non_default_resource_group: WES_TEST zone: us-east-3 -cluster_id: HPC-LSF-1 +cluster_name: HPC-LSF-1 reservation_id: remote_allowed_ips: ssh_key: geretain-hpc @@ -27,15 +27,15 @@ ldap_user_name: tester ldap_user_password: Pass@123 # pragma: allowlist secret us_east_zone: us-east-3 us_east_reservation_id: -us_east_cluster_id: HPC-LSF-2 +us_east_cluster_name: HPC-LSF-2 eu_de_zone: eu-de-3 eu_de_reservation_id: -eu_de_cluster_id: HPC-LSF-1 +eu_de_cluster_name: HPC-LSF-1 us_south_zone: us-south-1 us_south_reservation_id: jp_tok_zone: jp-tok-1 -jp_tok_cluster_id: HPC-LSF-2 +jp_tok_cluster_name: HPC-LSF-2 jp_tok_reservation_id: -us_south_cluster_id: HPC-LSF-1 +us_south_cluster_name: HPC-LSF-1 ssh_file_path: /artifacts/.ssh/id_rsa ssh_file_path_two: /artifacts/.ssh/id_rsa diff --git a/tests/lsf/cluster_helpers.go b/tests/lsf/cluster_helpers.go index 6c4e9364..561fcb80 100644 --- a/tests/lsf/cluster_helpers.go +++ b/tests/lsf/cluster_helpers.go @@ -17,7 +17,7 @@ import ( func VerifyManagementNodeConfig( t *testing.T, sshMgmtClient *ssh.Client, - expectedClusterID, expectedMasterName, expectedReservationID string, + expectedClusterName, expectedMasterName, expectedReservationID string, expectedHyperthreadingStatus bool, managementNodeIPList []string, lsfVersion string, @@ -25,8 +25,8 @@ func VerifyManagementNodeConfig( logger *utils.AggregatedLogger, ) { // Verify Cluster ID - checkClusterIDErr := LSFCheckClusterID(t, sshMgmtClient, expectedClusterID, logger) - utils.LogVerificationResult(t, checkClusterIDErr, "Check Cluster ID on management node", logger) + checkClusterNameErr := LSFCheckClusterName(t, sshMgmtClient, expectedClusterName, logger) + utils.LogVerificationResult(t, checkClusterNameErr, "Check Cluster Name on management node", logger) // Verify Master Name checkMasterNameErr := LSFCheckMasterName(t, sshMgmtClient, expectedMasterName, logger) @@ -192,7 +192,7 @@ func VerifyAPPCenterConfig( func VerifyLoginNodeConfig( t *testing.T, sshLoginClient *ssh.Client, - expectedClusterID, expectedMasterName, expectedReservationID string, + expectedClusterName, expectedMasterName, expectedReservationID string, expectedHyperthreadingStatus bool, loginNodeIP string, jobCommand string, @@ -201,8 +201,8 @@ func VerifyLoginNodeConfig( ) { // Verify cluster ID - checkClusterIDErr := LSFCheckClusterID(t, sshLoginClient, expectedClusterID, logger) - utils.LogVerificationResult(t, checkClusterIDErr, "check Cluster ID on login node", logger) + checkClusterNameErr := LSFCheckClusterName(t, sshLoginClient, expectedClusterName, logger) + utils.LogVerificationResult(t, checkClusterNameErr, "check Cluster Name on login node", logger) // Verify master name checkMasterNameErr := LSFCheckMasterName(t, sshLoginClient, expectedMasterName, logger) @@ -669,9 +669,70 @@ func VerifyCloudLogs( // and comparing it against the expected profile obtained from IBM Cloud CLI.
func ValidateDynamicNodeProfile(t *testing.T, apiKey, region, resourceGroup, clusterPrefix string, options *testhelper.TestOptions, logger *utils.AggregatedLogger) { - expectedDynamicWorkerProfile, expectedWorkerNodeProfileErr := utils.GetFirstWorkerNodeProfile(t, options.TerraformVars, logger) + expectedDynamicWorkerProfile, expectedWorkerNodeProfileErr := utils.GetFirstWorkerNodeInstanceType(t, options.TerraformVars, logger) utils.LogVerificationResult(t, expectedWorkerNodeProfileErr, "Fetching worker node profile", logger) validateDynamicWorkerProfileErr := ValidateDynamicWorkerProfile(t, apiKey, region, resourceGroup, clusterPrefix, expectedDynamicWorkerProfile, logger) utils.LogVerificationResult(t, validateDynamicWorkerProfileErr, "Validating dynamic worker node profile", logger) + +} + +// VerifyCloudMonitoring checks the cloud monitoring configuration and status. +// It validates the cloud monitoring URL from Terraform outputs and the monitoring services +// for management and compute nodes. The function logs verification results +// and handles errors gracefully. It takes test context, SSH client, cluster +// details, monitoring flags, and a logger as parameters. No values are +// returned; only validation outcomes are logged. +func VerifyCloudMonitoring( + t *testing.T, + sshClient *ssh.Client, + expectedSolution string, + LastTestTerraformOutputs map[string]interface{}, + managementNodeIPList []string, staticWorkerNodeIPList []string, + isCloudMonitoringEnabledForManagement, isCloudMonitoringEnabledForCompute bool, + logger *utils.AggregatedLogger) { + + // Verify cloud monitoring URL from Terraform outputs + err := VerifycloudMonitoringURLFromTerraformOutput(t, LastTestTerraformOutputs, isCloudMonitoringEnabledForManagement, isCloudMonitoringEnabledForCompute, logger) + utils.LogVerificationResult(t, err, "cloud monitoring URL from Terraform outputs", logger) + + // Verify Prometheus and Dragent services for management nodes + mgmtErr := LSFPrometheusAndDragentServiceForManagementNodes(t, sshClient, managementNodeIPList, isCloudMonitoringEnabledForManagement, logger) + utils.LogVerificationResult(t, mgmtErr, "Prometheus and Dragent services for management nodes", logger) + + // Verify Prometheus and Dragent services for compute nodes + compErr := LSFPrometheusAndDragentServiceForComputeNodes(t, sshClient, expectedSolution, staticWorkerNodeIPList, isCloudMonitoringEnabledForCompute, logger) + utils.LogVerificationResult(t, compErr, "Prometheus and Dragent services for compute nodes", logger) + +} + +// ValidateAtracker verifies the Atracker Route Target configuration in IBM Cloud. +// If Observability Atracker is enabled, it retrieves the target ID, ensures it meets the expected criteria, +// and validates it against the specified target type. If Observability Atracker is disabled, +// the function ensures no target ID is set. Any retrieval or validation failures are logged, +// and the function exits early in case of errors to prevent further issues.
+func ValidateAtracker(t *testing.T, apiKey, region, resourceGroup, clusterPrefix, targetType string, ObservabilityAtrackerEnable bool, logger *utils.AggregatedLogger) { + + if ObservabilityAtrackerEnable { + // Fetch the Atracker Route Target ID + targetID, atrackerRouteTargetIDErr := GetAtrackerRouteTargetID(t, apiKey, region, resourceGroup, clusterPrefix, ObservabilityAtrackerEnable, logger) + if atrackerRouteTargetIDErr != nil { + utils.LogVerificationResult(t, atrackerRouteTargetIDErr, "ValidateAtracker: Failed to retrieve Atracker Route Target ID", logger) + return // Exit early to prevent further errors + } + + // Ensure Target ID is set and has a valid length when Observability Atracker is enabled + trimmedTargetID := strings.TrimSpace(targetID) + if len(trimmedTargetID) <= 36 { + utils.LogVerificationResult(t, fmt.Errorf("target ID is either missing or too short (must be more than 36 characters)"), + "ValidateAtracker: Target ID invalid", logger) + return + } + + // Validate the Atracker Route Target + atrackerRouteTargetErr := ValidateAtrackerRouteTarget(t, apiKey, region, resourceGroup, clusterPrefix, targetID, targetType, logger) + if atrackerRouteTargetErr != nil { + utils.LogVerificationResult(t, atrackerRouteTargetErr, "ValidateAtracker: Validation failed for Atracker Route Target", logger) + } + } } diff --git a/tests/lsf/cluster_utils.go b/tests/lsf/cluster_utils.go index b0d170cf..265a559e 100644 --- a/tests/lsf/cluster_utils.go +++ b/tests/lsf/cluster_utils.go @@ -106,11 +106,11 @@ func LSFIPRouteCheck(t *testing.T, sClient *ssh.Client, ipsList []string, logger return nil } -// LSFCheckClusterID checks if the provided cluster ID matches the expected value. +// LSFCheckClusterName checks if the provided cluster ID matches the expected value. // It uses the provided SSH client to execute the 'lsid' command and verifies // if the expected cluster ID is present in the command output. // Returns an error if the checks fail. 
-func LSFCheckClusterID(t *testing.T, sClient *ssh.Client, expectedClusterID string, logger *utils.AggregatedLogger) error { +func LSFCheckClusterName(t *testing.T, sClient *ssh.Client, expectedClusterName string, logger *utils.AggregatedLogger) error { // Execute the 'lsid' command to get the cluster ID command := "source /opt/ibm/lsf/conf/profile.lsf; lsid" @@ -120,13 +120,13 @@ func LSFCheckClusterID(t *testing.T, sClient *ssh.Client, expectedClusterID stri } // Verify if the expected cluster ID is present in the output - if !utils.VerifyDataContains(t, output, "My cluster name is "+expectedClusterID, logger) { + if !utils.VerifyDataContains(t, output, "My cluster name is "+expectedClusterName, logger) { // Extract actual cluster version from the output for better error reporting actualValue := strings.TrimSpace(strings.Split(strings.Split(output, "My cluster name is")[1], "My master name is")[0]) - return fmt.Errorf("expected cluster ID %s , but found %s", expectedClusterID, actualValue) + return fmt.Errorf("expected cluster name %s, but found %s", expectedClusterName, actualValue) } // Log success if no errors occurred - logger.Info(t, fmt.Sprintf("Cluster ID is set as expected : %s", expectedClusterID)) + logger.Info(t, fmt.Sprintf("Cluster name is set as expected: %s", expectedClusterName)) return nil } @@ -1337,7 +1337,7 @@ func VerifyEncryption(t *testing.T, apiKey, region, resourceGroup, clusterPrefix // ValidateRequiredEnvironmentVariables checks if the required environment variables are set and valid func ValidateRequiredEnvironmentVariables(envVars map[string]string) error { - requiredVars := []string{"SSH_FILE_PATH", "SSH_KEY", "CLUSTER_ID", "ZONE", "RESERVATION_ID"} + requiredVars := []string{"SSH_FILE_PATH", "SSH_KEY", "CLUSTER_NAME", "ZONE", "RESERVATION_ID"} for _, fieldName := range requiredVars { fieldValue, ok := envVars[fieldName] if !ok || fieldValue == "" { @@ -2161,11 +2161,11 @@ func GetLDAPServerCert(publicHostName, bastionIP, ldapHostName, ldapServerIP str // It extracts the cluster ID, reservation ID, and cluster prefix from the provided test options. // Returns the cluster ID, reservation ID, and cluster prefix as strings. func GetClusterInfo(options *testhelper.TestOptions) (string, string, string) { - var clusterID, reservationID, clusterPrefix string + var clusterName, reservationID, clusterPrefix string // Retrieve values safely with type assertion - if id, ok := options.TerraformVars["cluster_id"].(string); ok { - clusterID = id + if id, ok := options.TerraformVars["cluster_name"].(string); ok { + clusterName = id } if reservation, ok := options.TerraformVars["reservation_id"].(string); ok { reservationID = reservation @@ -2174,7 +2174,7 @@ func GetClusterInfo(options *testhelper.TestOptions) (string, string, string) { clusterPrefix = prefix } - return clusterID, reservationID, clusterPrefix + return clusterName, reservationID, clusterPrefix } // SetJobCommands generates job commands customized for the specified solution type and zone. @@ -3432,6 +3432,199 @@ func CheckPlatformLogsPresent(t *testing.T, apiKey, region, resourceGroup string return true, nil } +// VerifycloudMonitoringURLFromTerraformOutput validates the cloud monitoring URL in Terraform outputs. +// It checks required fields in the Terraform output map and ensures the cloud monitoring URL +// is present when cloud monitoring is enabled for either management or compute nodes. +// If validation fails, it returns an error; otherwise, it logs success.
+ +func VerifycloudMonitoringURLFromTerraformOutput(t *testing.T, LastTestTerraformOutputs map[string]interface{}, isCloudMonitoringEnabledForManagement, isCloudMonitoringEnabledForCompute bool, logger *utils.AggregatedLogger) error { + + logger.Info(t, fmt.Sprintf("Terraform Outputs: %+v", LastTestTerraformOutputs)) + + // Required fields for validation + requiredFields := []string{ + "ssh_to_management_node_1", + "ssh_to_login_node", + "region_name", + "vpc_name", + } + + // Validate required fields + for _, field := range requiredFields { + value, ok := LastTestTerraformOutputs[field].(string) + if !ok || len(strings.TrimSpace(value)) == 0 { + return fmt.Errorf("field '%s' is missing or empty in Terraform outputs", field) + } + logger.Info(t, fmt.Sprintf("%s = %s", field, value)) + } + + // Validate cloud_monitoring_url if monitoring is enabled + if isCloudMonitoringEnabledForManagement || isCloudMonitoringEnabledForCompute { + cloudMonitoringURL, ok := LastTestTerraformOutputs["cloud_monitoring_url"].(string) + if !ok || len(strings.TrimSpace(cloudMonitoringURL)) == 0 { + return errors.New("missing or empty 'cloud_monitoring_url' in Terraform outputs") + } + logger.Info(t, fmt.Sprintf("cloud_monitoring_url = %s", cloudMonitoringURL)) + statusCode, err := utils.CheckAPIStatus(cloudMonitoringURL) + if err != nil { + return fmt.Errorf("error checking cloud_monitoring_url API: %v", err) + } + + logger.Info(t, fmt.Sprintf("API Status: %s - %d", cloudMonitoringURL, statusCode)) + + if statusCode < 200 || statusCode >= 500 { + logger.Warn(t, fmt.Sprintf("API returned non-success status: %d", statusCode)) + return fmt.Errorf("API returned non-success status: %d", statusCode) + } else { + logger.PASS(t, fmt.Sprintf("API returned success status: %d", statusCode)) + } + logger.Info(t, fmt.Sprintf("API Status: %s - %d\n", cloudMonitoringURL, statusCode)) + } + + logger.Info(t, "cloud_monitoring_url Terraform output validation completed successfully") + return nil +} + +// LSFPrometheusAndDragentServiceForManagementNodes validates the Prometheus and Dragent services for management nodes. +// If cloud monitoring is enabled, it connects via SSH to each management node and verifies service statuses. +// The function logs results and returns an error if any node fails validation. + +func LSFPrometheusAndDragentServiceForManagementNodes(t *testing.T, sshClient *ssh.Client, managementNodeIPs []string, isCloudMonitoringEnabledForManagement bool, logger *utils.AggregatedLogger) error { + + // Ensure management node IPs are provided if cloud monitoring is enabled + if isCloudMonitoringEnabledForManagement { + if len(managementNodeIPs) == 0 { + return errors.New("management node IPs cannot be empty") + } + + for _, managementIP := range managementNodeIPs { + + err := VerifyLSFPrometheusServiceForNode(t, sshClient, managementIP, logger) + if err != nil { + return fmt.Errorf("failed Prometheus service verification for management node %s: %w", managementIP, err) + } + + err = VerifyLSFdragentServiceForNode(t, sshClient, managementIP, logger) + if err != nil { + return fmt.Errorf("failed dragent service verification for management node %s: %w", managementIP, err) + } + } + } else { + logger.Warn(t, "Cloud monitoring is not enabled for the management nodes. As a result, the Prometheus and dragent services will not be validated.") + } + + return nil +} + +// LSFPrometheusAndDragentServiceForComputeNodes validates the Prometheus and Dragent services for compute nodes.
+// If cloud monitoring is enabled, it retrieves compute node IPs and verifies service statuses via SSH. +// The function logs results and returns an error if any node fails validation. + +func LSFPrometheusAndDragentServiceForComputeNodes( + t *testing.T, + sshClient *ssh.Client, + expectedSolution string, + staticWorkerNodeIPs []string, + isCloudMonitoringEnabledForCompute bool, + logger *utils.AggregatedLogger) error { + + // Ensure worker node IPs are provided if cloud monitoring is enabled + if isCloudMonitoringEnabledForCompute { + if len(staticWorkerNodeIPs) == 0 { + return errors.New("worker node IPs cannot be empty") + } + + // Retrieve compute node IPs from the worker nodes + computeNodeIPs, err := GetComputeNodeIPs(t, sshClient, logger, expectedSolution, staticWorkerNodeIPs) + if err != nil || len(computeNodeIPs) == 0 { + return fmt.Errorf("failed to retrieve compute node IPs: %w", err) + } + + // Iterate over each compute node and verify the Prometheus and dragent services + for _, computeIP := range computeNodeIPs { + err := VerifyLSFPrometheusServiceForNode(t, sshClient, computeIP, logger) + if err != nil { + return fmt.Errorf("failed Prometheus service verification for compute node %s: %w", computeIP, err) + } + + err = VerifyLSFdragentServiceForNode(t, sshClient, computeIP, logger) + if err != nil { + return fmt.Errorf("failed dragent service verification for compute node %s: %w", computeIP, err) + } + + } + } else { + logger.Warn(t, "Cloud monitoring is not enabled for the compute nodes. As a result, the Prometheus and dragent services will not be validated.") + } + return nil +} + +// VerifyLSFPrometheusServiceForNode checks the status of the Prometheus service on a given node. +// It ensures the service is running and returns an error if its state does not match "active (running)." +func VerifyLSFPrometheusServiceForNode( + t *testing.T, + sshClient *ssh.Client, + nodeIP string, + logger *utils.AggregatedLogger) error { + + // Command to check the status of the Prometheus service on the node + command := fmt.Sprintf("ssh %s systemctl status prometheus", nodeIP) + output, err := utils.RunCommandInSSHSession(sshClient, command) + if err != nil { + // Return an error if the command fails to execute + return fmt.Errorf("failed to execute command '%s' on node %s: %w", command, nodeIP, err) + } + + // Expected Prometheus service state should be "active (running)" + expectedState := "Active: active (running)" + + // Verify if the service is in the expected running state + if !utils.VerifyDataContains(t, output, expectedState, logger) { + // If the service state does not match the expected state, return an error with output + return fmt.Errorf( + "unexpected Prometheus service state for node %s: expected '%s', got:\n%s", + nodeIP, expectedState, output, + ) + } + + // Log success if the Prometheus service is running as expected + logger.Info(t, fmt.Sprintf("Prometheus service validation passed for node %s", nodeIP)) + return nil +} + +// VerifyLSFdragentServiceForNode checks the status of the dragent service on a given node. +// It ensures the service is running and returns an error if its state does not match "active (running)."
+func VerifyLSFdragentServiceForNode( + t *testing.T, + sshClient *ssh.Client, + nodeIP string, + logger *utils.AggregatedLogger) error { + + // Command to check the status of the dragent service on the node + command := fmt.Sprintf("ssh %s systemctl status dragent", nodeIP) + output, err := utils.RunCommandInSSHSession(sshClient, command) + if err != nil { + // Return an error if the command fails to execute + return fmt.Errorf("failed to execute command '%s' on node %s: %w", command, nodeIP, err) + } + + // Expected dragent service state should be "active (running)" + expectedState := "Active: active (running)" + + // Verify if the service is in the expected running state + if !utils.VerifyDataContains(t, output, expectedState, logger) { + // If the service state does not match the expected state, return an error with output + return fmt.Errorf( + "unexpected dragent service state for node %s: expected '%s', got:\n%s", + nodeIP, expectedState, output, + ) + } + + // Log success if the dragent service is running as expected + logger.Info(t, fmt.Sprintf("dragent service validation passed for node %s", nodeIP)) + return nil +} + // ValidateDynamicWorkerProfile checks if the dynamic worker node profile matches the expected value. // It logs into IBM Cloud, fetches cluster resources, extracts the worker profile, and validates it. // Returns an error if the actual profile differs from the expected profile; otherwise, it returns nil. @@ -3471,7 +3664,135 @@ func ValidateDynamicWorkerProfile(t *testing.T, apiKey, region, resourceGroup, c return fmt.Errorf("dynamic worker node profile mismatch: actual: '%s', expected: '%s', output: '%s'", actualDynamicWorkerProfile, expectedDynamicWorkerProfile, clusterResourceList) } - logger.Info(t, "Dynamic worker node profile matches the first profile from worker node instance type") + return nil +} + +// GetAtrackerRouteTargetID retrieves the Atracker route target ID from IBM Cloud. +// It logs into IBM Cloud, fetches route details, and extracts the target ID if Observability Atracker is enabled. +// If Observability Atracker is disabled, it ensures no Atracker route exists. +// Returns the target ID if found or an error if retrieval or validation fails. +func GetAtrackerRouteTargetID(t *testing.T, apiKey, region, resourceGroup, clusterPrefix string, ObservabilityAtrackerEnable bool, logger *utils.AggregatedLogger) (string, error) { + + type Rule struct { + TargetIDs []string `json:"target_ids"` + } + type RouteResponse struct { + ID string `json:"id"` + Name string `json:"name"` + CRN string `json:"crn"` + Rules []Rule `json:"rules"` + } + + if strings.Contains(resourceGroup, "null") { + resourceGroup = fmt.Sprintf("%s-workload-rg", clusterPrefix) + } + + if err := utils.LoginIntoIBMCloudUsingCLI(t, apiKey, region, resourceGroup); err != nil { + return "", fmt.Errorf("failed to log in to IBM Cloud: %w", err) + } + + cmd := exec.Command("ibmcloud", "atracker", "route", "get", "--route", fmt.Sprintf("%s-atracker-route", clusterPrefix), "--output", "JSON") + output, err := cmd.CombinedOutput() + if err != nil { + return "", fmt.Errorf("failed to retrieve atracker route: %s, error: %w", string(output), err) + } + + var response RouteResponse + if err := json.Unmarshal(output, &response); err != nil { + return "", fmt.Errorf("error unmarshaling JSON: %w.
Raw output: %s", err, string(output)) + } + + jsonResp, _ := json.MarshalIndent(response, "", " ") + logger.Info(t, fmt.Sprintf("Atracker Route Response: %s", string(jsonResp))) + + expectedRouteName := fmt.Sprintf("%s-atracker-route", clusterPrefix) + if !utils.VerifyDataContains(t, strings.TrimSpace(response.Name), expectedRouteName, logger) { + return "", fmt.Errorf("unexpected atracker route name: got %s, want %s", response.Name, expectedRouteName) + } + + if len(response.Rules) == 0 || len(response.Rules[0].TargetIDs) == 0 { + return "", errors.New("no target IDs found in rules") + } + + logger.Info(t, fmt.Sprintf("Target ID: %s", response.Rules[0].TargetIDs[0])) + return response.Rules[0].TargetIDs[0], nil +} + +// ValidateAtrackerRouteTarget verifies the properties of an Atracker route target in IBM Cloud. +// It logs into IBM Cloud, fetches the target details, and ensures that the target ID, name, +// type, write status, and CRN meet expected values. If any validation fails, it returns an error. +func ValidateAtrackerRouteTarget(t *testing.T, apiKey, region, resourceGroup, clusterPrefix, targetID, targetType string, logger *utils.AggregatedLogger) error { + // Define response structures + type WriteStatus struct { + Status string `json:"status"` + } + type TargetResponse struct { + ID string `json:"id"` + Name string `json:"name"` + CRN string `json:"crn"` + TargetType string `json:"target_type"` + WriteStatus WriteStatus `json:"write_status"` + } + + // Handle null resourceGroup + if strings.Contains(resourceGroup, "null") { + resourceGroup = fmt.Sprintf("%s-workload-rg", clusterPrefix) + } + + // Login to IBM Cloud + if err := utils.LoginIntoIBMCloudUsingCLI(t, apiKey, region, resourceGroup); err != nil { + return fmt.Errorf("failed to log in to IBM Cloud: %w", err) + } + + // Execute command to get Atracker target details + cmd := exec.Command("bash", "-c", fmt.Sprintf("ibmcloud atracker target validate --target %s --output JSON", targetID)) + output, err := cmd.CombinedOutput() + if err != nil { + return fmt.Errorf("failed to retrieve atracker target: %s, error: %w", string(output), err) + } + + // Parse JSON response + var response TargetResponse + if err := json.Unmarshal(output, &response); err != nil { + return fmt.Errorf("error unmarshaling JSON: %s, error: %w", string(output), err) + } + + // Log the parsed response + logger.Info(t, fmt.Sprintf("Atracker Target Response: %+v", response)) + + // Expected target name based on targetType + expectedTargetName := fmt.Sprintf("%s-atracker", clusterPrefix) + if targetType == "cloudlogs" { + expectedTargetName = fmt.Sprintf("%s-atracker-target", clusterPrefix) + } + + // Validate target name + if !utils.VerifyDataContains(t, strings.TrimSpace(response.Name), expectedTargetName, logger) { + return fmt.Errorf("unexpected atracker target name: got %s, want %s", response.Name, expectedTargetName) + } + + // Validate write status + if !utils.VerifyDataContains(t, strings.TrimSpace(response.WriteStatus.Status), "success", logger) { + return fmt.Errorf("unexpected write status: got %s, want success", response.WriteStatus.Status) + } + + // Normalize targetType before validation + expectedTargetType := targetType + if targetType == "cloudlogs" { + expectedTargetType = "cloud_logs" + } else if targetType == "cos" { + expectedTargetType = "cloud_object_storage" + } + + // Validate target type + if !utils.VerifyDataContains(t, strings.TrimSpace(response.TargetType), expectedTargetType, logger) { + return fmt.Errorf("unexpected target type: 
got %s, want %s", response.TargetType, expectedTargetType) + } + + // Validate CRN presence + if response.CRN == "" { + return errors.New("CRN value should not be empty") + } return nil } diff --git a/tests/lsf/cluster_validation.go b/tests/lsf/cluster_validation.go index a62884dd..c2d7aff7 100644 --- a/tests/lsf/cluster_validation.go +++ b/tests/lsf/cluster_validation.go @@ -24,9 +24,9 @@ import ( func ValidateClusterConfiguration(t *testing.T, options *testhelper.TestOptions, testLogger *utils.AggregatedLogger) { // Retrieve common cluster details from options expectedSolution := strings.ToLower(options.TerraformVars["solution"].(string)) - expectedClusterID, expectedReservationID, expectedMasterName := GetClusterInfo(options) + expectedClusterName, expectedReservationID, expectedMasterName := GetClusterInfo(options) - expectedResourceGroup := options.TerraformVars["resource_group"].(string) + expectedResourceGroup := options.TerraformVars["existing_resource_group"].(string) expectedKeyManagement := options.TerraformVars["key_management"].(string) expectedZone := options.TerraformVars["zones"].([]string)[0] expectedNumOfKeys := len(options.TerraformVars["compute_ssh_keys"].([]string)) @@ -62,7 +62,7 @@ func ValidateClusterConfiguration(t *testing.T, options *testhelper.TestOptions, t.Log("Validation in progress. Please wait...") // Verify management node configuration - VerifyManagementNodeConfig(t, sshClient, expectedClusterID, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, managementNodeIPList, EXPECTED_LSF_VERSION, expectedSolution, testLogger) + VerifyManagementNodeConfig(t, sshClient, expectedClusterName, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, managementNodeIPList, EXPECTED_LSF_VERSION, expectedSolution, testLogger) // Verify SSH key on management nodes VerifySSHKey(t, sshClient, bastionIP, LSF_PUBLIC_HOST_NAME, LSF_PRIVATE_HOST_NAME, "management", managementNodeIPList, expectedNumOfKeys, testLogger) @@ -115,7 +115,7 @@ func ValidateClusterConfiguration(t *testing.T, options *testhelper.TestOptions, defer sshLoginNodeClient.Close() // Verify login node configuration - VerifyLoginNodeConfig(t, sshLoginNodeClient, expectedClusterID, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, loginNodeIP, jobCommandLow, EXPECTED_LSF_VERSION, testLogger) + VerifyLoginNodeConfig(t, sshLoginNodeClient, expectedClusterName, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, loginNodeIP, jobCommandLow, EXPECTED_LSF_VERSION, testLogger) // Verify SSH connectivity from login node VerifySSHConnectivityToNodesFromLogin(t, sshLoginNodeClient, managementNodeIPList, computeNodeIPList, testLogger) @@ -149,9 +149,9 @@ func ValidateClusterConfiguration(t *testing.T, options *testhelper.TestOptions, func ValidateClusterConfigurationWithAPPCenter(t *testing.T, options *testhelper.TestOptions, testLogger *utils.AggregatedLogger) { // Retrieve common cluster details from options expectedSolution := strings.ToLower(options.TerraformVars["solution"].(string)) - expectedClusterID, expectedReservationID, expectedMasterName := GetClusterInfo(options) + expectedClusterName, expectedReservationID, expectedMasterName := GetClusterInfo(options) - expectedResourceGroup := options.TerraformVars["resource_group"].(string) + expectedResourceGroup := options.TerraformVars["existing_resource_group"].(string) expectedKeyManagement := options.TerraformVars["key_management"].(string) expectedZone := 
options.TerraformVars["zones"].([]string)[0] expectedNumOfKeys := len(options.TerraformVars["compute_ssh_keys"].([]string)) @@ -187,7 +187,7 @@ func ValidateClusterConfigurationWithAPPCenter(t *testing.T, options *testhelper t.Log("Validation in progress. Please wait...") // Verify management node configuration - VerifyManagementNodeConfig(t, sshClient, expectedClusterID, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, managementNodeIPList, EXPECTED_LSF_VERSION, expectedSolution, testLogger) + VerifyManagementNodeConfig(t, sshClient, expectedClusterName, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, managementNodeIPList, EXPECTED_LSF_VERSION, expectedSolution, testLogger) // Verify application center configuration VerifyAPPCenterConfig(t, sshClient, testLogger) @@ -246,7 +246,7 @@ func ValidateClusterConfigurationWithAPPCenter(t *testing.T, options *testhelper defer sshLoginNodeClient.Close() // Verify login node configuration - VerifyLoginNodeConfig(t, sshLoginNodeClient, expectedClusterID, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, loginNodeIP, jobCommandLow, EXPECTED_LSF_VERSION, testLogger) + VerifyLoginNodeConfig(t, sshLoginNodeClient, expectedClusterName, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, loginNodeIP, jobCommandLow, EXPECTED_LSF_VERSION, testLogger) // Verify SSH connectivity from login node VerifySSHConnectivityToNodesFromLogin(t, sshLoginNodeClient, managementNodeIPList, computeNodeIPList, testLogger) @@ -280,9 +280,9 @@ func ValidateClusterConfigurationWithAPPCenter(t *testing.T, options *testhelper func ValidateClusterConfigurationWithPACHA(t *testing.T, options *testhelper.TestOptions, testLogger *utils.AggregatedLogger) { // Retrieve common cluster details from options expectedSolution := strings.ToLower(options.TerraformVars["solution"].(string)) - expectedClusterID, expectedReservationID, expectedMasterName := GetClusterInfo(options) + expectedClusterName, expectedReservationID, expectedMasterName := GetClusterInfo(options) - expectedResourceGroup := options.TerraformVars["resource_group"].(string) + expectedResourceGroup := options.TerraformVars["existing_resource_group"].(string) expectedKeyManagement := options.TerraformVars["key_management"].(string) expectedZone := options.TerraformVars["zones"].([]string)[0] expectedNumOfKeys := len(options.TerraformVars["compute_ssh_keys"].([]string)) @@ -331,7 +331,7 @@ func ValidateClusterConfigurationWithPACHA(t *testing.T, options *testhelper.Tes t.Log("Validation in progress. 
Please wait...") // Verify management node configuration - VerifyManagementNodeConfig(t, sshClient, expectedClusterID, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, managementNodeIPList, EXPECTED_LSF_VERSION, expectedSolution, testLogger) + VerifyManagementNodeConfig(t, sshClient, expectedClusterName, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, managementNodeIPList, EXPECTED_LSF_VERSION, expectedSolution, testLogger) // Verify application center configuration VerifyAPPCenterConfig(t, sshClient, testLogger) @@ -396,7 +396,7 @@ func ValidateClusterConfigurationWithPACHA(t *testing.T, options *testhelper.Tes defer sshLoginNodeClient.Close() // Verify login node configuration - VerifyLoginNodeConfig(t, sshLoginNodeClient, expectedClusterID, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, loginNodeIP, jobCommandLow, EXPECTED_LSF_VERSION, testLogger) + VerifyLoginNodeConfig(t, sshLoginNodeClient, expectedClusterName, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, loginNodeIP, jobCommandLow, EXPECTED_LSF_VERSION, testLogger) // Verify SSH connectivity from login node VerifySSHConnectivityToNodesFromLogin(t, sshLoginNodeClient, managementNodeIPList, computeNodeIPList, testLogger) @@ -421,14 +421,13 @@ func ValidateClusterConfigurationWithPACHA(t *testing.T, options *testhelper.Tes // It performs validation tasks on essential aspects of the cluster setup, // including the management node, compute nodes, and login node configurations. // Additionally, it ensures proper connectivity and functionality. -// The dynamic worker node profile should be created based on the first worker instance type object. // This function doesn't return any value but logs errors and validation steps during the process. func ValidateBasicClusterConfiguration(t *testing.T, options *testhelper.TestOptions, testLogger *utils.AggregatedLogger) { // Retrieve common cluster details from options expectedSolution := strings.ToLower(options.TerraformVars["solution"].(string)) - expectedClusterID, expectedReservationID, expectedMasterName := GetClusterInfo(options) + expectedClusterName, expectedReservationID, expectedMasterName := GetClusterInfo(options) - expectedResourceGroup := options.TerraformVars["resource_group"].(string) + expectedResourceGroup := options.TerraformVars["existing_resource_group"].(string) expectedKeyManagement := options.TerraformVars["key_management"].(string) expectedZone := options.TerraformVars["zones"].([]string)[0] @@ -463,7 +462,7 @@ func ValidateBasicClusterConfiguration(t *testing.T, options *testhelper.TestOpt t.Log("Validation in progress. 
Please wait...") // Verify management node configuration - VerifyManagementNodeConfig(t, sshClient, expectedClusterID, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, managementNodeIPList, EXPECTED_LSF_VERSION, expectedSolution, testLogger) + VerifyManagementNodeConfig(t, sshClient, expectedClusterName, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, managementNodeIPList, EXPECTED_LSF_VERSION, expectedSolution, testLogger) // Wait for dynamic node disappearance and handle potential errors defer func() { @@ -475,6 +474,95 @@ func ValidateBasicClusterConfiguration(t *testing.T, options *testhelper.TestOpt // Run job VerifyJobs(t, sshClient, jobCommandLow, testLogger) + // Get compute node IPs and handle errors + computeNodeIPList, err := GetComputeNodeIPs(t, sshClient, testLogger, expectedSolution, staticWorkerNodeIPList) + if err != nil { + t.Fatalf("Failed to retrieve dynamic compute node IPs: %v", err) + } + + // Verify compute node configuration + VerifyComputeNodeConfig(t, sshClient, expectedHyperthreadingEnabled, computeNodeIPList, testLogger) + + // Verify SSH connectivity from login node and handle connection errors + sshLoginNodeClient, connectionErr := utils.ConnectToHost(LSF_PUBLIC_HOST_NAME, bastionIP, LSF_PRIVATE_HOST_NAME, loginNodeIP) + require.NoError(t, connectionErr, "Failed to connect to the login node via SSH") + defer sshLoginNodeClient.Close() + + // Verify login node configuration + VerifyLoginNodeConfig(t, sshLoginNodeClient, expectedClusterName, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, loginNodeIP, jobCommandLow, EXPECTED_LSF_VERSION, testLogger) + + // Verify PTR records + VerifyPTRRecordsForManagementAndLoginNodes(t, sshClient, LSF_PUBLIC_HOST_NAME, bastionIP, LSF_PRIVATE_HOST_NAME, managementNodeIPList, loginNodeIP, expectedDnsDomainName, testLogger) + + // Verify LSF DNS on login node + VerifyLSFDNS(t, sshClient, []string{loginNodeIP}, expectedDnsDomainName, testLogger) + + // Verify file share encryption + VerifyFileShareEncryption(t, sshClient, os.Getenv("TF_VAR_ibmcloud_api_key"), utils.GetRegion(expectedZone), expectedResourceGroup, expectedMasterName, expectedKeyManagement, managementNodeIPList, testLogger) + + // Log validation end + testLogger.Info(t, t.Name()+" Validation ended") +} + +// ValidateBasicClusterConfigurationWithDynamicProfile validates basic cluster configuration. +// It performs validation tasks on essential aspects of the cluster setup, +// including the management node, compute nodes, and login node configurations. +// Additionally, it ensures proper connectivity and functionality. +// The dynamic worker node profile should be created based on the first worker instance type object. +// This function doesn't return any value but logs errors and validation steps during the process. 
+func ValidateBasicClusterConfigurationWithDynamicProfile(t *testing.T, options *testhelper.TestOptions, testLogger *utils.AggregatedLogger) { + // Retrieve common cluster details from options + expectedSolution := strings.ToLower(options.TerraformVars["solution"].(string)) + expectedClusterName, expectedReservationID, expectedMasterName := GetClusterInfo(options) + + expectedResourceGroup := options.TerraformVars["existing_resource_group"].(string) + expectedKeyManagement := options.TerraformVars["key_management"].(string) + + expectedZone := options.TerraformVars["zones"].([]string)[0] + + expectedDnsDomainName, ok := options.TerraformVars["dns_domain_name"].(map[string]string)["compute"] + require.True(t, ok, "Key 'compute' does not exist in dns_domain_name map or dns_domain_name is not of type map[string]string") + + expectedHyperthreadingEnabled, err := strconv.ParseBool(options.TerraformVars["hyperthreading_enabled"].(string)) + require.NoError(t, err, "Error parsing hyperthreading_enabled: %v", err) + + // Set job commands based on solution type + jobCommandLow, jobCommandMed := SetJobCommands(expectedSolution, expectedZone) + + // Run the test consistency check + clusterCreationErr := ValidateClusterCreation(t, options, testLogger) + if clusterCreationErr != nil { + require.NoError(t, clusterCreationErr, "Cluster creation validation failed: %v") + } + + // Retrieve server IPs (different logic for HPC vs LSF solutions) + bastionIP, managementNodeIPList, loginNodeIP, staticWorkerNodeIPList, ipRetrievalError := GetClusterIPs(t, options, expectedSolution, testLogger) + require.NoError(t, ipRetrievalError, "Error occurred while getting server IPs: %v", ipRetrievalError) + + // Log validation start + testLogger.Info(t, t.Name()+" Validation started ......") + + // Connect to the master node via SSH and handle connection errors + sshClient, connectionErr := utils.ConnectToHost(LSF_PUBLIC_HOST_NAME, bastionIP, LSF_PRIVATE_HOST_NAME, managementNodeIPList[0]) + require.NoError(t, connectionErr, "Failed to connect to the master via SSH") + defer sshClient.Close() + + testLogger.Info(t, "SSH connection to the master successful") + t.Log("Validation in progress. 
Please wait...") + + // Verify management node configuration + VerifyManagementNodeConfig(t, sshClient, expectedClusterName, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, managementNodeIPList, EXPECTED_LSF_VERSION, expectedSolution, testLogger) + + // Wait for dynamic node disappearance and handle potential errors + defer func() { + if err := WaitForDynamicNodeDisappearance(t, sshClient, testLogger); err != nil { + t.Errorf("Error in WaitForDynamicNodeDisappearance: %v", err) + } + }() + + // Run job + VerifyJobs(t, sshClient, jobCommandMed, testLogger) + // Verify dynamic node profile ValidateDynamicNodeProfile(t, os.Getenv("TF_VAR_ibmcloud_api_key"), utils.GetRegion(expectedZone), expectedResourceGroup, expectedMasterName, options, testLogger) @@ -493,7 +581,7 @@ func ValidateBasicClusterConfiguration(t *testing.T, options *testhelper.TestOpt defer sshLoginNodeClient.Close() // Verify login node configuration - VerifyLoginNodeConfig(t, sshLoginNodeClient, expectedClusterID, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, loginNodeIP, jobCommandLow, EXPECTED_LSF_VERSION, testLogger) + VerifyLoginNodeConfig(t, sshLoginNodeClient, expectedClusterName, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, loginNodeIP, jobCommandLow, EXPECTED_LSF_VERSION, testLogger) // Verify PTR records VerifyPTRRecordsForManagementAndLoginNodes(t, sshClient, LSF_PUBLIC_HOST_NAME, bastionIP, LSF_PRIVATE_HOST_NAME, managementNodeIPList, loginNodeIP, expectedDnsDomainName, testLogger) @@ -516,9 +604,9 @@ func ValidateBasicClusterConfiguration(t *testing.T, options *testhelper.TestOpt func ValidateLDAPClusterConfiguration(t *testing.T, options *testhelper.TestOptions, testLogger *utils.AggregatedLogger) { // Retrieve common cluster details from options expectedSolution := strings.ToLower(options.TerraformVars["solution"].(string)) - expectedClusterID, expectedReservationID, expectedMasterName := GetClusterInfo(options) + expectedClusterName, expectedReservationID, expectedMasterName := GetClusterInfo(options) expectedLdapDomain, ldapAdminPassword, ldapUserName, ldapUserPassword := GetLDAPServerCredentialsInfo(options) - expectedResourceGroup := options.TerraformVars["resource_group"].(string) + expectedResourceGroup := options.TerraformVars["existing_resource_group"].(string) expectedKeyManagement := options.TerraformVars["key_management"].(string) expectedZone := options.TerraformVars["zones"].([]string)[0] expectedNumOfKeys := len(options.TerraformVars["compute_ssh_keys"].([]string)) @@ -554,7 +642,7 @@ func ValidateLDAPClusterConfiguration(t *testing.T, options *testhelper.TestOpti t.Log("Validation in progress. 
Please wait...") // Verify management node configuration - VerifyManagementNodeConfig(t, sshClient, expectedClusterID, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, managementNodeIPList, EXPECTED_LSF_VERSION, expectedSolution, testLogger) + VerifyManagementNodeConfig(t, sshClient, expectedClusterName, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, managementNodeIPList, EXPECTED_LSF_VERSION, expectedSolution, testLogger) // Verify SSH key on management nodes VerifySSHKey(t, sshClient, bastionIP, LSF_PUBLIC_HOST_NAME, LSF_PRIVATE_HOST_NAME, "management", managementNodeIPList, expectedNumOfKeys, testLogger) @@ -607,7 +695,7 @@ func ValidateLDAPClusterConfiguration(t *testing.T, options *testhelper.TestOpti defer sshLoginNodeClient.Close() // Verify login node configuration - VerifyLoginNodeConfig(t, sshLoginNodeClient, expectedClusterID, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, loginNodeIP, jobCommandLow, EXPECTED_LSF_VERSION, testLogger) + VerifyLoginNodeConfig(t, sshLoginNodeClient, expectedClusterName, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, loginNodeIP, jobCommandLow, EXPECTED_LSF_VERSION, testLogger) // Verify SSH connectivity from login node VerifySSHConnectivityToNodesFromLogin(t, sshLoginNodeClient, managementNodeIPList, computeNodeIPList, testLogger) @@ -650,9 +738,9 @@ func ValidateLDAPClusterConfiguration(t *testing.T, options *testhelper.TestOpti func ValidatePACANDLDAPClusterConfiguration(t *testing.T, options *testhelper.TestOptions, testLogger *utils.AggregatedLogger) { // Retrieve common cluster details from options expectedSolution := strings.ToLower(options.TerraformVars["solution"].(string)) - expectedClusterID, expectedReservationID, expectedMasterName := GetClusterInfo(options) + expectedClusterName, expectedReservationID, expectedMasterName := GetClusterInfo(options) expectedLdapDomain, ldapAdminPassword, ldapUserName, ldapUserPassword := GetLDAPServerCredentialsInfo(options) - expectedResourceGroup := options.TerraformVars["resource_group"].(string) + expectedResourceGroup := options.TerraformVars["existing_resource_group"].(string) expectedKeyManagement := options.TerraformVars["key_management"].(string) expectedZone := options.TerraformVars["zones"].([]string)[0] expectedNumOfKeys := len(options.TerraformVars["compute_ssh_keys"].([]string)) @@ -688,7 +776,7 @@ func ValidatePACANDLDAPClusterConfiguration(t *testing.T, options *testhelper.Te t.Log("Validation in progress. 
Please wait...") // Verify management node configuration - VerifyManagementNodeConfig(t, sshClient, expectedClusterID, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, managementNodeIPList, EXPECTED_LSF_VERSION, expectedSolution, testLogger) + VerifyManagementNodeConfig(t, sshClient, expectedClusterName, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, managementNodeIPList, EXPECTED_LSF_VERSION, expectedSolution, testLogger) // Verify application center configuration VerifyAPPCenterConfig(t, sshClient, testLogger) @@ -747,7 +835,7 @@ func ValidatePACANDLDAPClusterConfiguration(t *testing.T, options *testhelper.Te defer sshLoginNodeClient.Close() // Verify login node configuration - VerifyLoginNodeConfig(t, sshLoginNodeClient, expectedClusterID, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, loginNodeIP, jobCommandLow, EXPECTED_LSF_VERSION, testLogger) + VerifyLoginNodeConfig(t, sshLoginNodeClient, expectedClusterName, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, loginNodeIP, jobCommandLow, EXPECTED_LSF_VERSION, testLogger) // Verify SSH connectivity from login node VerifySSHConnectivityToNodesFromLogin(t, sshLoginNodeClient, managementNodeIPList, computeNodeIPList, testLogger) @@ -791,9 +879,9 @@ func ValidatePACANDLDAPClusterConfiguration(t *testing.T, options *testhelper.Te func ValidateBasicClusterConfigurationWithVPCFlowLogsAndCos(t *testing.T, options *testhelper.TestOptions, testLogger *utils.AggregatedLogger) { // Retrieve common cluster details from options expectedSolution := strings.ToLower(options.TerraformVars["solution"].(string)) - expectedClusterID, expectedReservationID, expectedMasterName := GetClusterInfo(options) + expectedClusterName, expectedReservationID, expectedMasterName := GetClusterInfo(options) - expectedResourceGroup := options.TerraformVars["resource_group"].(string) + expectedResourceGroup := options.TerraformVars["existing_resource_group"].(string) expectedKeyManagement := options.TerraformVars["key_management"].(string) expectedZone := options.TerraformVars["zones"].([]string)[0] @@ -830,7 +918,7 @@ func ValidateBasicClusterConfigurationWithVPCFlowLogsAndCos(t *testing.T, option t.Log("Validation in progress. 
Please wait...") // Verify management node configuration - VerifyManagementNodeConfig(t, sshClient, expectedClusterID, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, managementNodeIPList, EXPECTED_LSF_VERSION, expectedSolution, testLogger) + VerifyManagementNodeConfig(t, sshClient, expectedClusterName, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, managementNodeIPList, EXPECTED_LSF_VERSION, expectedSolution, testLogger) // Wait for dynamic node disappearance and handle potential errors defer func() { @@ -857,7 +945,7 @@ func ValidateBasicClusterConfigurationWithVPCFlowLogsAndCos(t *testing.T, option defer sshLoginNodeClient.Close() // Verify login node configuration - VerifyLoginNodeConfig(t, sshLoginNodeClient, expectedClusterID, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, loginNodeIP, jobCommandLow, EXPECTED_LSF_VERSION, testLogger) + VerifyLoginNodeConfig(t, sshLoginNodeClient, expectedClusterName, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, loginNodeIP, jobCommandLow, EXPECTED_LSF_VERSION, testLogger) // Verify PTR records VerifyPTRRecordsForManagementAndLoginNodes(t, sshClient, LSF_PUBLIC_HOST_NAME, bastionIP, LSF_PRIVATE_HOST_NAME, managementNodeIPList, loginNodeIP, expectedDnsDomainName, testLogger) @@ -883,9 +971,9 @@ func ValidateBasicClusterConfigurationWithVPCFlowLogsAndCos(t *testing.T, option func ValidateClusterConfigurationWithMultipleKeys(t *testing.T, options *testhelper.TestOptions, testLogger *utils.AggregatedLogger) { // Retrieve common cluster details from options expectedSolution := strings.ToLower(options.TerraformVars["solution"].(string)) - expectedClusterID, expectedReservationID, expectedMasterName := GetClusterInfo(options) + expectedClusterName, expectedReservationID, expectedMasterName := GetClusterInfo(options) - expectedResourceGroup := options.TerraformVars["resource_group"].(string) + expectedResourceGroup := options.TerraformVars["existing_resource_group"].(string) expectedKeyManagement := options.TerraformVars["key_management"].(string) expectedZone := options.TerraformVars["zones"].([]string)[0] expectedNumOfKeys := len(options.TerraformVars["compute_ssh_keys"].([]string)) @@ -923,8 +1011,8 @@ func ValidateClusterConfigurationWithMultipleKeys(t *testing.T, options *testhel t.Log("Validation in progress. 
Please wait...") // Verify management node configuration - VerifyManagementNodeConfig(t, sshClientOne, expectedClusterID, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, managementNodeIPList, EXPECTED_LSF_VERSION, expectedSolution, testLogger) - VerifyManagementNodeConfig(t, sshClientTwo, expectedClusterID, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, managementNodeIPList, EXPECTED_LSF_VERSION, expectedSolution, testLogger) + VerifyManagementNodeConfig(t, sshClientOne, expectedClusterName, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, managementNodeIPList, EXPECTED_LSF_VERSION, expectedSolution, testLogger) + VerifyManagementNodeConfig(t, sshClientTwo, expectedClusterName, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, managementNodeIPList, EXPECTED_LSF_VERSION, expectedSolution, testLogger) // Verify SSH key on management node VerifySSHKey(t, sshClientOne, bastionIP, LSF_PUBLIC_HOST_NAME, LSF_PRIVATE_HOST_NAME, "management", managementNodeIPList, expectedNumOfKeys, testLogger) @@ -974,7 +1062,7 @@ func ValidateClusterConfigurationWithMultipleKeys(t *testing.T, options *testhel defer sshLoginNodeClient.Close() // Verify login node configuration - VerifyLoginNodeConfig(t, sshLoginNodeClient, expectedClusterID, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, loginNodeIP, jobCommandLow, EXPECTED_LSF_VERSION, testLogger) + VerifyLoginNodeConfig(t, sshLoginNodeClient, expectedClusterName, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, loginNodeIP, jobCommandLow, EXPECTED_LSF_VERSION, testLogger) // Get compute node IPs and handle errors computeNodeIPList, err = GetComputeNodeIPs(t, sshClientOne, testLogger, expectedSolution, staticWorkerNodeIPList) @@ -1003,9 +1091,9 @@ func ValidateExistingLDAPClusterConfig(t *testing.T, ldapServerBastionIP, ldapSe // Retrieve common cluster details from options expectedSolution := strings.ToLower(options.TerraformVars["solution"].(string)) - expectedClusterID, expectedReservationID, expectedMasterName := GetClusterInfo(options) + expectedClusterName, expectedReservationID, expectedMasterName := GetClusterInfo(options) - expectedResourceGroup := options.TerraformVars["resource_group"].(string) + expectedResourceGroup := options.TerraformVars["existing_resource_group"].(string) expectedKeyManagement := options.TerraformVars["key_management"].(string) expectedZone := options.TerraformVars["zones"].([]string)[0] expectedNumOfKeys := len(options.TerraformVars["compute_ssh_keys"].([]string)) @@ -1041,7 +1129,7 @@ func ValidateExistingLDAPClusterConfig(t *testing.T, ldapServerBastionIP, ldapSe t.Log("Validation in progress. 
Please wait...") // Verify management node configuration - VerifyManagementNodeConfig(t, sshClient, expectedClusterID, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, managementNodeIPList, EXPECTED_LSF_VERSION, expectedSolution, testLogger) + VerifyManagementNodeConfig(t, sshClient, expectedClusterName, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, managementNodeIPList, EXPECTED_LSF_VERSION, expectedSolution, testLogger) // Verify SSH key on management nodes VerifySSHKey(t, sshClient, bastionIP, LSF_PUBLIC_HOST_NAME, LSF_PRIVATE_HOST_NAME, "management", managementNodeIPList, expectedNumOfKeys, testLogger) @@ -1093,7 +1181,7 @@ func ValidateExistingLDAPClusterConfig(t *testing.T, ldapServerBastionIP, ldapSe defer sshLoginNodeClient.Close() // Verify login node configuration - VerifyLoginNodeConfig(t, sshLoginNodeClient, expectedClusterID, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, loginNodeIP, jobCommandLow, EXPECTED_LSF_VERSION, testLogger) + VerifyLoginNodeConfig(t, sshLoginNodeClient, expectedClusterName, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, loginNodeIP, jobCommandLow, EXPECTED_LSF_VERSION, testLogger) // Verify SSH connectivity from login node VerifySSHConnectivityToNodesFromLogin(t, sshLoginNodeClient, managementNodeIPList, computeNodeIPList, testLogger) @@ -1141,9 +1229,9 @@ func ValidateExistingLDAPClusterConfig(t *testing.T, ldapServerBastionIP, ldapSe func ValidateBasicClusterConfigurationLSFLogs(t *testing.T, options *testhelper.TestOptions, testLogger *utils.AggregatedLogger) { // Retrieve cluster details from the options provided for validation expectedSolution := strings.ToLower(options.TerraformVars["solution"].(string)) - expectedClusterID, expectedReservationID, expectedMasterName := GetClusterInfo(options) + expectedClusterName, expectedReservationID, expectedMasterName := GetClusterInfo(options) expectedZone := options.TerraformVars["zones"].([]string)[0] - expectedResourceGroup := options.TerraformVars["resource_group"].(string) + expectedResourceGroup := options.TerraformVars["existing_resource_group"].(string) // Parse hyperthreading setting expectedHyperthreadingEnabled, err := strconv.ParseBool(options.TerraformVars["hyperthreading_enabled"].(string)) @@ -1174,7 +1262,7 @@ func ValidateBasicClusterConfigurationLSFLogs(t *testing.T, options *testhelper. t.Log("Validation in progress. Please wait...") // Validate management node configuration - VerifyManagementNodeConfig(t, sshClient, expectedClusterID, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, managementNodeIPList, EXPECTED_LSF_VERSION, expectedSolution, testLogger) + VerifyManagementNodeConfig(t, sshClient, expectedClusterName, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, managementNodeIPList, EXPECTED_LSF_VERSION, expectedSolution, testLogger) // Validate LSF logs: Check if the logs are stored in their correct directory and ensure symbolic links are present ValidateLSFLogs(t, sshClient, os.Getenv("TF_VAR_ibmcloud_api_key"), utils.GetRegion(expectedZone), expectedResourceGroup, bastionIP, managementNodeIPList, testLogger) @@ -1209,7 +1297,7 @@ func ValidateBasicClusterConfigurationLSFLogs(t *testing.T, options *testhelper. 
defer sshLoginNodeClient.Close() // Validate login node configuration - VerifyLoginNodeConfig(t, sshLoginNodeClient, expectedClusterID, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, loginNodeIP, jobCommandLow, EXPECTED_LSF_VERSION, testLogger) + VerifyLoginNodeConfig(t, sshLoginNodeClient, expectedClusterName, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, loginNodeIP, jobCommandLow, EXPECTED_LSF_VERSION, testLogger) // Log the end of validation testLogger.Info(t, t.Name()+" Validation ended") @@ -1223,10 +1311,10 @@ func ValidateBasicClusterConfigurationLSFLogs(t *testing.T, options *testhelper. func ValidateBasicClusterConfigurationWithDedicatedHost(t *testing.T, options *testhelper.TestOptions, expectedDedicatedHostPresence bool, testLogger *utils.AggregatedLogger) { // Retrieve common cluster details from options expectedSolution := strings.ToLower(options.TerraformVars["solution"].(string)) - expectedClusterID, expectedReservationID, expectedMasterName := GetClusterInfo(options) + expectedClusterName, expectedReservationID, expectedMasterName := GetClusterInfo(options) expectedKeyManagement := options.TerraformVars["key_management"].(string) - expectedResourceGroup := options.TerraformVars["resource_group"].(string) - WorkerNodeMinCount, err := utils.GetWorkerNodeTotalCount(t, options.TerraformVars, testLogger) + expectedResourceGroup := options.TerraformVars["existing_resource_group"].(string) + WorkerNodeMinCount, err := utils.GetTotalWorkerNodeCount(t, options.TerraformVars, testLogger) require.NoError(t, err, "Error retrieving worker node total count") expectedZone := options.TerraformVars["zones"].([]string)[0] @@ -1263,7 +1351,7 @@ func ValidateBasicClusterConfigurationWithDedicatedHost(t *testing.T, options *t t.Log("Validation in progress. 
Please wait...") // Verify management node configuration - VerifyManagementNodeConfig(t, sshClient, expectedClusterID, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, managementNodeIPList, EXPECTED_LSF_VERSION, expectedSolution, testLogger) + VerifyManagementNodeConfig(t, sshClient, expectedClusterName, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, managementNodeIPList, EXPECTED_LSF_VERSION, expectedSolution, testLogger) // Verify dedicated host configuration ValidateDedicatedHost(t, os.Getenv("TF_VAR_ibmcloud_api_key"), utils.GetRegion(expectedZone), expectedResourceGroup, expectedMasterName, WorkerNodeMinCount, expectedDedicatedHostPresence, testLogger) @@ -1293,7 +1381,7 @@ func ValidateBasicClusterConfigurationWithDedicatedHost(t *testing.T, options *t defer sshLoginNodeClient.Close() // Verify login node configuration - VerifyLoginNodeConfig(t, sshLoginNodeClient, expectedClusterID, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, loginNodeIP, jobCommandLow, EXPECTED_LSF_VERSION, testLogger) + VerifyLoginNodeConfig(t, sshLoginNodeClient, expectedClusterName, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, loginNodeIP, jobCommandLow, EXPECTED_LSF_VERSION, testLogger) // Verify PTR records for management and login nodes VerifyPTRRecordsForManagementAndLoginNodes(t, sshClient, LSF_PUBLIC_HOST_NAME, bastionIP, LSF_PRIVATE_HOST_NAME, managementNodeIPList, loginNodeIP, expectedDnsDomainName, testLogger) @@ -1317,9 +1405,9 @@ func ValidateBasicClusterConfigurationWithDedicatedHost(t *testing.T, options *t func ValidateBasicClusterConfigurationWithSCC(t *testing.T, options *testhelper.TestOptions, testLogger *utils.AggregatedLogger) { // Retrieve common cluster details from options expectedSolution := strings.ToLower(options.TerraformVars["solution"].(string)) - expectedClusterID, expectedReservationID, expectedMasterName := GetClusterInfo(options) + expectedClusterName, expectedReservationID, expectedMasterName := GetClusterInfo(options) - expectedResourceGroup := options.TerraformVars["resource_group"].(string) + expectedResourceGroup := options.TerraformVars["existing_resource_group"].(string) expectedKeyManagement := options.TerraformVars["key_management"].(string) expectedZone := options.TerraformVars["zones"].([]string)[0] @@ -1356,7 +1444,7 @@ func ValidateBasicClusterConfigurationWithSCC(t *testing.T, options *testhelper. t.Log("Validation in progress. Please wait...") // Verify management node configuration - VerifyManagementNodeConfig(t, sshClient, expectedClusterID, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, managementNodeIPList, EXPECTED_LSF_VERSION, expectedSolution, testLogger) + VerifyManagementNodeConfig(t, sshClient, expectedClusterName, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, managementNodeIPList, EXPECTED_LSF_VERSION, expectedSolution, testLogger) // Verify SCC instance ValidateSCCInstance(t, os.Getenv("TF_VAR_ibmcloud_api_key"), utils.GetRegion(expectedZone), expectedResourceGroup, expectedMasterName, SCC_INSTANCE_REGION, testLogger) @@ -1386,7 +1474,7 @@ func ValidateBasicClusterConfigurationWithSCC(t *testing.T, options *testhelper. 
defer sshLoginNodeClient.Close() // Verify login node configuration - VerifyLoginNodeConfig(t, sshLoginNodeClient, expectedClusterID, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, loginNodeIP, jobCommandLow, EXPECTED_LSF_VERSION, testLogger) + VerifyLoginNodeConfig(t, sshLoginNodeClient, expectedClusterName, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, loginNodeIP, jobCommandLow, EXPECTED_LSF_VERSION, testLogger) // Verify PTR records VerifyPTRRecordsForManagementAndLoginNodes(t, sshClient, LSF_PUBLIC_HOST_NAME, bastionIP, LSF_PRIVATE_HOST_NAME, managementNodeIPList, loginNodeIP, expectedDnsDomainName, testLogger) @@ -1410,9 +1498,9 @@ func ValidateBasicClusterConfigurationWithSCC(t *testing.T, options *testhelper. func ValidateBasicClusterConfigurationWithCloudLogs(t *testing.T, options *testhelper.TestOptions, testLogger *utils.AggregatedLogger) { // Retrieve common cluster details from options expectedSolution := strings.ToLower(options.TerraformVars["solution"].(string)) - expectedClusterID, expectedReservationID, expectedMasterName := GetClusterInfo(options) + expectedClusterName, expectedReservationID, expectedMasterName := GetClusterInfo(options) - expectedResourceGroup := options.TerraformVars["resource_group"].(string) + expectedResourceGroup := options.TerraformVars["existing_resource_group"].(string) expectedKeyManagement := options.TerraformVars["key_management"].(string) expectedZone := options.TerraformVars["zones"].([]string)[0] @@ -1453,7 +1541,7 @@ func ValidateBasicClusterConfigurationWithCloudLogs(t *testing.T, options *testh t.Log("Validation in progress. Please wait...") // Verify management node configuration - VerifyManagementNodeConfig(t, sshClient, expectedClusterID, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, managementNodeIPList, EXPECTED_LSF_VERSION, expectedSolution, testLogger) + VerifyManagementNodeConfig(t, sshClient, expectedClusterName, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, managementNodeIPList, EXPECTED_LSF_VERSION, expectedSolution, testLogger) // Wait for dynamic node disappearance and handle potential errors defer func() { @@ -1483,7 +1571,7 @@ func ValidateBasicClusterConfigurationWithCloudLogs(t *testing.T, options *testh defer sshLoginNodeClient.Close() // Verify login node configuration - VerifyLoginNodeConfig(t, sshLoginNodeClient, expectedClusterID, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, loginNodeIP, jobCommandLow, EXPECTED_LSF_VERSION, testLogger) + VerifyLoginNodeConfig(t, sshLoginNodeClient, expectedClusterName, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, loginNodeIP, jobCommandLow, EXPECTED_LSF_VERSION, testLogger) // Verify PTR records VerifyPTRRecordsForManagementAndLoginNodes(t, sshClient, LSF_PUBLIC_HOST_NAME, bastionIP, LSF_PRIVATE_HOST_NAME, managementNodeIPList, loginNodeIP, expectedDnsDomainName, testLogger) @@ -1498,6 +1586,191 @@ func ValidateBasicClusterConfigurationWithCloudLogs(t *testing.T, options *testh testLogger.Info(t, t.Name()+" Validation ended") } +// ValidateBasicClusterConfigurationWithCloudMonitoring validates essential cluster configurations and logs errors. +// This function ensures that the management, compute, and login nodes meet the required configurations. +// It establishes SSH connections to nodes, validates DNS, encryption, and logs observability settings. 
+// Errors are handled explicitly, and validation steps are logged for debugging. +// Key validation and configuration checks ensure that the cluster setup adheres to standards. + +func ValidateBasicClusterConfigurationWithCloudMonitoring(t *testing.T, options *testhelper.TestOptions, testLogger *utils.AggregatedLogger) { + // Retrieve common cluster details from options + expectedSolution := strings.ToLower(options.TerraformVars["solution"].(string)) + expectedClusterName, expectedReservationID, expectedMasterName := GetClusterInfo(options) + + expectedResourceGroup := options.TerraformVars["existing_resource_group"].(string) + expectedKeyManagement := options.TerraformVars["key_management"].(string) + expectedZone := options.TerraformVars["zones"].([]string)[0] + + expectedDnsDomainName, ok := options.TerraformVars["dns_domain_name"].(map[string]string)["compute"] + require.True(t, ok, "Key 'compute' does not exist in dns_domain_name map or dns_domain_name is not of type map[string]string") + + expectedHyperthreadingEnabled, err := strconv.ParseBool(options.TerraformVars["hyperthreading_enabled"].(string)) + require.NoError(t, err, "Error parsing hyperthreading_enabled: %v", err) + + expectedMonitoringEnabledForManagement, err := strconv.ParseBool(fmt.Sprintf("%v", options.TerraformVars["observability_monitoring_enable"])) + require.NoError(t, err, "Error parsing observability_monitoring_enable") + + expectedMonitoringEnabledForCompute, err := strconv.ParseBool(fmt.Sprintf("%v", options.TerraformVars["observability_monitoring_on_compute_nodes_enable"])) + require.NoError(t, err, "Error parsing observability_monitoring_on_compute_nodes_enable") + + // Set job commands based on solution type + jobCommandLow, jobCommandMed := SetJobCommands(expectedSolution, expectedZone) + + // Run the test consistency check + clusterCreationErr := ValidateClusterCreation(t, options, testLogger) + if clusterCreationErr != nil { + require.NoError(t, clusterCreationErr, "Cluster creation validation failed: %v") + } + + // Retrieve server IPs (different logic for HPC vs LSF solutions) + bastionIP, managementNodeIPList, loginNodeIP, staticWorkerNodeIPList, ipRetrievalError := GetClusterIPs(t, options, expectedSolution, testLogger) + require.NoError(t, ipRetrievalError, "Error occurred while getting server IPs: %v", ipRetrievalError) + + // Log validation start + testLogger.Info(t, t.Name()+" Validation started ......") + + // Connect to the master node via SSH and handle connection errors + sshClient, connectionErr := utils.ConnectToHost(LSF_PUBLIC_HOST_NAME, bastionIP, LSF_PRIVATE_HOST_NAME, managementNodeIPList[0]) + require.NoError(t, connectionErr, "Failed to connect to the master via SSH") + defer sshClient.Close() + + testLogger.Info(t, "SSH connection to the master successful") + t.Log("Validation in progress. 
Please wait...") + + // Verify management node configuration + VerifyManagementNodeConfig(t, sshClient, expectedClusterName, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, managementNodeIPList, EXPECTED_LSF_VERSION, expectedSolution, testLogger) + + // Wait for dynamic node disappearance and handle potential errors + defer func() { + if err := WaitForDynamicNodeDisappearance(t, sshClient, testLogger); err != nil { + t.Errorf("Error in WaitForDynamicNodeDisappearance: %v", err) + } + }() + + // Run job + VerifyJobs(t, sshClient, jobCommandMed, testLogger) + + // Get static and dynamic compute node IPs and handle errors + computeNodeIPList, err := GetComputeNodeIPs(t, sshClient, testLogger, expectedSolution, staticWorkerNodeIPList) + if err != nil { + t.Fatalf("Failed to retrieve dynamic compute node IPs: %v", err) + } + + // Verify compute node configuration + VerifyComputeNodeConfig(t, sshClient, expectedHyperthreadingEnabled, computeNodeIPList, testLogger) + + // Verify that cloud monitoring are enabled and correctly configured + VerifyCloudMonitoring(t, sshClient, expectedSolution, options.LastTestTerraformOutputs, managementNodeIPList, staticWorkerNodeIPList, expectedMonitoringEnabledForManagement, expectedMonitoringEnabledForCompute, testLogger) + + // Verify SSH connectivity from login node and handle connection errors + sshLoginNodeClient, connectionErr := utils.ConnectToHost(LSF_PUBLIC_HOST_NAME, bastionIP, LSF_PRIVATE_HOST_NAME, loginNodeIP) + require.NoError(t, connectionErr, "Failed to connect to the login node via SSH") + defer sshLoginNodeClient.Close() + + // Verify login node configuration + VerifyLoginNodeConfig(t, sshLoginNodeClient, expectedClusterName, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, loginNodeIP, jobCommandLow, EXPECTED_LSF_VERSION, testLogger) + + // Verify PTR records + VerifyPTRRecordsForManagementAndLoginNodes(t, sshClient, LSF_PUBLIC_HOST_NAME, bastionIP, LSF_PRIVATE_HOST_NAME, managementNodeIPList, loginNodeIP, expectedDnsDomainName, testLogger) + + // Verify LSF DNS on login node + VerifyLSFDNS(t, sshClient, []string{loginNodeIP}, expectedDnsDomainName, testLogger) + + // Verify file share encryption + VerifyFileShareEncryption(t, sshClient, os.Getenv("TF_VAR_ibmcloud_api_key"), utils.GetRegion(expectedZone), expectedResourceGroup, expectedMasterName, expectedKeyManagement, managementNodeIPList, testLogger) + + // Log validation end + testLogger.Info(t, t.Name()+" Validation ended") +} + +// ValidateBasicClusterConfigurationWithCloudAtracker verifies that the cluster setup aligns with the expected configuration +// when Observability Atracker is enabled or disabled. It performs validations across management, compute, and login nodes, +// ensuring compliance with DNS, encryption, logging, and Atracker settings. +// The function establishes SSH connections to validate node configurations, runs job verification tests, +// checks PTR records, and ensures file share encryption. If any configuration discrepancies are found, +// appropriate test errors are raised. 
+func ValidateBasicClusterConfigurationWithCloudAtracker(t *testing.T, options *testhelper.TestOptions, testLogger *utils.AggregatedLogger) { + // Retrieve common cluster details + expectedSolution := strings.ToLower(options.TerraformVars["solution"].(string)) + expectedClusterName, expectedReservationID, expectedMasterName := GetClusterInfo(options) + expectedResourceGroup := options.TerraformVars["existing_resource_group"].(string) + expectedKeyManagement := options.TerraformVars["key_management"].(string) + expectedZone := options.TerraformVars["zones"].([]string)[0] + expectedTargetType := options.TerraformVars["observability_atracker_target_type"].(string) + + expectedObservabilityAtrackerEnable, err := strconv.ParseBool(fmt.Sprintf("%v", options.TerraformVars["observability_atracker_enable"])) + require.NoError(t, err, "Error parsing observability_atracker_enable") + + expectedDnsDomainName, ok := options.TerraformVars["dns_domain_name"].(map[string]string)["compute"] + require.True(t, ok, "Missing or invalid 'compute' key in dns_domain_name") + + expectedHyperthreadingEnabled, err := strconv.ParseBool(options.TerraformVars["hyperthreading_enabled"].(string)) + require.NoError(t, err, "Error parsing hyperthreading_enabled") + + // Set job commands + jobCommandLow, jobCommandMed := SetJobCommands(expectedSolution, expectedZone) + + // Validate cluster creation + require.NoError(t, ValidateClusterCreation(t, options, testLogger), "Cluster creation validation failed") + + // Retrieve server IPs + bastionIP, managementNodeIPList, loginNodeIP, staticWorkerNodeIPList, err := GetClusterIPs(t, options, expectedSolution, testLogger) + require.NoError(t, err, "Failed to retrieve cluster IPs") + + testLogger.Info(t, t.Name()+" Validation started ......") + + // Establish SSH connection to master node + sshClient, err := utils.ConnectToHost(LSF_PUBLIC_HOST_NAME, bastionIP, LSF_PRIVATE_HOST_NAME, managementNodeIPList[0]) + require.NoError(t, err, "Failed to connect to the master node via SSH") + defer sshClient.Close() + testLogger.Info(t, "SSH connection to master node successful") + + t.Log("Validation in progress. 
Please wait...") + + // Verify management node configuration + VerifyManagementNodeConfig(t, sshClient, expectedClusterName, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, managementNodeIPList, EXPECTED_LSF_VERSION, expectedSolution, testLogger) + + // Ensure dynamic node disappearance check runs after validation + defer func() { + if err := WaitForDynamicNodeDisappearance(t, sshClient, testLogger); err != nil { + t.Errorf("Error in WaitForDynamicNodeDisappearance: %v", err) + } + }() + + // Run job verification + VerifyJobs(t, sshClient, jobCommandMed, testLogger) + + // Get compute node IPs + computeNodeIPList, err := GetComputeNodeIPs(t, sshClient, testLogger, expectedSolution, staticWorkerNodeIPList) + require.NoError(t, err, "Failed to retrieve dynamic compute node IPs") + + // Verify compute node configuration + VerifyComputeNodeConfig(t, sshClient, expectedHyperthreadingEnabled, computeNodeIPList, testLogger) + + // Validate Atracker + ibmCloudAPIKey := os.Getenv("TF_VAR_ibmcloud_api_key") + ValidateAtracker(t, ibmCloudAPIKey, utils.GetRegion(expectedZone), expectedResourceGroup, expectedMasterName, expectedTargetType, expectedObservabilityAtrackerEnable, testLogger) + + // Establish SSH connection to login node + sshLoginNodeClient, err := utils.ConnectToHost(LSF_PUBLIC_HOST_NAME, bastionIP, LSF_PRIVATE_HOST_NAME, loginNodeIP) + require.NoError(t, err, "Failed to connect to the login node via SSH") + defer sshLoginNodeClient.Close() + + // Verify login node configuration + VerifyLoginNodeConfig(t, sshLoginNodeClient, expectedClusterName, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, loginNodeIP, jobCommandLow, EXPECTED_LSF_VERSION, testLogger) + + // Verify PTR records + VerifyPTRRecordsForManagementAndLoginNodes(t, sshClient, LSF_PUBLIC_HOST_NAME, bastionIP, LSF_PRIVATE_HOST_NAME, managementNodeIPList, loginNodeIP, expectedDnsDomainName, testLogger) + + // Verify LSF DNS on login node + VerifyLSFDNS(t, sshClient, []string{loginNodeIP}, expectedDnsDomainName, testLogger) + + // Verify file share encryption + VerifyFileShareEncryption(t, sshClient, ibmCloudAPIKey, utils.GetRegion(expectedZone), expectedResourceGroup, expectedMasterName, expectedKeyManagement, managementNodeIPList, testLogger) + + testLogger.Info(t, t.Name()+" Validation ended") +} + // ValidateClusterConfigWithAPPCenterOnExistingEnvironment validates the configuration of an existing cluster with App Center integration. // It verifies management node configuration, SSH keys, failover and failback, LSF daemon restart, dynamic compute node configuration, // login node configuration, SSH connectivity, application center configuration, noVNC configuration, PTR records, and file share encryption. @@ -1506,7 +1779,7 @@ func ValidateBasicClusterConfigurationWithCloudLogs(t *testing.T, options *testh func ValidateClusterConfigWithAPPCenterOnExistingEnvironment( t *testing.T, computeSshKeysList []string, - bastionIP, loginNodeIP, expectedClusterID, expectedReservationID, expectedMasterName, expectedResourceGroup, + bastionIP, loginNodeIP, expectedClusterName, expectedReservationID, expectedMasterName, expectedResourceGroup, expectedKeyManagement, expectedZone, expectedDnsDomainName string, managementNodeIPList []string, expectedHyperthreadingEnabled bool, @@ -1531,7 +1804,7 @@ func ValidateClusterConfigWithAPPCenterOnExistingEnvironment( t.Log("Validation in progress. 
Please wait...") // Verify management node configuration - VerifyManagementNodeConfig(t, sshClient, expectedClusterID, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, managementNodeIPList, EXPECTED_LSF_VERSION, "hpc", testLogger) + VerifyManagementNodeConfig(t, sshClient, expectedClusterName, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, managementNodeIPList, EXPECTED_LSF_VERSION, "hpc", testLogger) // Verify SSH key on management nodes VerifySSHKey(t, sshClient, bastionIP, LSF_PUBLIC_HOST_NAME, LSF_PRIVATE_HOST_NAME, "management", managementNodeIPList, expectedNumOfKeys, testLogger) @@ -1576,7 +1849,7 @@ func ValidateClusterConfigWithAPPCenterOnExistingEnvironment( defer sshLoginNodeClient.Close() // Verify login node configuration - VerifyLoginNodeConfig(t, sshLoginNodeClient, expectedClusterID, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, loginNodeIP, jobCommandLow, EXPECTED_LSF_VERSION, testLogger) + VerifyLoginNodeConfig(t, sshLoginNodeClient, expectedClusterName, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, loginNodeIP, jobCommandLow, EXPECTED_LSF_VERSION, testLogger) // Re-fetch dynamic compute node IPs computeNodeIPList, computeIPErr = HPCGETDynamicComputeNodeIPs(t, sshClient, testLogger) @@ -1610,7 +1883,7 @@ func ValidateClusterConfigWithAPPCenterOnExistingEnvironment( func ValidateClusterConfigWithAPPCenterAndLDAPOnExistingEnvironment( t *testing.T, computeSshKeysList []string, - bastionIP, loginNodeIP, expectedClusterID, expectedReservationID, expectedMasterName, expectedResourceGroup, + bastionIP, loginNodeIP, expectedClusterName, expectedReservationID, expectedMasterName, expectedResourceGroup, expectedKeyManagement, expectedZone, expectedDnsDomainName string, managementNodeIPList []string, expectedHyperthreadingEnabled bool, @@ -1636,7 +1909,7 @@ func ValidateClusterConfigWithAPPCenterAndLDAPOnExistingEnvironment( t.Log("Validation in progress. 
Please wait...") // Verify management node configuration - VerifyManagementNodeConfig(t, sshClient, expectedClusterID, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, managementNodeIPList, EXPECTED_LSF_VERSION, "hpc", testLogger) + VerifyManagementNodeConfig(t, sshClient, expectedClusterName, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, managementNodeIPList, EXPECTED_LSF_VERSION, "hpc", testLogger) // Verify SSH key on management nodes VerifySSHKey(t, sshClient, bastionIP, LSF_PUBLIC_HOST_NAME, LSF_PRIVATE_HOST_NAME, "management", managementNodeIPList, expectedNumOfKeys, testLogger) @@ -1681,7 +1954,7 @@ func ValidateClusterConfigWithAPPCenterAndLDAPOnExistingEnvironment( defer sshLoginNodeClient.Close() // Verify login node configuration - VerifyLoginNodeConfig(t, sshLoginNodeClient, expectedClusterID, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, loginNodeIP, jobCommandLow, EXPECTED_LSF_VERSION, testLogger) + VerifyLoginNodeConfig(t, sshLoginNodeClient, expectedClusterName, expectedMasterName, expectedReservationID, expectedHyperthreadingEnabled, loginNodeIP, jobCommandLow, EXPECTED_LSF_VERSION, testLogger) // Re-fetch dynamic compute node IPs computeNodeIPList, connectionErr = HPCGETDynamicComputeNodeIPs(t, sshClient, testLogger) diff --git a/tests/lsf/constants.go b/tests/lsf/constants.go index 215b1fe3..9984ec4c 100644 --- a/tests/lsf/constants.go +++ b/tests/lsf/constants.go @@ -18,8 +18,8 @@ const ( HPC_JOB_COMMAND_MED_MEM_SOUTH = `bsub -J myjob[1-1] -R "select[family=mx3d] rusage[mem=30G]" sleep 90` HPC_JOB_COMMAND_HIGH_MEM_SOUTH = `bsub -J myjob[1-1] -R "select[family=mx3d] rusage[mem=90G]" sleep 90` HPC_JOB_COMMAND_LOW_MEM_WITH_MORE_SLEEP = `bsub -J myjob[1-1] -R "select[family=mx2] rusage[mem=30G]" sleep 90` - LSF_JOB_COMMAND_LOW_MEM = `bsub -n 6 sleep 60` - LSF_JOB_COMMAND_MED_MEM = `bsub -n 8 sleep 90` + LSF_JOB_COMMAND_LOW_MEM = `bsub -n 4 sleep 60` + LSF_JOB_COMMAND_MED_MEM = `bsub -n 6 sleep 90` LSF_JOB_COMMAND_HIGH_MEM = `bsub -n 10 sleep 120` ) diff --git a/tests/other_test.go b/tests/other_test.go index 17c3a4bf..866ca039 100644 --- a/tests/other_test.go +++ b/tests/other_test.go @@ -45,6 +45,7 @@ func TestRunBasic(t *testing.T) { options.SkipTestTearDown = true defer options.TestTearDown() + // Validate that the basic cluster configuration is correct or not lsf.ValidateClusterConfiguration(t, options, testLogger) } @@ -70,6 +71,7 @@ func TestRunCustomRGAsNull(t *testing.T) { options.SkipTestTearDown = true defer options.TestTearDown() + // Validate that the basic cluster configuration is correct or not lsf.ValidateBasicClusterConfiguration(t, options, testLogger) } @@ -98,6 +100,7 @@ func TestRunCustomRGAsNonDefault(t *testing.T) { options.SkipTestTearDown = true defer options.TestTearDown() + // Validate that the basic cluster configuration is correct or not lsf.ValidateBasicClusterConfiguration(t, options, testLogger) } @@ -128,6 +131,7 @@ func TestRunAppCenter(t *testing.T) { options.SkipTestTearDown = true defer options.TestTearDown() + // Validate that the basic cluster configuration is correct or not lsf.ValidateClusterConfigurationWithAPPCenter(t, options, testLogger) } @@ -183,10 +187,10 @@ func TestRunPacHa(t *testing.T) { // Generate a unique HPC cluster prefix hpcClusterPrefix := utils.GenerateRandomString() - // Retrieve the environment variable for EXISTING_CERTIFICATE_INSTANCE - existingCertInstance, ok := os.LookupEnv("EXISTING_CERTIFICATE_INSTANCE") + // Retrieve the 
environment variable for app_center_existing_certificate_instance + existingCertInstance, ok := os.LookupEnv("APP_CENTER_EXISTING_CERTIFICATE_INSTANCE") if !ok { - t.Fatal("When 'app_center_high_availability' is set to true, the environment variable 'EXISTING_CERTIFICATE_INSTANCE' must be exported: export EXISTING_CERTIFICATE_INSTANCE=value") + t.Fatal("When 'app_center_high_availability' is set to true, the environment variable 'APP_CENTER_EXISTING_CERTIFICATE_INSTANCE' must be exported: export APP_CENTER_EXISTING_CERTIFICATE_INSTANCE=value") } testLogger.Info(t, "Cluster creation process initiated for test: "+t.Name()) @@ -203,7 +207,7 @@ func TestRunPacHa(t *testing.T) { options.TerraformVars["enable_app_center"] = strings.ToLower(envVars.EnableAppCenter) options.TerraformVars["app_center_gui_pwd"] = envVars.AppCenterGuiPassword // pragma: allowlist secret options.TerraformVars["app_center_high_availability"] = true // pragma: allowlist secret - options.TerraformVars["existing_certificate_instance"] = existingCertInstance + options.TerraformVars["app_center_existing_certificate_instance"] = existingCertInstance // Skip teardown if specified options.SkipTestTearDown = true @@ -282,7 +286,7 @@ func TestRunLSFClusterCreationWithZeroWorkerNodes(t *testing.T) { defer options.TestTearDown() //Validate the basic cluster configuration. - lsf.ValidateBasicClusterConfiguration(t, options, testLogger) + lsf.ValidateBasicClusterConfigurationWithDynamicProfile(t, options, testLogger) testLogger.Info(t, "Cluster configuration validation completed successfully.") } else { testLogger.Warn(t, "Test skipped as the solution is not 'lsf'.") } @@ -1223,10 +1227,6 @@ func TestRunDedicatedHost(t *testing.T) { "count": 1, "instance_type": "bx2-2x8", }, - { - "count": 1, - "instance_type": "cx2-2x4", - }, } options.SkipTestTearDown = true @@ -1335,7 +1335,6 @@ func TestRunObservabilityCloudLogsManagementEnabled(t *testing.T) { // Both management and compute logs are disabled. // Monitoring features are explicitly disabled. // The cluster setup passes basic validation checks. - func TestRunObservabilityCloudLogsManagementAndComputeDisabled(t *testing.T) { // Run the test in parallel for efficiency t.Parallel() @@ -1381,6 +1380,243 @@ func TestRunObservabilityCloudLogsManagementAndComputeDisabled(t *testing.T) { lsf.ValidateBasicClusterConfigurationWithCloudLogs(t, options, testLogger) } +// TestRunObservabilityMonitoringForManagementAndComputeEnabled validates the creation of a cluster +// with observability features enabled for both management and compute nodes. The test ensures that the +// cluster setup passes basic validation checks, confirming that the observability features for both management +// and compute are properly configured and functional, while platform logs remain disabled.
+func TestRunObservabilityMonitoringForManagementAndComputeEnabled(t *testing.T) { + // Run the test in parallel with other tests to optimize test execution + t.Parallel() + + // Set up the test suite and environment configuration + setupTestSuite(t) + + // Log the initiation of the cluster creation process + testLogger.Info(t, "Cluster creation process initiated for "+t.Name()) + + // Generate a random prefix for the cluster to ensure uniqueness + hpcClusterPrefix := utils.GenerateRandomString() + + // Retrieve necessary environment variables to configure the test + envVars := GetEnvVars() + + // Set up test options with relevant parameters, including resource group and environment variables + options, err := setupOptions(t, hpcClusterPrefix, terraformDir, envVars.DefaultExistingResourceGroup, ignoreDestroys, ignoreUpdates) + require.NoError(t, err, "Error setting up test options: %v", err) + + // Configure the observability settings for management and compute logs, + options.TerraformVars["observability_logs_enable_for_management"] = false + options.TerraformVars["observability_monitoring_enable"] = true + options.TerraformVars["observability_monitoring_on_compute_nodes_enable"] = true + options.TerraformVars["observability_monitoring_plan"] = "graduated-tier" + + // Prevent automatic test teardown for inspection after the test runs + options.SkipTestTearDown = true + + // Ensure test teardown is executed at the end of the test + defer options.TestTearDown() + + // Validate that the basic cluster configuration is correct with cloud monitoring enabled for management and compute nodes + lsf.ValidateBasicClusterConfigurationWithCloudMonitoring(t, options, testLogger) +} + +// TestRunObservabilityMonitoringForManagementEnabledAndComputeDisabled validates the creation of a cluster +// with observability features enabled for management nodes and disabled for compute nodes. The test ensures that the +// cluster setup passes basic validation checks, confirming that the observability features for management +// and compute are properly configured and functional, while platform logs and monitoring are disabled. 
+func TestRunObservabilityMonitoringForManagementEnabledAndComputeDisabled(t *testing.T) { + // Run the test in parallel with other tests to optimize test execution + t.Parallel() + + // Set up the test suite and environment configuration + setupTestSuite(t) + + // Log the initiation of the cluster creation process + testLogger.Info(t, "Cluster creation process initiated for "+t.Name()) + + // Generate a random prefix for the cluster to ensure uniqueness + hpcClusterPrefix := utils.GenerateRandomString() + + // Retrieve necessary environment variables to configure the test + envVars := GetEnvVars() + + // Set up test options with relevant parameters, including resource group and environment variables + options, err := setupOptions(t, hpcClusterPrefix, terraformDir, envVars.DefaultExistingResourceGroup, ignoreDestroys, ignoreUpdates) + require.NoError(t, err, "Error setting up test options: %v", err) + + // Configure the observability settings for management and compute logs, + options.TerraformVars["observability_logs_enable_for_management"] = false + options.TerraformVars["observability_monitoring_enable"] = true + options.TerraformVars["observability_monitoring_on_compute_nodes_enable"] = false + options.TerraformVars["observability_monitoring_plan"] = "graduated-tier" + + // Prevent automatic test teardown for inspection after the test runs + options.SkipTestTearDown = true + + // Ensure test teardown is executed at the end of the test + defer options.TestTearDown() + + // Validate that the basic cluster configuration is correct with cloud monitoring enabled for management nodes and disabled for compute nodes + lsf.ValidateBasicClusterConfigurationWithCloudMonitoring(t, options, testLogger) +} + +// TestRunObservabilityMonitoringForManagementAndComputeDisabled validates the creation of a cluster +// with observability monitoring disabled for both management and compute nodes. The test ensures that the +// cluster setup passes basic validation checks, confirming that monitoring is not configured on either +// node type and that platform logs remain disabled.
+func TestRunObservabilityMonitoringForManagementAndComputeDisabled(t *testing.T) { + // Run the test in parallel with other tests to optimize test execution + t.Parallel() + + // Set up the test suite and environment configuration + setupTestSuite(t) + + // Log the initiation of the cluster creation process + testLogger.Info(t, "Cluster creation process initiated for "+t.Name()) + + // Generate a random prefix for the cluster to ensure uniqueness + hpcClusterPrefix := utils.GenerateRandomString() + + // Retrieve necessary environment variables to configure the test + envVars := GetEnvVars() + + // Set up test options with relevant parameters, including resource group and environment variables + options, err := setupOptions(t, hpcClusterPrefix, terraformDir, envVars.DefaultExistingResourceGroup, ignoreDestroys, ignoreUpdates) + require.NoError(t, err, "Error setting up test options: %v", err) + + // Configure the observability settings for management and compute logs, + options.TerraformVars["observability_logs_enable_for_management"] = false + options.TerraformVars["observability_monitoring_enable"] = false + options.TerraformVars["observability_monitoring_on_compute_nodes_enable"] = false + options.TerraformVars["observability_monitoring_plan"] = "graduated-tier" + + // Prevent automatic test teardown for inspection after the test runs + options.SkipTestTearDown = true + + // Ensure test teardown is executed at the end of the test + defer options.TestTearDown() + + // Validate that the basic cluster configuration is correct with cloud monitoring disabled for management and compute nodes + lsf.ValidateBasicClusterConfigurationWithCloudMonitoring(t, options, testLogger) +} + +// TestRunobservabilityAtrackerEnabledAndTargetTypeAsCloudlogs validates cluster creation +// with Observability Atracker enabled and the target type set to Cloud Logs.
+func TestRunobservabilityAtrackerEnabledAndTargetTypeAsCloudlogs(t *testing.T) { + // Execute the test in parallel to improve efficiency + t.Parallel() + + // Initialize the test suite and set up the environment + setupTestSuite(t) + + // Log the initiation of the cluster creation process + testLogger.Info(t, "Cluster creation process initiated for "+t.Name()) + + // Generate a random prefix for the cluster to ensure uniqueness + hpcClusterPrefix := utils.GenerateRandomString() + + // Retrieve environment variables required for configuration + envVars := GetEnvVars() + + // Configure test options, including resource group and environment variables + options, err := setupOptions(t, hpcClusterPrefix, terraformDir, envVars.DefaultExistingResourceGroup, ignoreDestroys, ignoreUpdates) + require.NoError(t, err, "Error setting up test options: %v", err) + + // Set observability configurations for logs and monitoring + options.TerraformVars["observability_logs_enable_for_management"] = false + options.TerraformVars["observability_monitoring_enable"] = false + options.TerraformVars["observability_monitoring_on_compute_nodes_enable"] = false + options.TerraformVars["observability_atracker_enable"] = true + options.TerraformVars["observability_atracker_target_type"] = "cloudlogs" + + // Prevent test teardown for post-test inspection + options.SkipTestTearDown = true + + // Ensure proper cleanup after test execution + defer options.TestTearDown() + + // Validate the cluster setup with Atracker enabled and target type as cloudlogs + lsf.ValidateBasicClusterConfigurationWithCloudAtracker(t, options, testLogger) +} + +// TestRunobservabilityAtrackerEnabledAndTargetTypeAsCos validates cluster creation +// with Observability Atracker enabled and the target type set to COS. 
+func TestRunobservabilityAtrackerEnabledAndTargetTypeAsCos(t *testing.T) { + // Execute the test in parallel to improve efficiency + t.Parallel() + + // Initialize the test suite and set up the environment + setupTestSuite(t) + + // Log the initiation of the cluster creation process + testLogger.Info(t, "Cluster creation process initiated for "+t.Name()) + + // Generate a random prefix for the cluster to ensure uniqueness + hpcClusterPrefix := utils.GenerateRandomString() + + // Retrieve environment variables required for configuration + envVars := GetEnvVars() + + // Configure test options, including resource group and environment variables + options, err := setupOptions(t, hpcClusterPrefix, terraformDir, envVars.DefaultExistingResourceGroup, ignoreDestroys, ignoreUpdates) + require.NoError(t, err, "Error setting up test options: %v", err) + + // Set observability configurations for logs and monitoring + options.TerraformVars["observability_logs_enable_for_management"] = false + options.TerraformVars["observability_monitoring_enable"] = false + options.TerraformVars["observability_monitoring_on_compute_nodes_enable"] = false + options.TerraformVars["observability_atracker_enable"] = true + options.TerraformVars["observability_atracker_target_type"] = "cos" + + // Prevent test teardown for post-test inspection + options.SkipTestTearDown = true + + // Ensure proper cleanup after test execution + defer options.TestTearDown() + + // Validate the cluster setup with Atracker enabled and target type as cos + lsf.ValidateBasicClusterConfigurationWithCloudAtracker(t, options, testLogger) +} + +// TestRunobservabilityAtrackerDisabledAndTargetTypeAsCos validates cluster creation +// with Observability Atracker disabled and the target type set to COS. +func TestRunobservabilityAtrackerDisabledAndTargetTypeAsCos(t *testing.T) { + // Execute the test in parallel to improve efficiency + t.Parallel() + + // Initialize the test suite and set up the environment + setupTestSuite(t) + + // Log the initiation of the cluster creation process + testLogger.Info(t, "Cluster creation process initiated for "+t.Name()) + + // Generate a random prefix for the cluster to ensure uniqueness + hpcClusterPrefix := utils.GenerateRandomString() + + // Retrieve environment variables required for configuration + envVars := GetEnvVars() + + // Configure test options, including resource group and environment variables + options, err := setupOptions(t, hpcClusterPrefix, terraformDir, envVars.DefaultExistingResourceGroup, ignoreDestroys, ignoreUpdates) + require.NoError(t, err, "Error setting up test options: %v", err) + + // Set observability configurations for logs and monitoring + options.TerraformVars["observability_logs_enable_for_management"] = false + options.TerraformVars["observability_monitoring_enable"] = false + options.TerraformVars["observability_monitoring_on_compute_nodes_enable"] = false + options.TerraformVars["observability_atracker_enable"] = false + options.TerraformVars["observability_atracker_target_type"] = "cos" + + // Prevent test teardown for post-test inspection + options.SkipTestTearDown = true + + // Ensure proper cleanup after test execution + defer options.TestTearDown() + + // Validate the cluster setup with Atracker disabled and target type as cos + lsf.ValidateBasicClusterConfigurationWithCloudAtracker(t, options, testLogger) +} + // ############################## Negative Test cases ########################################## // TestRunHPCWithoutMandatory tests Terraform's behavior when 
mandatory variables are missing by checking for specific error messages. @@ -1783,7 +2019,7 @@ func TestRunHPCInvalidReservationIDAndContractID(t *testing.T) { terrPath := strings.ReplaceAll(abs, "tests/", "") // Loop over all combinations of invalid cluster_name and reservation_id values - for _, clusterID := range invalidClusterNames { + for _, ClusterName := range invalidClusterNames { for _, reservationID := range invalidReservationIDs { // Define Terraform options @@ -1795,7 +2031,7 @@ func TestRunHPCInvalidReservationIDAndContractID(t *testing.T) { "compute_ssh_keys": utils.SplitAndTrim(envVars.SSHKey, ","), "zones": utils.SplitAndTrim(envVars.Zone, ","), "remote_allowed_ips": utils.SplitAndTrim(envVars.RemoteAllowedIPs, ","), - "cluster_name": clusterID, + "cluster_name": ClusterName, "reservation_id": reservationID, "solution": "hpc", }, @@ -1809,20 +2045,20 @@ func TestRunHPCInvalidReservationIDAndContractID(t *testing.T) { // If there is an error, check if it contains specific mandatory fields if err != nil { - clusterIDError := utils.VerifyDataContains(t, err.Error(), "cluster_name", testLogger) + ClusterNameError := utils.VerifyDataContains(t, err.Error(), "cluster_name", testLogger) reservationIDError := utils.VerifyDataContains(t, err.Error(), "reservation_id", testLogger) - result := clusterIDError && reservationIDError + result := ClusterNameError && reservationIDError // Assert that the result is true if all mandatory fields are missing assert.True(t, result) if result { - testLogger.PASS(t, "Validation succeeded: Invalid clusterID and ReservationID") + testLogger.PASS(t, "Validation succeeded: Invalid ClusterName and ReservationID") } else { testLogger.FAIL(t, "Validation failed: Expected error did not contain required fields: cluster_name or reservation_id") } } else { // Log an error if the expected error did not occur t.Error("Expected error did not occur") - testLogger.FAIL(t, "Expected error did not occur on Invalid clusterID and ReservationID validation") + testLogger.FAIL(t, "Expected error did not occur on Invalid ClusterName and ReservationID validation") } } } @@ -2573,10 +2809,6 @@ func TestRunInvalidDedicatedHostConfigurationWithZeroWorkerNodes(t *testing.T) { "count": 0, "instance_type": "bx2-2x8", }, - { - "count": 0, - "instance_type": "cx2-2x4", - }, } options.SkipTestTearDown = true @@ -2622,12 +2854,12 @@ func TestRunInvalidDedicatedHostProfile(t *testing.T) { "enable_dedicated_host": true, "worker_node_instance_type": []map[string]interface{}{ // Invalid data { - "count": 0, + "count": 1, "instance_type": "cx2-2x4", }, { - "count": 0, - "instance_type": "cx2-8x16", + "count": 1, + "instance_type": "bx2-2x8", }, }, "observability_monitoring_enable": false, @@ -2651,32 +2883,34 @@ func TestRunInvalidDedicatedHostProfile(t *testing.T) { Vars: vars, }) - // Cleanup resources - defer terraform.Destroy(t, terraformOptions) - // Perform Terraform upgrade only once UpgradeTerraformOnce(t, terraformOptions) // Apply the Terraform configuration - output, err := terraform.InitAndApplyE(t, terraformOptions) + _, err = terraform.InitAndPlanE(t, terraformOptions) // Check if an error occurred during apply - assert.Error(t, err, "Expected an error during apply") + assert.Error(t, err, "Expected an error during plan") if err != nil { + errMsg := err.Error() + // Check if the error message contains specific keywords + containsWorkerNodeType := utils.VerifyDataContains(t, errMsg, "is list of object with 2 elements", testLogger) + containsDedicatedHost := 
utils.VerifyDataContains(t, errMsg, "'enable_dedicated_host' is true, only one profile should be specified", testLogger) - // Check if the error message contains specific keywords indicating LDAP server IP issues - result := utils.VerifyDataContains(t, output, "instance profile not found in host's supported instance profile list", testLogger) + result := containsWorkerNodeType && containsDedicatedHost assert.True(t, result) + if result { - testLogger.PASS(t, "Validation succeeded for Invalid Dedicated-Host instance profile.") + testLogger.PASS(t, "Validation succeeded for invalid worker_node_instance_type object elements.") } else { - testLogger.FAIL(t, "Validation failed for Invalid Dedicated-Host instance profile.") + testLogger.FAIL(t, fmt.Sprintf("Validation failed: expected error conditions not met. Actual error: %s", errMsg)) } } else { // Log an error if the expected error did not occur - t.Error("Expected validation error did not occur.") + t.Error("Expected error did not occur") testLogger.FAIL(t, "Expected validation error did not occur for Invalid Dedicated-Host instance profile.") + } } @@ -2765,7 +2999,6 @@ func TestRunInvalidMinWorkerNodeCountGreaterThanMax(t *testing.T) { t.Error("Expected validation error did not occur.") testLogger.FAIL(t, "Expected validation error did not occur for Invalid worker node count") } - // Cleanup resources defer terraform.Destroy(t, terraformOptions) } diff --git a/tests/utilities/helpers.go b/tests/utilities/helpers.go index 1f94061b..29d0612f 100644 --- a/tests/utilities/helpers.go +++ b/tests/utilities/helpers.go @@ -220,8 +220,8 @@ func LoginIntoIBMCloudUsingCLI(t *testing.T, apiKey, region, resourceGroup strin func GenerateTimestampedClusterPrefix(prefix string) string { //Place current time in the string. t := time.Now() + //return strings.ToLower("cicd" + "-" + t.Format(TimeLayout) + "-" + prefix) return strings.ToLower("cicd" + "-" + t.Format(TimeLayout) + "-" + prefix) - } // GetPublicIP returns the public IP address using ifconfig.io API @@ -295,7 +295,7 @@ func GetValueForKey(inputMap map[string]string, key string) string { // Configuration struct matches the structure of your JSON data type Configuration struct { - ClusterName string `json:"clusterName"` + ClusterName string `json:"ClusterName"` ReservationID string `json:"reservationID"` ClusterPrefixName string `json:"clusterPrefixName"` ResourceGroup string `json:"resourceGroup"` @@ -485,79 +485,82 @@ func GetIAMToken() (string, error) { return token, nil } -// GetWorkerNodeTotalCount extracts the total "count" from the worker_node_instance_type variable. -// It uses reflection to handle slices of various underlying types. -func GetWorkerNodeTotalCount(t *testing.T, terraformVars map[string]interface{}, logger *AggregatedLogger) (int, error) { +// ConvertToInt safely converts an interface{} to an int. +func ConvertToInt(value interface{}) (int, error) { + switch v := value.(type) { + case int: + return v, nil + case float64: + return int(v), nil // JSON numbers are often float64. + case string: + intVal, err := strconv.Atoi(v) + if err != nil { + return 0, fmt.Errorf("could not convert string '%s' to int: %v", v, err) + } + return intVal, nil + default: + return 0, fmt.Errorf("unsupported type: %T", v) + } +} + +// GetTotalWorkerNodeCount extracts and sums up all "count" values. 
+func GetTotalWorkerNodeCount(t *testing.T, terraformVars map[string]interface{}, logger *AggregatedLogger) (int, error) { rawVal, exists := terraformVars["worker_node_instance_type"] if !exists { return 0, errors.New("worker_node_instance_type key does not exist") } - // Use reflection to check that rawVal is a slice. - val := reflect.ValueOf(rawVal) - if val.Kind() != reflect.Slice { - return 0, errors.New("worker_node_instance_type is not a slice") + // Ensure rawVal is of type []map[string]interface{} + workers, ok := rawVal.([]map[string]interface{}) + if !ok { + return 0, fmt.Errorf("worker_node_instance_type is not a slice, but %T", rawVal) } var totalCount int - for i := 0; i < val.Len(); i++ { - // Get the i-th element and assert that it's a map. - item := val.Index(i).Interface() - workerMap, ok := item.(map[string]interface{}) - if !ok { - return 0, fmt.Errorf("worker at index %d is not in the expected map format", i) - } - // Extract the "count" value. - countVal, exists := workerMap["count"] + for i, worker := range workers { + countVal, exists := worker["count"] if !exists { - return 0, fmt.Errorf("worker at index %d does not have a 'count' key", i) + return 0, fmt.Errorf("worker at index %d is missing 'count' key", i) } - // Use type assertion for an int. - count, ok := countVal.(int) - if !ok { - return 0, fmt.Errorf("count for worker at index %d is not an int", i) + + count, err := ConvertToInt(countVal) + if err != nil { + return 0, fmt.Errorf("worker at index %d has invalid 'count' value: %v", i, err) } totalCount += count - logger.Info(t, fmt.Sprintf("Expected total worker node count is %d", totalCount)) } + logger.Info(t, fmt.Sprintf("Total Worker Node Count: %d", totalCount)) return totalCount, nil } -// GetFirstWorkerNodeProfile extracts the "instance_type" from the first worker node in the slice. -// It validates the structure and returns an error if the data format is incorrect. -func GetFirstWorkerNodeProfile(t *testing.T, terraformVars map[string]interface{}, logger *AggregatedLogger) (string, error) { - +// GetFirstWorkerNodeInstanceType retrieves the "instance_type" of the first worker node. +func GetFirstWorkerNodeInstanceType(t *testing.T, terraformVars map[string]interface{}, logger *AggregatedLogger) (string, error) { rawVal, exists := terraformVars["worker_node_instance_type"] if !exists { return "", errors.New("worker_node_instance_type key does not exist") } - // Use reflection to ensure rawVal is a slice. 
- val := reflect.ValueOf(rawVal) - if val.Kind() != reflect.Slice || val.Len() == 0 { - return "", errors.New("worker_node_instance_type is not a valid non-empty slice") + // Ensure rawVal is of type []map[string]interface{} + workers, ok := rawVal.([]map[string]interface{}) + if !ok { + return "", fmt.Errorf("worker_node_instance_type is not a slice, but %T", rawVal) } - // Get the first element and ensure it's a map - item := val.Index(0).Interface() - workerMap, ok := item.(map[string]interface{}) - if !ok { - return "", errors.New("worker at index 0 is not in the expected map format") + if len(workers) == 0 { + return "", errors.New("worker_node_instance_type is empty") } - // Extract the "instance_type" value - instanceTypeVal, exists := workerMap["instance_type"] + instanceType, exists := workers[0]["instance_type"] if !exists { - return "", errors.New("worker at index 0 does not have an 'instance_type' key") + return "", errors.New("first worker node is missing 'instance_type' key") } - // Ensure "instance_type" is a string - instanceType, ok := instanceTypeVal.(string) + instanceTypeStr, ok := instanceType.(string) if !ok { - return "", errors.New("instance_type for worker at index 0 is not a string") + return "", errors.New("instance_type is not a string") } - logger.Info(t, fmt.Sprintf("Expected dynamic worker node instance type is %s", instanceType)) - return instanceType, nil + logger.Info(t, fmt.Sprintf("First Worker Node Instance Type: %s", instanceTypeStr)) + return instanceTypeStr, nil } diff --git a/tests/utilities/report.go b/tests/utilities/report.go index 8e55ef09..7a2cd44d 100644 --- a/tests/utilities/report.go +++ b/tests/utilities/report.go @@ -185,6 +185,7 @@ func GenerateHTMLReport(results []TestResult) error { + ` // Parse and execute the HTML template
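For reference, the reworked worker-node helpers in tests/utilities/helpers.go assume worker_node_instance_type is supplied as []map[string]interface{} with count and instance_type keys, and that count may arrive as an int, a float64 (typical after JSON decoding), or a numeric string. The standalone sketch below mirrors that behaviour rather than calling the helpers themselves (which also take *testing.T and a logger); all names in it are illustrative:

```go
package main

import (
	"fmt"
	"strconv"
)

// toInt mirrors ConvertToInt: accept int, float64 (JSON numbers) or numeric strings.
func toInt(value interface{}) (int, error) {
	switch v := value.(type) {
	case int:
		return v, nil
	case float64:
		return int(v), nil
	case string:
		return strconv.Atoi(v)
	default:
		return 0, fmt.Errorf("unsupported type: %T", v)
	}
}

func main() {
	// Shape the tests pass in for worker_node_instance_type.
	workerNodeInstanceType := []map[string]interface{}{
		{"count": 1, "instance_type": "bx2-2x8"},
		{"count": "2", "instance_type": "cx2-2x4"}, // counts may also be strings or float64
	}

	total := 0
	for i, worker := range workerNodeInstanceType {
		count, err := toInt(worker["count"])
		if err != nil {
			fmt.Printf("worker %d has an invalid count: %v\n", i, err)
			return
		}
		total += count
	}

	fmt.Println("total worker node count:", total)                                        // 3
	fmt.Println("first worker instance type:", workerNodeInstanceType[0]["instance_type"]) // bx2-2x8
}
```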