diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index aa94aff1..87decb17 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -46,14 +46,14 @@ jobs: uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Initialize CodeQL - uses: github/codeql-action/init@45775bd8235c68ba998cffa5171334d58593da47 # v3.28.15 + uses: github/codeql-action/init@28deaeda66b76a05916b6923827895f2b14ab387 # v3.28.16 with: languages: ${{ matrix.language }} - name: Autobuild - uses: github/codeql-action/autobuild@45775bd8235c68ba998cffa5171334d58593da47 # v3.28.15 + uses: github/codeql-action/autobuild@28deaeda66b76a05916b6923827895f2b14ab387 # v3.28.16 - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@45775bd8235c68ba998cffa5171334d58593da47 # v3.28.15 + uses: github/codeql-action/analyze@28deaeda66b76a05916b6923827895f2b14ab387 # v3.28.16 with: category: "/language:${{matrix.language}}" diff --git a/.github/workflows/ossf-scoreboard.yml b/.github/workflows/ossf-scoreboard.yml index 25c66aad..ddc3895d 100644 --- a/.github/workflows/ossf-scoreboard.yml +++ b/.github/workflows/ossf-scoreboard.yml @@ -50,6 +50,6 @@ jobs: retention-days: 5 - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@45775bd8235c68ba998cffa5171334d58593da47 # v3.28.15 + uses: github/codeql-action/upload-sarif@28deaeda66b76a05916b6923827895f2b14ab387 # v3.28.16 with: sarif_file: results.sarif diff --git a/.github/workflows/trivy.yml b/.github/workflows/trivy.yml index 8e41b4b7..f28eeb98 100644 --- a/.github/workflows/trivy.yml +++ b/.github/workflows/trivy.yml @@ -36,7 +36,7 @@ jobs: output: report-fs.sarif - name: Upload Trivy report (fs) GitHub Security - uses: github/codeql-action/upload-sarif@45775bd8235c68ba998cffa5171334d58593da47 # v3.28.15 + uses: github/codeql-action/upload-sarif@28deaeda66b76a05916b6923827895f2b14ab387 # v3.28.16 with: sarif_file: report-fs.sarif category: 'fs' diff --git a/WORKSPACES/SYSTEM/DEV-WEEU-SAP01-X00/sap-parameters.yaml b/WORKSPACES/SYSTEM/DEV-WEEU-SAP01-X00/sap-parameters.yaml index 2221c9fc..d270c252 100644 --- a/WORKSPACES/SYSTEM/DEV-WEEU-SAP01-X00/sap-parameters.yaml +++ b/WORKSPACES/SYSTEM/DEV-WEEU-SAP01-X00/sap-parameters.yaml @@ -27,3 +27,16 @@ database_cluster_type: AFA # Storage Profile # ############################################################################# NFS_provider: AFS + +############################################################################# +# Key Vault Parameters (optional) # +############################################################################# +key_vault_id: /subscriptions//resourceGroups//providers/Microsoft.KeyVault/vaults/ +secret_id: https://.vault.azure.net/secrets// + +############################################################################# +# MSI Client ID # +############################################################################# +# The MSI Client ID is used to authenticate to Azure services +# and is required if the management server uses user assigned managed identity +user_assigned_identity_client_id: "00000000-0000-0000-0000-000000000000" diff --git a/docs/HIGH_AVAILABILITY.md b/docs/HIGH_AVAILABILITY.md index 41297a8b..4e61665f 100644 --- a/docs/HIGH_AVAILABILITY.md +++ b/docs/HIGH_AVAILABILITY.md @@ -255,6 +255,13 @@ platform: "HANA" # - ANF (for Azure NetApp Files) # - AFS (for Azure File Share) NFS_provider: "ANF" # or "AFS" + +# If you're using a user-assigned managed identity (as explained in "Azure RBAC" section above): +# 
- Enter the client ID of that identity here
+# - You can find this ID in Azure Portal → Managed Identities → Your Identity → Properties → Client ID
+# If you're using a system-assigned managed identity instead:
+# - Leave this blank or set it to an empty string ""
+user_assigned_identity_client_id: "00000000-0000-0000-0000-000000000000"
 ```

 2.2.3. Credential Files
diff --git a/docs/SCS_HIGH_AVAILABILITY.md b/docs/SCS_HIGH_AVAILABILITY.md
index d2711de8..471ebb45 100644
--- a/docs/SCS_HIGH_AVAILABILITY.md
+++ b/docs/SCS_HIGH_AVAILABILITY.md
@@ -5,5 +5,14 @@
 | Test Case | Type | Description | More Info |
 |------------------------------|---------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------|
 | HA Parameters Validation | Configuration | The HA parameter validation test validates HA configuration including Corosync settings, Pacemaker resources, SBD device configuration, and SCS system replication setup. | [ha-config.yml](../src/roles/ha_scs/tasks/ha-config.yml) |
-| Resource Migration | Failover | The Resource Migration test validates planned failover scenarios by controlling resource movement between SCS nodes, ensuring proper role changes and data synchronization. | [ascs-migration.yml](../src/roles/ha_scs/tasks/ascs-migration.yml) |
-| ASCS Node Crash | Network | The ASCS Node Crash test simulates cluster behavior when the ASCS node crashes. It simulates an ASCS node failure by forcefully terminating the process, then verifies automatic failover to the ERS node, monitors system replication status, and confirms service recovery without data loss. | [ascs-node-crash.yml](../src/roles/ha_scs/tasks/ascs-node-crash.yml) |
+| Azure Load Balancer | Configuration | The Azure LB configuration test validates Azure Load Balancer setup including health probe configuration, backend pool settings, load balancing rules, and frontend IP configuration. | [azure-lb.yml](../src/roles/ha_scs/tasks/azure-lb.yml) |
+| SAPControl Config Validation | Configuration | The SAPControl Config Validation test runs multiple sapcontrol commands to validate the SCS configuration. It executes commands like HAGetFailoverConfig, HACheckFailoverConfig, and HACheckConfig, capturing their outputs and statuses to ensure proper configuration and functionality. | [sapcontrol-config.yml](../src/roles/ha_scs/tasks/sapcontrol-config.yml) |
+| Resource Migration | Failover | The Resource Migration test validates planned failover scenarios by controlling resource movement between SCS nodes, ensuring proper role changes. | [ascs-migration.yml](../src/roles/ha_scs/tasks/ascs-migration.yml) |
+| ASCS Node Crash | Failover | The ASCS Node Crash test simulates cluster behavior when the ASCS node crashes. It induces the failure by forcefully terminating the process, then verifies automatic failover to the ERS node, monitors system replication status, and confirms service recovery. | [ascs-node-crash.yml](../src/roles/ha_scs/tasks/ascs-node-crash.yml) |
+| Block Network Communication | Network | The Block Network test validates cluster behavior during network partition scenarios by implementing iptables rules to block communication between ASCS and ERS nodes. It verifies split-brain prevention mechanisms, validates proper failover execution when nodes become isolated, and ensures cluster stability after network connectivity is restored. | [block-network.yml](../src/roles/ha_scs/tasks/block-network.yml) |
+| Kill Message Server Process | Process | The Message Server Process Kill test simulates failure of the message server process on the ASCS node by forcefully terminating it using the kill -9 signal. It verifies proper cluster reaction, automatic failover to the ERS node, and ensures service continuity after the process failure. | [kill-message-server.yml](../src/roles/ha_scs/tasks/kill-message-server.yml) |
+| Kill Enqueue Server Process | Process | The Enqueue Server Process Kill test simulates failure of the enqueue server process on the ASCS node by forcefully terminating it using the kill -9 signal. It validates proper cluster behavior and automatic failover execution. | [kill-enqueue-server.yml](../src/roles/ha_scs/tasks/kill-enqueue-server.yml) |
+| Kill Enqueue Replication Server Process | Process | The Enqueue Replication Server Process Kill test simulates failure of the replication server process on the ERS node by forcefully terminating it using the kill -9 signal. This test handles both ENSA1 and ENSA2 architectures. It validates the automatic restart of the process. | [kill-enqueue-replication.yml](../src/roles/ha_scs/tasks/kill-enqueue-replication.yml) |
+| Kill sapstartsrv Process for ASCS | Process | The sapstartsrv Process Kill test simulates failure of the SAP Start Service for the ASCS instance by forcefully terminating it using the kill -9 signal. It validates proper cluster reaction, automatic failover to the ERS node, and verifies service restoration after the process failure. | [kill-sapstartsrv.yml](../src/roles/ha_scs/tasks/kill-sapstartsrv.yml) |
+| Manual Restart of ASCS Instance | Control | The Manual Restart test validates cluster behavior when the ASCS instance is manually stopped using sapcontrol. It verifies proper cluster reaction to a controlled instance shutdown, ensures automatic failover to the ERS node, and confirms service continuity throughout the operation. | [manual-restart.yml](../src/roles/ha_scs/tasks/manual-restart.yml) |
+| HAFailoverToNode Test | Control | The HAFailoverToNode test validates SAP's built-in high availability functionality by using the sapcontrol command to trigger a controlled failover. It executes 'HAFailoverToNode' as the SAP administrator user, which initiates a clean migration of the ASCS instance to another node. | [ha-failover-to-node.yml](../src/roles/ha_scs/tasks/ha-failover-to-node.yml) |
diff --git a/docs/SDAF_INTEGRATION.md b/docs/SDAF_INTEGRATION.md
index aa122f53..4f9a2de2 100644
--- a/docs/SDAF_INTEGRATION.md
+++ b/docs/SDAF_INTEGRATION.md
@@ -60,13 +60,41 @@ This guide will help you set up your existing SAP Deployment Automation Framewor
 | **SAP Functional Tests Type** | Test category to run | Yes | DatabaseHighAvailability |
 | **Telemetry Data Destination** | Where to send test data | No | AzureLogAnalytics |

-   **For AzureLogAnalytics destination** (required parameters):
+   **Providing Telemetry Data Destination Parameters**
+
+   To configure the Telemetry Data Destination for the SAP Testing Automation Framework, you need to specify the required parameters in the Extra Parameters input field. This allows the pipeline to send telemetry data to the desired destination, such as Azure Log Analytics or Azure Data Explorer.
+ + **How to Specify Telemetry Parameters** + + Use the following format in the Extra Parameters field: + + ```bash + --extra-vars "laws_workspace_id=,laws_shared_key=,telemetry_table_name=" + ``` + + Telemetry Data Destination Options + 1. **Azure Log Analytics** + If you are using Azure Log Analytics as the telemetry destination, the following parameters are required: - `laws_workspace_id`: Log Analytics Workspace ID - `laws_shared_key`: Log Analytics Shared Key - `telemetry_table_name`: Name of the table in Log Analytics - **For AzureDataExplorer destination** (required parameters): + ```bash + --extra-vars "laws_workspace_id=12345678-1234-1234-1234-123456789abc,laws_shared_key=**********,telemetry_table_name=SAPTelemetry" + ``` + + 2. **Azure Data Explorer** + If you are using Azure Data Explorer (ADX) as the telemetry destination, the following parameters are required: - `adx_cluster_fqdn`: Azure Data Explorer Cluster FQDN - `adx_database_name`: Azure Data Explorer Database Name - `adx_client_id`: Azure Data Explorer Client ID - `telemetry_table_name`: Name of the table in ADX database + + ```bash + --extra-vars "adx_cluster_fqdn=myadxcluster.kusto.windows.net,adx_database_name=SAPTelemetryDB,adx_client_id=12345678-1234-1234-1234-123456789abc,telemetry_table_name=SAPTelemetry" + ``` + + + + + diff --git a/docs/pseudocode/block-network.md b/docs/pseudocode/block-network.md index bc5e6f97..79496aaf 100644 --- a/docs/pseudocode/block-network.md +++ b/docs/pseudocode/block-network.md @@ -81,4 +81,34 @@ FUNCTION BlockNetworkTest(): RETURN "TEST_PASSED" END FUNCTION +``` + +## ASCS Block Network Test Case + +This test case is a specific instance of blocking network communication, focusing on ASCS-specific scenarios. + +### Pre-requisites + +- Functioning ASCS/ERS cluster +- Two active nodes (ASCS and ERS) +- Cluster services running +- iptables service accessible +- STONITH configuration (stonith-enabled=true) + +### Additional Steps for ASCS Block Network + +- Validate ASCS-specific failover behavior. +- Ensure proper role changes for ASCS and ERS nodes. 
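+
+As an illustration, a network block of this kind is typically implemented with host-level firewall rules. The commands below are a minimal sketch only: the `ERS_IP` variable and the exact rule set are assumptions for illustration, and the authoritative implementation lives in the block-network task file referenced above.
+
+```bash
+# Hypothetical ERS node address; substitute the real value.
+ERS_IP=10.0.0.11
+
+# On the ASCS node: drop all traffic to and from the ERS node.
+iptables -A INPUT -s "$ERS_IP" -j DROP
+iptables -A OUTPUT -d "$ERS_IP" -j DROP
+
+# ...observe the cluster reaction (fencing, failover)...
+
+# Restore connectivity by deleting the same rules.
+iptables -D INPUT -s "$ERS_IP" -j DROP
+iptables -D OUTPUT -d "$ERS_IP" -j DROP
+```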
+ +### Pseudocode Extension + +```pseudocode +FUNCTION ASCSBlockNetworkTest(): + // Reuse BlockNetworkTest pseudocode + CALL BlockNetworkTest() + + // Additional ASCS-specific validations + validate_ascs_failover_behavior() + ensure_ascs_role_changes() +END FUNCTION ``` \ No newline at end of file diff --git a/docs/pseudocode/ha-failover-to-node.md b/docs/pseudocode/ha-failover-to-node.md new file mode 100644 index 00000000..4860ec4f --- /dev/null +++ b/docs/pseudocode/ha-failover-to-node.md @@ -0,0 +1,69 @@ + +# HAFailoverToNode Test Case + +## Prerequisites + +- Functioning SCS cluster +- Two active nodes (ASCS and ERS) +- Cluster services running +- Proper resource configuration + +## Validation + +- Verify failover to the ERS node +- Check cluster stability +- Validate proper role changes + +## Pseudocode + +```pseudocode +FUNCTION HAFailoverToNodeTest(): + // Setup Phase + EXECUTE TestSetup() + EXECUTE PreValidations() + + IF pre_validations_status != "PASSED" THEN + RETURN "Test Prerequisites Failed" + + // Main Test Execution + TRY: + IF current_node == ascs_node THEN + record_start_time() + + // Execute Failover Command + success = execute_failover_command(ers_node) + IF NOT success THEN + THROW "Failed to execute failover command" + + // Validate Cluster Status + cluster_status = validate_cluster_status() + IF cluster_status.ascs_node != ers_node OR cluster_status.ers_node != ascs_node THEN + THROW "Cluster status validation failed after failover" + + // Cleanup Constraints + success = remove_location_constraints() + IF NOT success THEN + THROW "Failed to remove location constraints" + + // Cleanup Resources + success = cleanup_cluster_resources() + IF NOT success THEN + THROW "Failed to cleanup cluster resources" + + record_end_time() + generate_test_report() + END IF + + EXECUTE PostValidations() + + CATCH any_error: + LOG "Error occurred: " + any_error + EXECUTE RescueOperations() + EXECUTE CleanupOperations() + RETURN "TEST_FAILED" + FINALLY: + EXECUTE EnsureClusterHealth() + + RETURN "TEST_PASSED" +END FUNCTION +``` diff --git a/docs/pseudocode/kill-message-server.md b/docs/pseudocode/kill-message-server.md new file mode 100644 index 00000000..31e530cc --- /dev/null +++ b/docs/pseudocode/kill-message-server.md @@ -0,0 +1,112 @@ + +# Kill Message Server Process Test Case + +## Prerequisites + +- Functioning SCS cluster +- Two active nodes (ASCS and ERS) +- Cluster services running +- Proper resource configuration + +## Validation + +- Verify failover to the ERS node +- Check cluster stability +- Validate proper role changes + +## Pseudocode + +```pseudocode +FUNCTION KillMessageServerTest(): + // Setup Phase + EXECUTE TestSetup() + EXECUTE PreValidations() + + IF pre_validations_status != "PASSED" THEN + RETURN "Test Prerequisites Failed" + + // Main Test Execution + TRY: + IF current_node == ascs_node THEN + record_start_time() + + // Check ENSA Version + ensa_version = check_ensa_version() + + // Kill Message Server Process + success = kill_message_server_process() + IF NOT success THEN + THROW "Failed to kill message server process" + + // Validate ASCS Node Stopped + cluster_status = validate_cluster_status() + IF cluster_status.ascs_node != "" THEN + THROW "ASCS node did not stop as expected" + + // Validate Failover to ERS Node + cluster_status = validate_cluster_status() + IF cluster_status.ascs_node != ers_node OR cluster_status.ers_node != ascs_node THEN + THROW "Failover validation failed" + + // Cleanup Resources + success = cleanup_cluster_resources() + IF NOT 
success THEN + THROW "Failed to cleanup cluster resources" + + record_end_time() + generate_test_report() + END IF + + EXECUTE PostValidations() + + CATCH any_error: + LOG "Error occurred: " + any_error + EXECUTE RescueOperations() + EXECUTE CleanupOperations() + RETURN "TEST_FAILED" + FINALLY: + EXECUTE EnsureClusterHealth() + + RETURN "TEST_PASSED" +END FUNCTION +``` + +## Kill Enqueue, Enqueue Replication, and sapstartsrv Processes + +These test cases are specific instances of killing processes, focusing on enqueue, enqueue replication, and sapstartsrv processes. + +### Additional Steps for Each Process + +- Validate process-specific failover behavior. +- Ensure proper role changes for ASCS and ERS nodes. + +### Pseudocode Extension + +```pseudocode +FUNCTION KillEnqueueProcessTest(): + // Reuse KillMessageServerTest pseudocode + CALL KillMessageServerTest() + + // Additional enqueue-specific validations + validate_enqueue_failover_behavior() + ensure_enqueue_role_changes() +END FUNCTION + +FUNCTION KillEnqueueReplicationProcessTest(): + // Reuse KillMessageServerTest pseudocode + CALL KillMessageServerTest() + + // Additional enqueue replication-specific validations + validate_enqueue_replication_failover_behavior() + ensure_enqueue_replication_role_changes() +END FUNCTION + +FUNCTION KillSapstartsrvProcessTest(): + // Reuse KillMessageServerTest pseudocode + CALL KillMessageServerTest() + + // Additional sapstartsrv-specific validations + validate_sapstartsrv_failover_behavior() + ensure_sapstartsrv_role_changes() +END FUNCTION +``` diff --git a/docs/pseudocode/manual-restart.md b/docs/pseudocode/manual-restart.md new file mode 100644 index 00000000..faa220be --- /dev/null +++ b/docs/pseudocode/manual-restart.md @@ -0,0 +1,59 @@ + +# Manual Restart of ASCS Instance Test Case + +## Prerequisites + +- Functioning ASCS/ERS cluster +- Two active nodes (ASCS and ERS) +- Cluster services running +- Proper resource configuration + +## Validation + +- Verify ASCS instance restarts successfully +- Check cluster stability +- Validate proper role changes + +## Pseudocode + +```pseudocode +FUNCTION ManualRestartASCSInstanceTest(): + // Setup Phase + EXECUTE TestSetup() + EXECUTE PreValidations() + + IF pre_validations_status != "PASSED" THEN + RETURN "Test Prerequisites Failed" + + // Main Test Execution + TRY: + IF current_node == ascs_node THEN + record_start_time() + + // Restart ASCS Instance + success = restart_ascs_instance() + IF NOT success THEN + THROW "Failed to restart ASCS instance" + + // Validate ASCS Instance Status + cluster_status = validate_cluster_status() + IF cluster_status.ascs_node != ascs_node THEN + THROW "ASCS instance did not restart as expected" + + record_end_time() + generate_test_report() + END IF + + EXECUTE PostValidations() + + CATCH any_error: + LOG "Error occurred: " + any_error + EXECUTE RescueOperations() + EXECUTE CleanupOperations() + RETURN "TEST_FAILED" + FINALLY: + EXECUTE EnsureClusterHealth() + + RETURN "TEST_PASSED" +END FUNCTION +``` diff --git a/docs/pseudocode/node-crash.md b/docs/pseudocode/node-crash.md index 31dcb6bf..bfd7d56c 100644 --- a/docs/pseudocode/node-crash.md +++ b/docs/pseudocode/node-crash.md @@ -69,4 +69,33 @@ FUNCTION PrimaryNodeCrashTest(): RETURN "Test Passed" END FUNCTION +``` + +## ASCS Node Crash Test Case + +This test case is a specific instance of node crash, focusing on simulating an ASCS node crash and validating failover behavior. 
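+
+For illustration, an abrupt node crash is commonly induced through the kernel's SysRq interface. This is a minimal sketch assuming root access on the node and that SysRq is available; the mechanism actually used by the test is defined in its task file.
+
+```bash
+# Enable all SysRq functions, then trigger an immediate reboot without
+# syncing or unmounting filesystems, approximating a hard node crash.
+echo 1 > /proc/sys/kernel/sysrq
+echo b > /proc/sysrq-trigger
+```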
+ +### Pre-requisites + +- Functioning ASCS/ERS cluster +- Two active nodes (ASCS and ERS) +- Cluster services running +- STONITH configuration (stonith-enabled=true) + +### Additional Steps for ASCS Node Crash + +- Validate ASCS-specific failover behavior. +- Ensure proper role changes for ASCS and ERS nodes. + +### Pseudocode Extension + +```pseudocode +FUNCTION ASCSNodeCrashTest(): + // Reuse PrimaryNodeCrashTest pseudocode + CALL PrimaryNodeCrashTest() + + // Additional ASCS-specific validations + validate_ascs_failover_behavior() + ensure_ascs_role_changes() +END FUNCTION ``` \ No newline at end of file diff --git a/docs/pseudocode/resource-migration.md b/docs/pseudocode/resource-migration.md index b03fc672..d8fffac5 100644 --- a/docs/pseudocode/resource-migration.md +++ b/docs/pseudocode/resource-migration.md @@ -74,4 +74,33 @@ FUNCTION ResourceMigrationTest(): RETURN "Test Passed" END FUNCTION +``` + +## ASCS Migration Test Case + +This test case is a specific instance of resource migration, focusing on migrating the ASCS resource to the ERS node. + +### Pre-requisites + +- Functioning ASCS/ERS cluster +- Two active nodes (ASCS and ERS) +- Cluster services running +- STONITH configuration (stonith-enabled=true) + +### Additional Steps for ASCS Migration + +- Validate ASCS-specific constraints and cleanup. +- Ensure proper role changes for ASCS and ERS nodes. + +### Pseudocode Extension + +```pseudocode +FUNCTION ManualASCSMigrationTest(): + // Reuse ResourceMigrationTest pseudocode + CALL ResourceMigrationTest() + + // Additional ASCS-specific validations + validate_ascs_constraints() + ensure_ascs_role_changes() +END FUNCTION ``` \ No newline at end of file diff --git a/docs/pseudocode/sapcontrol-config.md b/docs/pseudocode/sapcontrol-config.md new file mode 100644 index 00000000..8a63d787 --- /dev/null +++ b/docs/pseudocode/sapcontrol-config.md @@ -0,0 +1,52 @@ + +# SAPControl Configuration Validation Test Case + +## Prerequisites + +- Functioning ASCS/ERS cluster +- Two active nodes (ASCS and ERS) +- Proper SAPControl configuration +- Cluster services running + +## Validation + +- Verify SAPControl configuration matches expected values +- Check cluster stability +- Validate proper role changes + +## Pseudocode + +```pseudocode +FUNCTION SAPControlConfigValidationTest(): + // Setup Phase + EXECUTE TestSetup() + EXECUTE PreValidations() + + IF pre_validations_status != "PASSED" THEN + RETURN "Test Prerequisites Failed" + + // Main Test Execution + TRY: + // Validate SAPControl Configuration + config_status = validate_sapcontrol_configuration() + IF NOT config_status THEN + THROW "SAPControl configuration validation failed" + + // Validate Cluster Stability + cluster_status = validate_cluster_status() + IF NOT cluster_status.is_stable THEN + THROW "Cluster stability validation failed" + + generate_test_report() + + CATCH any_error: + LOG "Error occurred: " + any_error + EXECUTE RescueOperations() + EXECUTE CleanupOperations() + RETURN "TEST_FAILED" + FINALLY: + EXECUTE EnsureClusterHealth() + + RETURN "TEST_PASSED" +END FUNCTION +``` diff --git a/requirements.in b/requirements.in index 7fff8760..f065a270 100644 --- a/requirements.in +++ b/requirements.in @@ -26,6 +26,7 @@ numpy pandas # Core utilities +jmespath Jinja2 PyYAML requests diff --git a/requirements.txt b/requirements.txt index 8e65cdb3..2e8902b2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,12 +6,12 @@ # ansible-compat==25.1.5 # via ansible-lint -ansible-core==2.17.10 +ansible-core==2.17.11 # via # -r 
requirements.in # ansible-compat # ansible-lint -ansible-lint==25.2.1 +ansible-lint==25.4.0 # via -r requirements.in ansible-runner==2.4.1 # via -r requirements.in @@ -23,7 +23,7 @@ attrs==25.3.0 # referencing azure-common==1.1.28 # via azure-mgmt-network -azure-core==1.33.0 +azure-core==1.34.0 # via # azure-identity # azure-kusto-data @@ -34,17 +34,17 @@ azure-identity==1.21.0 # via # -r requirements.in # azure-kusto-data -azure-kusto-data==5.0.2 +azure-kusto-data==5.0.3 # via # -r requirements.in # azure-kusto-ingest -azure-kusto-ingest==5.0.2 +azure-kusto-ingest==5.0.3 # via -r requirements.in azure-mgmt-core==1.5.0 # via azure-mgmt-network azure-mgmt-network==28.1.0 # via -r requirements.in -azure-storage-blob==12.25.1 +azure-storage-blob==12.23.0 # via # -r requirements.in # azure-kusto-ingest @@ -58,11 +58,11 @@ black==25.1.0 # ansible-lint bracex==2.5.post1 # via wcmatch -certifi==2025.1.31 +certifi==2025.4.26 # via requests cffi==1.17.1 # via cryptography -charset-normalizer==3.4.1 +charset-normalizer==3.4.2 # via requests click==8.1.8 # via @@ -72,7 +72,7 @@ coverage[toml]==7.8.0 # via # -r requirements.in # pytest-cov -cryptography==44.0.2 +cryptography==44.0.3 # via # ansible-core # azure-identity @@ -90,7 +90,7 @@ idna==3.10 # via requests ijson==3.3.0 # via azure-kusto-data -importlib-metadata==8.6.1 +importlib-metadata==8.7.0 # via ansible-lint iniconfig==2.1.0 # via pytest @@ -105,11 +105,13 @@ jinja2==3.1.6 # via # -r requirements.in # ansible-core +jmespath==1.0.1 + # via -r requirements.in jsonschema==4.23.0 # via # ansible-compat # ansible-lint -jsonschema-specifications==2024.10.1 +jsonschema-specifications==2025.4.1 # via jsonschema lockfile==0.12.2 # via python-daemon @@ -121,14 +123,14 @@ mccabe==0.7.0 # via pylint mdurl==0.1.2 # via markdown-it-py -msal==1.32.0 +msal==1.32.3 # via # azure-identity # azure-kusto-data # msal-extensions msal-extensions==1.3.1 # via azure-identity -mypy-extensions==1.0.0 +mypy-extensions==1.1.0 # via black numpy==2.2.5 # via @@ -167,7 +169,7 @@ pyjwt[crypto]==2.10.1 # via # msal # pyjwt -pylint==3.3.6 +pylint==3.3.7 # via -r requirements.in pytest==8.3.5 # via @@ -252,7 +254,7 @@ urllib3==2.4.0 # via requests wcmatch==10.0 # via ansible-lint -yamllint==1.37.0 +yamllint==1.37.1 # via ansible-lint zipp==3.21.0 # via importlib-metadata diff --git a/scripts/sap_automation_qa.sh b/scripts/sap_automation_qa.sh index 2d83de28..2119e08e 100755 --- a/scripts/sap_automation_qa.sh +++ b/scripts/sap_automation_qa.sh @@ -18,7 +18,12 @@ RED='\033[0;31m' GREEN='\033[0;32m' NC='\033[0m' -# Function to print logs with color based on severity +# Global variable to store the path of the temporary file. +temp_file="" + +# Print logs with color based on severity. +# :param severity: The severity level of the log (e.g., "INFO", "ERROR"). +# :param message: The message to log. log() { local severity=$1 local message=$2 @@ -37,12 +42,15 @@ log "INFO" "ANSIBLE_MODULE_UTILS: $ANSIBLE_MODULE_UTILS" # Define the path to the vars.yaml file VARS_FILE="${cmd_dir}/../vars.yaml" -# Function to check if a command exists +# Check if a command exists. +# :param command: The command to check. +# :return: None. Exits with a non-zero status if the command does not exist. command_exists() { command -v "$1" &> /dev/null } -# Function to validate input parameters from vars.yaml +# Validate input parameters from vars.yaml. +# :return: None. Exits with a non-zero status if validation fails. 
validate_params() { local missing_params=() local params=("TEST_TYPE" "SYSTEM_CONFIG_NAME" "sap_functional_test_type" "AUTHENTICATION_TYPE") @@ -71,18 +79,37 @@ validate_params() { fi } -# Function to check if a file exists +# Check if a file exists. +# :param file_path: The path to the file to check. +# :param error_message: The error message to display if the file does not exist. +# :return: None. Exits with a non-zero status if the file does not exist. check_file_exists() { local file_path=$1 local error_message=$2 - + log "INFO" "Checking if file exists: $file_path" if [[ ! -f "$file_path" ]]; then log "ERROR" "Error: $error_message" exit 1 fi } -# Function to determine the playbook name based on the sap_functional_test_type +# Extract the error message from a command's output. +# :param error_output: The output containing the error message. +# :return: The extracted error message or a default message if none is found. +extract_error_message() { + local error_output=$1 + local extracted_message + + extracted_message=$(echo "$error_output" | grep -oP '(?<=Message: ).*' | head -n 1) + if [[ -z "$extracted_message" ]]; then + extracted_message="An unknown error occurred. See full error details above." + fi + echo "$extracted_message" +} + +# Determine the playbook name based on the sap_functional_test_type. +# :param test_type: The type of SAP functional test. +# :return: The name of the playbook. get_playbook_name() { local test_type=$1 @@ -100,7 +127,82 @@ get_playbook_name() { esac } -# Function to run the ansible playbook +# Retrieve a secret from Azure Key Vault. +# :param key_vault_id: The ID of the Key Vault. +# :param secret_id: The ID of the secret in the Key Vault. +# :param auth_type: The authentication type (e.g., "SSHKEY", "VMPASSWORD"). +# :return: None. Exits with a non-zero status if retrieval fails. +retrieve_secret_from_key_vault() { + local key_vault_id=$1 + local secret_id=$2 + local auth_type=$3 # Add auth_type as a parameter + + subscription_id=$(echo "$key_vault_id" | awk -F'/' '{for(i=1;i<=NF;i++){if($i=="subscriptions"){print $(i+1)}}}') + + if [[ -z "$key_vault_id" || -z "$secret_id" ]]; then + log "ERROR" "Key Vault ID or secret ID is missing." + exit 1 + fi + + log "INFO" "Using Key Vault ID: $key_vault_id" + log "INFO" "Using secret ID: $secret_id" + + # Authenticate using MSI + log "INFO" "Authenticating using MSI..." + az login --identity + az account set --subscription "$subscription_id" + if [[ $? -ne 0 ]]; then + log "ERROR" "Failed to authenticate using MSI." + exit 1 + fi + + # Attempt to retrieve the secret value and handle errors + log "INFO" "Retrieving secret from Key Vault using resource ID..." + set +e # Temporarily disable exit on error + secret_value=$(az keyvault secret show --id "$secret_id" --query "value" -o tsv 2>&1) + az_exit_code=$? # Capture the exit code of the az command + set -e # Re-enable exit on error + + if [[ $az_exit_code -ne 0 || -z "$secret_value" ]]; then + extracted_message=$(extract_error_message "$secret_value") + log "ERROR" "Failed to retrieve secret from Key Vault: $extracted_message" + exit 1 + fi + + log "INFO" "Successfully retrieved secret from Key Vault." 
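+
+    # Note: "mktemp --dry-run" below only generates a candidate path; it does not
+    # create the file. The existence check and chmod 600 that follow narrow, but
+    # do not fully eliminate, the race window between generating the path and
+    # creating the file.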
+ + # Define a unique temporary file path based on auth_type + if [[ "$auth_type" == "SSHKEY" ]]; then + temp_file=$(mktemp --dry-run --suffix=.ppk) + elif [[ "$auth_type" == "VMPASSWORD" ]]; then + temp_file=$(mktemp --dry-run) + else + log "ERROR" "Unknown authentication type: $auth_type" + exit 1 + fi + + if [[ -f "$temp_file" ]]; then + log "ERROR" "Temporary file already exists: $temp_file" + exit 1 + fi + + # Create the temporary file and write the secret value to it + echo "$secret_value" > "$temp_file" + chmod 600 "$temp_file" # Set the correct permissions for the file + if [[ ! -s "$temp_file" ]]; then + log "ERROR" "Failed to store the retrieved secret in the temporary file." + exit 1 + fi + log "INFO" "Temporary file created with secure permissions: $temp_file" +} + +# Run the ansible playbook. +# :param playbook_name: The name of the playbook to run. +# :param system_hosts: The path to the inventory file. +# :param system_params: The path to the SAP parameters file. +# :param auth_type: The authentication type (e.g., "SSHKEY", "VMPASSWORD"). +# :param system_config_folder: The path to the system configuration folder. +# :return: None. Exits with the return code of the ansible-playbook command. run_ansible_playbook() { local playbook_name=$1 local system_hosts=$2 @@ -108,16 +210,61 @@ run_ansible_playbook() { local auth_type=$4 local system_config_folder=$5 + # Set local secret_id and key_vault_id if defined + local secret_id=$(grep "^secret_id:" "$system_params" | awk '{split($0,a,": "); print a[2]}' | xargs || true) + local key_vault_id=$(grep "^key_vault_id:" "$system_params" | awk '{split($0,a,": "); print a[2]}' | xargs || true) + + if [[ -n "$secret_id" ]]; then + log "INFO" "Extracted secret_id: $secret_id" + fi + + if [[ -n "$key_vault_id" ]]; then + log "INFO" "Extracted key_vault_id: $key_vault_id" + fi + if [[ "$auth_type" == "SSHKEY" ]]; then - local ssh_key="${cmd_dir}/../WORKSPACES/SYSTEM/$SYSTEM_CONFIG_NAME/ssh_key.ppk" - log "INFO" "Using SSH key: $ssh_key." - command="ansible-playbook ${cmd_dir}/../src/$playbook_name.yml -i $system_hosts --private-key $ssh_key \ - -e @$VARS_FILE -e @$system_params -e '_workspace_directory=$system_config_folder'" + log "INFO" "Authentication type is SSHKEY." + + if [[ -n "$key_vault_id" && -n "$secret_id" ]]; then + log "INFO" "Key Vault ID and Secret ID are set. Retrieving SSH key from Key Vault." + retrieve_secret_from_key_vault "$key_vault_id" "$secret_id" "SSHKEY" + + check_file_exists "$temp_file" \ + "Temporary SSH key file not found. Please check the Key Vault secret ID." + command="ansible-playbook ${cmd_dir}/../src/$playbook_name.yml -i $system_hosts --private-key $temp_file \ + -e @$VARS_FILE -e @$system_params -e '_workspace_directory=$system_config_folder'" + else + check_file_exists "${cmd_dir}/../WORKSPACES/SYSTEM/$SYSTEM_CONFIG_NAME/ssh_key.ppk" \ + "ssh_key.ppk not found in WORKSPACES/SYSTEM/$SYSTEM_CONFIG_NAME directory." + ssh_key="${cmd_dir}/../WORKSPACES/SYSTEM/$SYSTEM_CONFIG_NAME/ssh_key.ppk" + command="ansible-playbook ${cmd_dir}/../src/$playbook_name.yml -i $system_hosts --private-key $ssh_key \ + -e @$VARS_FILE -e @$system_params -e '_workspace_directory=$system_config_folder'" + fi + + elif [[ "$auth_type" == "VMPASSWORD" ]]; then + log "INFO" "Authentication type is VMPASSWORD." + + if [[ -n "$key_vault_id" && -n "$secret_id" ]]; then + log "INFO" "Key Vault ID and Secret ID are set. Retrieving VM password from Key Vault." 
+            retrieve_secret_from_key_vault "$key_vault_id" "$secret_id" "VMPASSWORD"
+
+            check_file_exists "$temp_file" \
+                "Temporary password file not found. Please check the Key Vault secret ID."
+            command="ansible-playbook ${cmd_dir}/../src/$playbook_name.yml -i $system_hosts \
+                --extra-vars \"ansible_ssh_pass=$(cat $temp_file)\" --extra-vars @$VARS_FILE -e @$system_params \
+                -e '_workspace_directory=$system_config_folder'"
+        else
+            local password_file="${cmd_dir}/../WORKSPACES/SYSTEM/$SYSTEM_CONFIG_NAME/password"
+            check_file_exists "$password_file" \
+                "password file not found in WORKSPACES/SYSTEM/$SYSTEM_CONFIG_NAME directory."
+            command="ansible-playbook ${cmd_dir}/../src/$playbook_name.yml -i $system_hosts \
+                --extra-vars \"ansible_ssh_pass=$(cat $password_file)\" --extra-vars @$VARS_FILE -e @$system_params \
+                -e '_workspace_directory=$system_config_folder'"
+        fi
+
     else
-        log "INFO" "Using password authentication."
-        command="ansible-playbook ${cmd_dir}/../src/$playbook_name.yml -i $system_hosts \
-            --extra-vars \"ansible_ssh_pass=$(cat ${cmd_dir}/../WORKSPACES/SYSTEM/$SYSTEM_CONFIG_NAME/password)\" \
-            --extra-vars @$VARS_FILE -e @$system_params -e '_workspace_directory=$system_config_folder'"
+        log "ERROR" "Unknown authentication type: $auth_type"
+        exit 1
     fi

     log "INFO" "Running ansible playbook..."
@@ -126,10 +273,17 @@ run_ansible_playbook() {
     return_code=$?
     log "INFO" "Ansible playbook execution completed with return code: $return_code"

+    # Clean up temporary file if it exists
+    if [[ -n "$temp_file" && -f "$temp_file" ]]; then
+        rm -f "$temp_file"
+        log "INFO" "Temporary file deleted: $temp_file"
+    fi
+
     exit $return_code
 }

-# Main script execution
+# Main script execution.
+# :return: None. Exits with a non-zero status if any step fails.
 main() {
     log "INFO" "Activate the virtual environment..."
     set -e
@@ -152,19 +306,11 @@ main() {
     check_file_exists "$SYSTEM_PARAMS" \
         "sap-parameters.yaml not found in WORKSPACES/SYSTEM/$SYSTEM_CONFIG_NAME directory."

-    log "INFO" "Checking if the SSH key or password file exists..."
-    if [[ "$AUTHENTICATION_TYPE" == "SSHKEY" ]]; then
-        check_file_exists "${cmd_dir}/../WORKSPACES/SYSTEM/$SYSTEM_CONFIG_NAME/ssh_key.ppk" \
-            "ssh_key.ppk not found in WORKSPACES/SYSTEM/$SYSTEM_CONFIG_NAME directory."
-    else
-        check_file_exists "${cmd_dir}/../WORKSPACES/SYSTEM/$SYSTEM_CONFIG_NAME/password" \
-            "password file not found in WORKSPACES/SYSTEM/$SYSTEM_CONFIG_NAME directory."
-    fi
-
     playbook_name=$(get_playbook_name "$sap_functional_test_type")
     log "INFO" "Using playbook: $playbook_name."

     run_ansible_playbook "$playbook_name" "$SYSTEM_HOSTS" "$SYSTEM_PARAMS" "$AUTHENTICATION_TYPE" "$SYSTEM_CONFIG_FOLDER"
+
 }

 # Execute the main function
diff --git a/src/module_utils/get_cluster_status.py b/src/module_utils/get_cluster_status.py
index d088116c..42e09cc2 100644
--- a/src/module_utils/get_cluster_status.py
+++ b/src/module_utils/get_cluster_status.py
@@ -86,7 +86,7 @@ def _validate_cluster_basic_status(self, cluster_status_xml: ET.Element):
             self.result["message"] = f"Node {node.attrib['name']} is not online"
             self.log(logging.WARNING, self.result["message"])

-    def _process_node_attributes(self, node_attributes: ET.Element) -> Dict[str, Any]:
+    def _process_node_attributes(self, cluster_status_xml: ET.Element) -> Dict[str, Any]:
         """
         Abstract method to process node attributes.
@@ -115,7 +115,7 @@ def run(self) -> Dict[str, str]: self.log(logging.INFO, "Cluster status retrieved") self._validate_cluster_basic_status(cluster_status_xml) - self._process_node_attributes(cluster_status_xml.find("node_attributes")) + self._process_node_attributes(cluster_status_xml=cluster_status_xml) if not self._is_cluster_stable(): self.result["message"] = "Pacemaker cluster isn't stable" diff --git a/src/modules/filesystem_freeze.py b/src/modules/filesystem_freeze.py index 61e225e2..17bbcd0a 100644 --- a/src/modules/filesystem_freeze.py +++ b/src/modules/filesystem_freeze.py @@ -21,28 +21,37 @@ module: filesystem_freeze short_description: Freezes the filesystem mounted on /hana/shared description: - - This module freezes (mounts as read-only) the filesystem mounted on /hana/shared - - Identifies the device that is mounted on /hana/shared automatically - - Only proceeds with the operation if NFS provider is Azure NetApp Files (ANF) + - This module freezes (mounts as read-only) the filesystem mounted on /hana/shared. + - Identifies the device that is mounted on /hana/shared automatically. + - Only proceeds with the operation if the NFS provider is Azure NetApp Files (ANF). + - Ensures the operation is skipped for non-ANF providers. options: nfs_provider: description: - - The NFS provider type - - Module only executes if this is set to "ANF" + - The NFS provider type. + - Module only executes if this is set to "ANF". + type: str + required: true + database_sid: + description: + - The SAP HANA database System ID (SID). + - Used to identify the specific /hana/shared/ mount point. type: str required: true author: - Microsoft Corporation notes: - - This module requires root permissions to execute filesystem commands - - Uses /proc/mounts to identify the filesystem device - - Only works with Azure NetApp Files as the NFS provider + - This module requires root permissions to execute filesystem commands. + - Uses /proc/mounts to identify the filesystem device. + - Only works with Azure NetApp Files as the NFS provider. + - Skips the operation if the NFS provider is not ANF. """ EXAMPLES = r""" - name: Freeze the filesystem on /hana/shared filesystem_freeze: nfs_provider: "ANF" + database_sid: "HDB" register: freeze_result - name: Display freeze operation results @@ -52,30 +61,36 @@ - name: Skip freezing for non-ANF providers filesystem_freeze: nfs_provider: "Other" + database_sid: "HDB" register: freeze_result """ RETURN = r""" changed: - description: Whether the module made any changes + description: Whether the module made any changes. returned: always type: bool sample: true message: - description: Status message describing the result + description: Status message describing the result. returned: always type: str sample: "The file system (/hana/shared) was successfully mounted read-only." status: - description: Status code of the operation + description: Status code of the operation. returned: always type: str sample: "SUCCESS" details: - description: Command output from the freeze operation + description: Command output from the freeze operation. + returned: on success + type: str + sample: "filesystem /dev/mapper/vg_hana-shared successfully frozen." +mount_point: + description: The mount point that was frozen. 
returned: on success type: str - sample: "filesystem /dev/mapper/vg_hana-shared successfully frozen" + sample: "/hana/shared" """ diff --git a/src/modules/get_azure_lb.py b/src/modules/get_azure_lb.py index 53401772..14ff0df0 100644 --- a/src/modules/get_azure_lb.py +++ b/src/modules/get_azure_lb.py @@ -30,38 +30,44 @@ module: get_azure_lb short_description: Gets and validates Azure Load Balancer details description: - - This module retrieves Azure Load Balancer details for DB/SCS/ERS in a specific resource group - - Validates load balancer rules and health probe configurations against expected values - - Uses Azure SDK to interact with Azure Network resources + - This module retrieves Azure Load Balancer details for DB/SCS/ERS in a specific resource group. + - Validates load balancer rules and health probe configurations against expected values. + - Uses Azure SDK to interact with Azure Network resources. options: subscription_id: description: - - The Azure subscription ID + - The Azure subscription ID. type: str required: true region: description: - - Azure region where the resources are deployed + - Azure region where the resources are deployed. type: str required: true inbound_rules: description: - - JSON string containing inbound rule configurations to check for - - Must include privateIpAddress fields to match load balancers + - JSON string containing inbound rule configurations to check for. + - Must include privateIpAddress fields to match load balancers. type: str required: true constants: description: - - Dictionary containing expected configuration values for validation - - Must include AZURE_LOADBALANCER.RULES and AZURE_LOADBALANCER.PROBES + - Dictionary containing expected configuration values for validation. + - Must include AZURE_LOADBALANCER.RULES and AZURE_LOADBALANCER.PROBES. type: dict required: true + msi_client_id: + description: + - Managed Identity Client ID for authentication. + - Optional; if not provided, the default Managed Identity will be used. + type: str + required: false author: - Microsoft Corporation notes: - - Requires Azure SDK for Python - - Uses Managed Identity for authentication - - Must be run on a machine with Managed Identity credentials configured + - Requires Azure SDK for Python. + - Uses Managed Identity for authentication. + - Must be run on a machine with Managed Identity credentials configured. requirements: - python >= 3.6 - azure-identity @@ -88,52 +94,69 @@ - name: Display load balancer validation results debug: var: lb_result + +- name: Use Managed Identity Client ID for authentication + get_azure_lb: + subscription_id: "{{ azure_subscription_id }}" + region: "{{ azure_region }}" + inbound_rules: "{{ inbound_rules | to_json }}" + constants: + AZURE_LOADBALANCER: + RULES: + idle_timeout_in_minutes: 30 + load_distribution: "Default" + enable_floating_ip: True + PROBES: + interval_in_seconds: 15 + number_of_probes: 3 + msi_client_id: "{{ managed_identity_client_id }}" + register: lb_result """ RETURN = r""" status: - description: Status of the validation + description: Status of the validation. returned: always type: str sample: "SUCCESS" message: - description: Descriptive message about the operation and validation results + description: Descriptive message about the operation and validation results. returned: always type: str - sample: "Successfully validated load balancer parameters" + sample: "Successfully validated load balancer parameters." 
details: - description: Detailed validation results for each parameter + description: Detailed validation results for each parameter. returned: always type: dict contains: parameters: - description: List of parameters validated + description: List of parameters validated. returned: always type: list elements: dict contains: category: - description: Parameter category (load_balancing_rule or probe) + description: Parameter category (load_balancing_rule or probe). type: str sample: "load_balancing_rule" id: - description: Name/identifier of the entity + description: Name/identifier of the entity. type: str sample: "lbRuleSAPILP" name: - description: Name of the parameter + description: Name of the parameter. type: str sample: "idle_timeout_in_minutes" value: - description: Actual value found + description: Actual value found. type: str sample: "30" expected_value: - description: Expected value for comparison + description: Expected value for comparison. type: str sample: "30" status: - description: Result of the comparison + description: Result of the comparison. type: str sample: "SUCCESS" """ @@ -156,13 +179,20 @@ def _create_network_client(self): Create the network client object. """ try: - self.credential = ManagedIdentityCredential() + if self.module_params.get("msi_client_id"): + self.credential = ManagedIdentityCredential( + client_id=self.module_params["msi_client_id"] + ) + else: + self.credential = ManagedIdentityCredential() self.network_client = NetworkManagementClient( self.credential, self.module_params["subscription_id"] ) except Exception as ex: self.handle_error(ex) - self.result["message"] += f"Failed to create network client object. {ex} \n" + self.result["message"] += ( + " Failed to authenticate to Azure to read the Load " + f"Balancer Details. {ex} \n" + ) def get_load_balancers(self) -> list: """ @@ -181,7 +211,7 @@ def get_load_balancers(self) -> list: except Exception as ex: self.handle_error(ex) - self.result["message"] += f"Failed to create network client object. {ex} \n" + self.result["message"] += f" Failed to get load balancers. {ex} \n" def get_load_balancers_details(self) -> dict: """ @@ -290,7 +320,11 @@ def check_parameters(entity, parameters_dict, entity_type): ) self.result["message"] += "Successfully validated load balancer parameters" else: - self.result["message"] += "No load balancer found" + self.result["message"] += ( + "Load Balancer details not fetched." + " Ensure that the Managed Identity (MSI) has sufficient permissions " + "to access the load balancer details." + ) except Exception as ex: self.handle_error(ex) @@ -305,6 +339,7 @@ def run_module(): region=dict(type="str", required=True), inbound_rules=dict(type="str", required=True), constants=dict(type="dict", required=True), + msi_client_id=dict(type="str", required=False), ) module = AnsibleModule(argument_spec=module_args, supports_check_mode=True) diff --git a/src/modules/get_cluster_status_db.py b/src/modules/get_cluster_status_db.py index ee2b02f7..22462283 100644 --- a/src/modules/get_cluster_status_db.py +++ b/src/modules/get_cluster_status_db.py @@ -155,12 +155,12 @@ def _get_automation_register(self) -> None: except Exception: self.result["AUTOMATED_REGISTER"] = "unknown" - def _process_node_attributes(self, node_attributes: ET.Element) -> Dict[str, Any]: + def _process_node_attributes(self, cluster_status_xml: ET.Element) -> Dict[str, Any]: """ Processes node attributes and identifies primary and secondary nodes. - :param node_attributes: XML element containing node attributes. 
- :type node_attributes: ET.Element + :param cluster_status_xml: XML element containing node attributes. + :type cluster_status_xml: ET.Element :return: Dictionary with primary and secondary node information. :rtype: Dict[str, Any] """ @@ -172,7 +172,7 @@ def _process_node_attributes(self, node_attributes: ET.Element) -> Dict[str, Any "replication_mode": "", "primary_site_name": "", } - + node_attributes = cluster_status_xml.find("node_attributes") attribute_map = { f"hana_{self.database_sid}_op_mode": "operation_mode", f"hana_{self.database_sid}_srmode": "replication_mode", @@ -212,7 +212,6 @@ def _process_node_attributes(self, node_attributes: ET.Element) -> Dict[str, Any result["secondary_node"] = node_name result["cluster_status"]["secondary"] = node_attributes_dict - # Update instance attributes self.result.update(result) return result diff --git a/src/modules/get_cluster_status_scs.py b/src/modules/get_cluster_status_scs.py index 652306f9..76c2cf75 100644 --- a/src/modules/get_cluster_status_scs.py +++ b/src/modules/get_cluster_status_scs.py @@ -5,14 +5,19 @@ Python script to get and validate the status of an SCS cluster. """ +import logging import xml.etree.ElementTree as ET from typing import Dict, Any from ansible.module_utils.basic import AnsibleModule try: from ansible.module_utils.get_cluster_status import BaseClusterStatusChecker + from ansible.module_utils.commands import CIB_ADMIN except ImportError: from src.module_utils.get_cluster_status import BaseClusterStatusChecker + from src.module_utils.commands import ( + CIB_ADMIN, + ) DOCUMENTATION = r""" @@ -20,29 +25,33 @@ module: get_cluster_status_scs short_description: Checks the status of a SAP SCS cluster description: - - This module checks the status of a pacemaker cluster in a SAP SCS environment - - Identifies ASCS and ERS nodes in the cluster - - Validates if the cluster is ready and stable + - This module checks the status of a pacemaker cluster in a SAP SCS environment. + - Identifies ASCS and ERS nodes in the cluster. + - Validates if the cluster is ready and stable. + - Retrieves detailed resource and node attributes for ASCS and ERS. options: sap_sid: description: - - SAP System ID (SID) + - SAP System ID (SID). + - Used to identify the specific ASCS and ERS resources. type: str required: true ansible_os_family: description: - - Operating system family (redhat, suse, etc.) + - Operating system family (e.g., redhat, suse). + - Used to determine OS-specific commands and configurations. type: str required: false author: - Microsoft Corporation notes: - - This module requires root privileges to access pacemaker cluster information - - Depends on crm_mon command being available - - Validates the cluster status by checking node attributes for ASCS and ERS + - This module requires root privileges to access pacemaker cluster information. + - Depends on crm_mon command being available. + - Validates the cluster status by checking node attributes for ASCS and ERS. requirements: - python >= 3.6 - pacemaker cluster environment + - SAP SCS cluster configured """ EXAMPLES = r""" @@ -60,29 +69,44 @@ fail: msg: "SAP SCS cluster is not properly configured" when: cluster_result.ascs_node == '' or cluster_result.ers_node == '' + +- name: Validate detailed cluster attributes + debug: + msg: "ASCS attributes: {{ cluster_result.cluster_status.ascs_node }}, ERS attributes: {{ cluster_result.cluster_status.ers_node }}" """ RETURN = r""" status: - description: Status of the cluster check + description: Status of the cluster check. 
returned: always type: str sample: "SUCCESS" message: - description: Descriptive message about the cluster status + description: Descriptive message about the cluster status. returned: always type: str - sample: "Cluster is stable and ready" + sample: "Cluster is stable and ready." ascs_node: - description: Name of the node running the ASCS instance + description: Name of the node running the ASCS instance. returned: always type: str sample: "sapapp1" ers_node: - description: Name of the node running the ERS instance + description: Name of the node running the ERS instance. returned: always type: str sample: "sapapp2" +cluster_status: + description: Detailed cluster attributes for ASCS and ERS nodes. + returned: always + type: dict + contains: + ascs_node: + description: Attributes of the ASCS node. + type: dict + ers_node: + description: Attributes of the ERS node. + type: dict """ @@ -91,49 +115,159 @@ class SCSClusterStatusChecker(BaseClusterStatusChecker): Class to check the status of a pacemaker cluster in an SAP SCS environment. """ - def __init__(self, sap_sid: str, ansible_os_family: str = ""): + def __init__( + self, + sap_sid: str, + ansible_os_family: str = "", + ): super().__init__(ansible_os_family) self.sap_sid = sap_sid + self.ascs_resource_id = "" + self.ers_resource_id = "" + self._get_resource_ids() self.result.update( { "ascs_node": "", "ers_node": "", + "ascs_resource_id": self.ascs_resource_id, + "ers_resource_id": self.ers_resource_id, } ) - def _process_node_attributes(self, node_attributes: ET.Element) -> Dict[str, Any]: + def _get_resource_ids(self) -> None: + """ + Retrieves the resource IDs for ASCS and ERS from the cluster status XML. + + :return: None + """ + try: + resources_string = self.execute_command_subprocess(CIB_ADMIN("resources")) + if resources_string is not None: + resources = ET.fromstring(resources_string).findall( + ".//primitive[@type='SAPInstance']" + ) + for resource in resources: + resource_id = resource.attrib.get("id") + instance_attributes = resource.find("instance_attributes") + + if instance_attributes is not None: + is_ers = False + + for nvpair in instance_attributes: + name = nvpair.attrib.get("name") + value = nvpair.attrib.get("value") + + if name == "IS_ERS" and value == "true": + is_ers = True + + if is_ers: + self.ers_resource_id = resource_id + else: + self.ascs_resource_id = resource_id + + except Exception as ex: + self.handle_error(ex) + + def _process_node_attributes(self, cluster_status_xml: ET.Element) -> Dict[str, Any]: """ Processes node attributes and identifies ASCS and ERS nodes. - :param node_attributes: XML element containing node attributes. - :type node_attributes: ET.Element + :param cluster_status_xml: XML element containing node attributes. + :type cluster_status_xml: ET.Element :return: Dictionary with ASCS and ERS node information. 
:rtype: Dict[str, Any] """ - all_nodes = [node.attrib.get("name") for node in node_attributes] - for node in node_attributes: - node_name = node.attrib["name"] - for attribute in node: - if attribute.attrib["name"] == f"runs_ers_{self.sap_sid.upper()}": - if attribute.attrib["value"] == "1": - self.result["ers_node"] = node_name + result = { + "ascs_node": "", + "ers_node": "", + "cluster_status": { + "ascs_node": {}, + "ers_node": {}, + }, + } + resources = cluster_status_xml.find("resources") + node_attributes = cluster_status_xml.find("node_attributes") + + try: + if node_attributes is not None: + for node in node_attributes: + node_name = node.attrib.get("name") + for attribute in node: + if attribute.attrib.get("name") == f"runs_ers_{self.sap_sid.upper()}": + attr_value = attribute.attrib.get("value") + if attr_value == "1": + result["ers_node"] = node_name + elif attr_value == "0": + result["ascs_node"] = node_name + break + + # If node attributes do not report correct ASCS/ERS nodes, exit + # and return empty values + if result["ascs_node"] == "" and result["ers_node"] == "": + return self.result + + if resources is not None and self.ascs_resource_id and self.ers_resource_id: + ascs_resource = resources.find(f".//resource[@id='{self.ascs_resource_id}']") + ers_resource = resources.find(f".//resource[@id='{self.ers_resource_id}']") + + for resource in [ascs_resource, ers_resource]: + if resource is None: + continue + + resource_id = resource.attrib.get("id") + + node_type = "ascs_node" if resource_id == self.ascs_resource_id else "ers_node" + node_element = resource.find("node") + if node_element is None: + result[node_type] = "" + continue + + node_name = node_element.attrib.get("name") + if node_name is None: + continue + + failed = resource.attrib.get("failed", "false").lower() == "true" + active = resource.attrib.get("active", "false").lower() == "true" + role = resource.attrib.get("role", "unknown").lower() + role_status = role == "started" + + if not failed and active and role_status: + result[node_type] = ( + node_name if result[node_type] == "" else result[node_type] + ) + result["cluster_status"][node_type] = { + "name": node_name, + "id": resource.attrib.get("id"), + "resource_agent": resource.attrib.get("resource_agent"), + "role": role, + "active": "true", + "orphaned": resource.attrib.get("orphaned"), + "blocked": resource.attrib.get("blocked"), + "failed": "false", + "nodes_running_on": resource.attrib.get("nodes_running_on"), + "failure_ignored": resource.attrib.get("failure_ignored"), + } else: - self.result["ascs_node"] = node_name + result[node_type] = "" + else: + self.log( + logging.ERROR, + "Failed to find resources in the cluster status XML.", + ) + except Exception as ex: + self.handle_error(ex) - if self.result["ascs_node"] == "" and self.result["ers_node"] != "": - self.result["ascs_node"] = next( - (n for n in all_nodes if n != self.result["ers_node"]), "" - ) + self.result.update(result) return self.result def _is_cluster_ready(self) -> bool: """ - Check if the cluster is ready by verifying the ASCS node. + Check if the cluster is ready by verifying at least one of ASCS or ERS nodes. - :return: True if the cluster is ready, False otherwise. + :return: True if either ASCS or ERS node is available, False otherwise. 
:rtype: bool """ - return self.result["ascs_node"] != "" + return self.result["ascs_node"] != "" or self.result["ers_node"] != "" def _is_cluster_stable(self) -> bool: """ diff --git a/src/modules/get_pcmk_properties_db.py b/src/modules/get_pcmk_properties_db.py index 58bff662..5a67e85a 100644 --- a/src/modules/get_pcmk_properties_db.py +++ b/src/modules/get_pcmk_properties_db.py @@ -297,21 +297,32 @@ def _create_parameter( else: expected_value = self._get_expected_value(category, name) + if expected_value is None or value == "": + status = TestStatus.INFO.value + elif isinstance(expected_value, (str, list)): + if isinstance(expected_value, list): + status = ( + TestStatus.SUCCESS.value + if str(value) in expected_value + else TestStatus.ERROR.value + ) + expected_value = expected_value[0] + else: + status = ( + TestStatus.SUCCESS.value + if str(value) == str(expected_value) + else TestStatus.ERROR.value + ) + else: + status = TestStatus.ERROR.value + return Parameters( category=f"{category}_{subcategory}" if subcategory else category, id=id if id else "", name=name if not op_name else f"{op_name}_{name}", value=value, expected_value=expected_value if expected_value is not None else "", - status=( - TestStatus.INFO.value - if expected_value is None or value == "" - else ( - TestStatus.SUCCESS.value - if str(value) == str(expected_value) - else TestStatus.ERROR.value - ) - ), + status=status if status else TestStatus.ERROR.value, ).to_dict() def _parse_nvpair_elements(self, elements, category, subcategory=None, op_name=None): @@ -482,9 +493,6 @@ def _parse_resource(self, element, category): if operations is not None: for operation in operations.findall(".//op"): for op_type in ["timeout", "interval"]: - value = operation.get(op_type, "") - if value.endswith("s"): - value = value[:-1] parameters.append( self._create_parameter( category=category, @@ -492,7 +500,7 @@ def _parse_resource(self, element, category): id=operation.get("id", ""), name=op_type, op_name=operation.get("name", ""), - value=value, + value=operation.get(op_type, ""), ) ) return parameters diff --git a/src/modules/get_pcmk_properties_scs.py b/src/modules/get_pcmk_properties_scs.py index 8468ae96..6fbbf969 100644 --- a/src/modules/get_pcmk_properties_scs.py +++ b/src/modules/get_pcmk_properties_scs.py @@ -34,7 +34,8 @@ module: get_pcmk_properties_scs short_description: Validates Pacemaker cluster configurations for SAP ASCS/ERS description: - - Validates Pacemaker cluster configurations against predefined standards for SAP Application Tier ASCS/ERS deployments + - Validates Pacemaker cluster configurations against predefined standards for SAP Application + Tier ASCS/ERS deployments - Checks basic cluster properties, resource configurations, constraints, and OS parameters - Provides detailed validation results for each parameter - Supports different configurations based on operating system and fencing mechanism @@ -74,6 +75,12 @@ - Type of fencing mechanism used type: str required: true + nfs_provider: + description: + - NFS provider type (e.g., AFS, ANF) + type: str + required: false + default: "" author: - Microsoft Corporation notes: @@ -178,7 +185,8 @@ class HAClusterValidator(SapAutomationQA): CONSTRAINTS_CATEGORIES = (".//*", "CONSTRAINTS_DEFAULTS") RESOURCE_CATEGORIES = { - "stonith": ".//primitive[@class='stonith']", + "sbd_stonith": ".//primitive[@type='external/sbd']", + "fence_agent": ".//primitive[@type='fence_azure_arm']", "ipaddr": ".//primitive[@type='IPaddr2']", "azurelb": ".//primitive[@type='azure-lb']", 
"azureevents": ".//primitive[@type='azure-events-az']", @@ -193,6 +201,7 @@ def __init__( virtual_machine_name, constants, fencing_mechanism, + nfs_provider=None, category=None, ): super().__init__() @@ -204,6 +213,7 @@ def __init__( self.virtual_machine_name = virtual_machine_name self.fencing_mechanism = fencing_mechanism self.constants = constants + self.nfs_provider = nfs_provider self.parse_ha_cluster_config() def _get_expected_value(self, category, name): @@ -282,6 +292,7 @@ def _create_parameter( :return: Parameters object :rtype: Parameters """ + status = None if expected_value is None: if category in self.RESOURCE_CATEGORIES or category in ["ascs", "ers"]: expected_value = self._get_resource_expected_value( @@ -293,21 +304,38 @@ def _create_parameter( else: expected_value = self._get_expected_value(category, name) + if expected_value is None or value == "": + status = TestStatus.INFO.value + elif isinstance(expected_value, (str, list)): + if isinstance(expected_value, list): + status = ( + TestStatus.SUCCESS.value + if str(value) in expected_value + else TestStatus.ERROR.value + ) + expected_value = expected_value[0] + else: + status = ( + TestStatus.SUCCESS.value + if str(value) == str(expected_value) + else TestStatus.ERROR.value + ) + elif isinstance(expected_value, dict): + expected_value = expected_value.get(self.nfs_provider, "AFS") + status = ( + TestStatus.SUCCESS.value if str(value) in expected_value else TestStatus.ERROR.value + ) + expected_value = expected_value[0] + else: + status = TestStatus.ERROR.value + return Parameters( category=f"{category}_{subcategory}" if subcategory else category, id=id if id else "", name=name if not op_name else f"{op_name}_{name}", value=value, expected_value=expected_value if expected_value is not None else "", - status=( - TestStatus.INFO.value - if expected_value is None or value == "" - else ( - TestStatus.SUCCESS.value - if str(value) == str(expected_value) - else TestStatus.ERROR.value - ) - ), + status=status if status else TestStatus.ERROR.value, ).to_dict() def _parse_nvpair_elements(self, elements, category, subcategory=None, op_name=None): @@ -371,9 +399,6 @@ def _parse_resource(self, element, category): if operations is not None: for operation in operations.findall(".//op"): for op_type in ["timeout", "interval"]: - value = operation.get(op_type, "") - if value.endswith("s"): - value = value[:-1] parameters.append( self._create_parameter( category=category, @@ -381,7 +406,7 @@ def _parse_resource(self, element, category): id=operation.get("id", ""), name=op_type, op_name=operation.get("name", ""), - value=value, + value=operation.get(op_type, ""), ) ) return parameters @@ -565,6 +590,7 @@ def main() -> None: virtual_machine_name=dict(type="str"), pcmk_constants=dict(type="dict"), fencing_mechanism=dict(type="str"), + nfs_provider=dict(type="str", default=""), ) ) @@ -576,6 +602,7 @@ def main() -> None: virtual_machine_name=module.params["virtual_machine_name"], constants=module.params["pcmk_constants"], fencing_mechanism=module.params["fencing_mechanism"], + nfs_provider=module.params.get("nfs_provider"), ) module.exit_json(**validator.get_result()) diff --git a/src/modules/log_parser.py b/src/modules/log_parser.py index 47fb28f0..4c87cdf6 100644 --- a/src/modules/log_parser.py +++ b/src/modules/log_parser.py @@ -17,49 +17,64 @@ DOCUMENTATION = r""" --- module: log_parser -short_description: Parses system logs for SAP-related keywords +short_description: Parses and merges system logs for SAP-related keywords description: - - 
This module parses system log files for specific SAP and cluster-related keywords
-    - Filters log entries within a specified time range
-    - Supports different log formats based on operating system family
-    - Returns filtered log entries containing predefined or custom keywords
+    - This module parses system log files for specific SAP and cluster-related keywords.
+    - Filters log entries within a specified time range.
+    - Supports merging multiple log files and sorting them chronologically.
+    - Handles different log formats based on the operating system family.
+    - Returns filtered or merged log entries containing predefined or custom keywords.
 options:
     start_time:
         description:
-            - Start time for log filtering in format "YYYY-MM-DD HH:MM:SS"
+            - Start time for log filtering in format "YYYY-MM-DD HH:MM:SS".
         type: str
-        required: true
+        required: false
     end_time:
         description:
-            - End time for log filtering in format "YYYY-MM-DD HH:MM:SS"
+            - End time for log filtering in format "YYYY-MM-DD HH:MM:SS".
         type: str
-        required: true
+        required: false
     log_file:
         description:
-            - Path to the log file to be parsed
-            - Default is system messages log
+            - Path to the log file to be parsed.
+            - Default is system messages log.
         type: str
         required: false
         default: /var/log/messages
     keywords:
         description:
-            - Additional keywords to filter logs by
-            - These are combined with the predefined SAP and Pacemaker keywords
+            - Additional keywords to filter logs by.
+            - These are combined with the predefined SAP and Pacemaker keywords.
         type: list
         required: false
         default: []
     ansible_os_family:
         description:
-            - Operating system family (REDHAT, SUSE, etc.)
-            - Used to determine the appropriate log timestamp format
+            - Operating system family (e.g., REDHAT, SUSE).
+            - Used to determine the appropriate log timestamp format.
         type: str
         required: true
+    function:
+        description:
+            - Specifies the function to execute, either "parse_logs" or "merge_logs".
+        type: str
+        required: true
+        choices: ["parse_logs", "merge_logs"]
+    logs:
+        description:
+            - List of log entries or JSON strings to merge and sort.
+            - Used only when the function is set to "merge_logs".
+        type: list
+        required: false
+        default: []
 author:
     - Microsoft Corporation
 notes:
-    - Predefined keyword sets are included for Pacemaker and SAP system logs
-    - Log entries are filtered by both time range and keyword presence
-    - All entries containing backslashes or quotes will have these characters removed
+    - Predefined keyword sets are included for Pacemaker and SAP system logs.
+    - Log entries are filtered by both time range and keyword presence.
+    - All entries containing backslashes or quotes will have these characters removed.
+    - Merging logs requires proper timestamp formats based on the OS family.
requirements: - python >= 3.6 """ @@ -77,52 +92,53 @@ debug: var: parse_result.filtered_logs -- name: Parse custom log file with additional keywords +- name: Merge and sort multiple log files log_parser: - start_time: "2023-01-01 00:00:00" - end_time: "2023-01-02 00:00:00" - log_file: "/var/log/pacemaker.log" - keywords: - - "SAPHana_HDB_00" - - "error" - - "failure" - ansible_os_family: "SUSE" - register: custom_logs + function: "merge_logs" + logs: + - "[\"Jan 01 12:34:56 server1 pacemaker-controld: Notice: Resource SAPHana_HDB_00 started\"]" + - "[\"Jan 01 12:35:00 server2 pacemaker-controld: Notice: Resource SAPHana_HDB_01 started\"]" + ansible_os_family: "REDHAT" + register: merge_result + +- name: Display merged log entries + debug: + var: merge_result.filtered_logs """ RETURN = r""" status: - description: Status of the log parsing operation + description: Status of the log parsing or merging operation. returned: always type: str sample: "SUCCESS" message: - description: Error message in case of failure + description: Error message in case of failure. returned: on failure type: str - sample: "Could not open file /var/log/messages: No such file or directory" + sample: "Could not open file /var/log/messages: No such file or directory." start_time: - description: Start time used for filtering - returned: always + description: Start time used for filtering. + returned: when function is "parse_logs". type: str sample: "2023-01-01 00:00:00" end_time: - description: End time used for filtering - returned: always + description: End time used for filtering. + returned: when function is "parse_logs". type: str sample: "2023-01-02 00:00:00" log_file: - description: Path to the log file that was parsed - returned: always + description: Path to the log file that was parsed. + returned: when function is "parse_logs". type: str sample: "/var/log/messages" keywords: - description: List of keywords used for filtering - returned: always + description: List of keywords used for filtering. + returned: when function is "parse_logs". type: list sample: ["SAPHana", "pacemaker-fenced", "reboot"] filtered_logs: - description: JSON string containing filtered log entries + description: JSON string containing filtered or merged log entries. returned: always type: str sample: "[\"Jan 01 12:34:56 server1 pacemaker-controld: Notice: Resource SAPHana_HDB_00 started\"]" @@ -180,6 +196,7 @@ def __init__( end_time: str, log_file: str, ansible_os_family: str, + logs: list = None, ): super().__init__() self.start_time = start_time @@ -187,6 +204,7 @@ def __init__( self.log_file = log_file self.keywords = list(PCMK_KEYWORDS | SYS_KEYWORDS) self.ansible_os_family = ansible_os_family + self.logs = logs if logs else [] self.result.update( { "start_time": start_time, @@ -197,6 +215,62 @@ def __init__( } ) + def merge_logs(self) -> None: + """ + Merges multiple log files into a single list for processing. 
+ """ + try: + all_logs = [] + parsed_logs = [] + if not self.logs: + self.result.update( + { + "filtered_logs": json.dumps([]), + "status": TestStatus.SUCCESS.value, + "message": "No logs provided to merge", + } + ) + return + + for logs in self.logs: + if isinstance(logs, str): + try: + parsed = json.loads(logs) + parsed_logs.extend(parsed) + except json.JSONDecodeError: + parsed_logs.append(logs) + else: + parsed_logs.extend(logs) + + for log in parsed_logs: + try: + if self.ansible_os_family == "REDHAT": + timestamp_str = " ".join(log.split()[:3]) + log_time = datetime.strptime(timestamp_str, "%b %d %H:%M:%S") + log_time = log_time.replace(year=datetime.now().year) + all_logs.append((log_time, log)) + + elif self.ansible_os_family == "SUSE": + timestamp_str = log.split(".")[0] + log_time = datetime.strptime(timestamp_str, "%Y-%m-%dT%H:%M:%S") + all_logs.append((log_time, log)) + + else: + all_logs.append((datetime.min, log)) + except (ValueError, IndexError): + all_logs.append((datetime.min, log)) + + sorted_logs = [log for _, log in sorted(all_logs, key=lambda x: x[0])] + + self.result.update( + { + "filtered_logs": json.dumps(sorted_logs), + "status": TestStatus.SUCCESS.value, + } + ) + except Exception as ex: + self.handle_error(ex) + def parse_logs(self) -> None: """ Parses the logs based on the provided parameters. @@ -245,22 +319,28 @@ def run_module() -> None: Sets up and runs the log parsing module with the specified arguments. """ module_args = dict( - start_time=dict(type="str", required=True), - end_time=dict(type="str", required=True), + start_time=dict(type="str", required=False), + end_time=dict(type="str", required=False), log_file=dict(type="str", required=False, default="/var/log/messages"), keywords=dict(type="list", required=False, default=[]), ansible_os_family=dict(type="str", required=True), + function=dict(type="str", required=True, choices=["parse_logs", "merge_logs"]), + logs=dict(type="list", required=False, default=[]), ) module = AnsibleModule(argument_spec=module_args, supports_check_mode=True) parser = LogParser( - start_time=module.params["start_time"], - end_time=module.params["end_time"], - log_file=module.params["log_file"], + start_time=module.params.get("start_time"), + end_time=module.params.get("end_time"), + log_file=module.params.get("log_file"), ansible_os_family=module.params["ansible_os_family"], + logs=module.params.get("logs"), ) - parser.parse_logs() + if module.params["function"] == "parse_logs": + parser.parse_logs() + elif module.params["function"] == "merge_logs": + parser.merge_logs() result = parser.get_result() module.exit_json(**result) diff --git a/src/playbook_00_ha_db_functional_tests.yml b/src/playbook_00_ha_db_functional_tests.yml index 7d54591c..05e2fd2a 100644 --- a/src/playbook_00_ha_db_functional_tests.yml +++ b/src/playbook_00_ha_db_functional_tests.yml @@ -90,3 +90,8 @@ - name: "Run test cases by including them as roles" ansible.builtin.include_tasks: "./roles/misc/tasks/render-html-report.yml" when: test_group_name is defined + + - name: "Debug the group_invocation_id" + ansible.builtin.debug: + msg: "Group invocation ID: {{ test_group_invocation_id }}" + when: test_group_invocation_id is defined diff --git a/src/playbook_00_ha_scs_functional_tests.yml b/src/playbook_00_ha_scs_functional_tests.yml index 2c2109f4..0495a56c 100644 --- a/src/playbook_00_ha_scs_functional_tests.yml +++ b/src/playbook_00_ha_scs_functional_tests.yml @@ -60,3 +60,8 @@ - name: "Run test cases by including them as roles" 
ansible.builtin.include_tasks: "./roles/misc/tasks/render-html-report.yml" when: test_group_name is defined + + - name: "Debug the group_invocation_id" + ansible.builtin.debug: + msg: "Group invocation ID: {{ test_group_invocation_id }}" + when: test_group_invocation_id is defined diff --git a/src/roles/ha_db_hana/tasks/azure-lb.yml b/src/roles/ha_db_hana/tasks/azure-lb.yml index 85739b5e..54edfdb7 100644 --- a/src/roles/ha_db_hana/tasks/azure-lb.yml +++ b/src/roles/ha_db_hana/tasks/azure-lb.yml @@ -1,59 +1,9 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. -# /*--------------------------------------------------------------------------- -# | Azure Load Balancer Validation | -# +--------------------------------------------------------------------------*/ -- name: "Azure LB Validation" - run_once: true - block: - - name: "Test Setup Tasks" - ansible.builtin.include_tasks: "roles/misc/tasks/test-case-setup.yml" +- name: Read constants file and set the facts + ansible.builtin.set_fact: + all_constants: "{{ lookup('file', 'constants.yaml') | from_yaml }}" - - name: "Pre Validations: Validate the Azure Load Balancer config" - become: true - block: - - name: "Retrieve Subscription ID and Resource Group Name" - ansible.builtin.uri: - url: http://169.254.169.254/metadata/instance?api-version=2021-02-01 - use_proxy: false - headers: - Metadata: true - register: azure_instance_metadata - - - name: "Get the Azure Load Balancer IP" - ansible.builtin.uri: - url: http://169.254.169.254:80/metadata/loadbalancer?api-version=2020-10-01 - use_proxy: false - headers: - Metadata: true - register: azure_loadbalancer_metadata - - - name: "Azure Load Balancer check for the DB nodes" - delegate_to: localhost - get_azure_lb: - subscription_id: "{{ azure_instance_metadata.json.compute.subscriptionId }}" - region: "{{ azure_instance_metadata.json.compute.location }}" - inbound_rules: "{{ azure_loadbalancer_metadata.json.loadbalancer.inboundRules }}" - constants: "{{ lookup('file', 'constants.yaml') | from_yaml }}" - register: test_result - - - name: "Set the test case status to PASSED" - ansible.builtin.set_fact: - test_case_name: "{{ item.name }}: {{ virtual_host }}" - test_case_message: "{{ test_result.message }}" - test_case_details: "{{ test_result.details }}" - test_case_hostname: "{{ virtual_host }}" - test_case_status: "{{ test_result.status }}" - - rescue: - - name: "Test case failed" - ansible.builtin.set_fact: - test_case_name: "{{ item.name }}: {{ virtual_host }}" - test_case_status: "FAILED" - test_case_details: "{{ test_result }}" - test_case_message: "{{ ansible_failed_result }}" - test_case_hostname: "{{ virtual_host }}" - - - name: "Post Telemetry Data" - ansible.builtin.include_tasks: "roles/misc/tasks/post-telemetry-data.yml" +- name: Include Load Balancer tasks + ansible.builtin.include_tasks: "roles/misc/tasks/loadbalancer.yml" diff --git a/src/roles/ha_db_hana/tasks/block-network.yml b/src/roles/ha_db_hana/tasks/block-network.yml index 69a1a323..e12c87b6 100644 --- a/src/roles/ha_db_hana/tasks/block-network.yml +++ b/src/roles/ha_db_hana/tasks/block-network.yml @@ -48,7 +48,7 @@ - name: "Test Execution: Wait for the cluster to be in a stable state" ansible.builtin.wait_for: - timeout: 60 + timeout: "{{ default_timeout }}" - name: "Test Execution: Check node status on primary and secondary" block: @@ -93,8 +93,8 @@ database_sid: "{{ db_sid | lower }}" ansible_os_family: "{{ ansible_os_family | upper }}" register: cluster_status_test_execution_primary - retries: 50 - 
delay: 10 + retries: "{{ default_retries }}" + delay: "{{ default_delay }}" until: | cluster_status_test_execution_primary.primary_node == cluster_status_pre.primary_node and cluster_status_test_execution_primary.secondary_node == "" @@ -110,7 +110,7 @@ - name: "Test Execution: Wait for the cluster to be in a stable state" ansible.builtin.wait_for: - timeout: 60 + timeout: "{{ default_timeout }}" - name: "Test Execution: Validate HANA DB cluster status 2" get_cluster_status_db: @@ -118,8 +118,8 @@ database_sid: "{{ db_sid | lower }}" ansible_os_family: "{{ ansible_os_family | upper }}" register: cluster_status_post_primary - retries: 50 - delay: 10 + retries: "{{ default_retries }}" + delay: "{{ default_delay }}" until: | cluster_status_post_primary.primary_node == cluster_status_pre.primary_node and cluster_status_post_primary.secondary_node == cluster_status_pre.secondary_node @@ -136,15 +136,15 @@ database_sid: "{{ db_sid | lower }}" ansible_os_family: "{{ ansible_os_family | upper }}" register: cluster_status_test_execution_secondary - retries: 50 - delay: 10 + retries: "{{ default_retries }}" + delay: "{{ default_delay }}" until: | cluster_status_test_execution_secondary.primary_node == cluster_status_pre.secondary_node and cluster_status_test_execution_secondary.secondary_node == "" - name: "Test Execution: Wait for the cluster to be in a stable state" ansible.builtin.wait_for: - timeout: 60 + timeout: "{{ default_timeout }}" - name: "Test Execution: Validate HANA DB cluster status 2" get_cluster_status_db: @@ -152,8 +152,8 @@ database_sid: "{{ db_sid | lower }}" ansible_os_family: "{{ ansible_os_family | upper }}" register: cluster_status_post_secondary - retries: 50 - delay: 10 + retries: "{{ default_retries }}" + delay: "{{ default_delay }}" until: | cluster_status_post_secondary.primary_node == cluster_status_pre.secondary_node and cluster_status_post_secondary.secondary_node == cluster_status_pre.primary_node diff --git a/src/roles/ha_db_hana/tasks/files/constants.yaml b/src/roles/ha_db_hana/tasks/files/constants.yaml index f67a8dba..90305953 100644 --- a/src/roles/ha_db_hana/tasks/files/constants.yaml +++ b/src/roles/ha_db_hana/tasks/files/constants.yaml @@ -8,7 +8,7 @@ # cibadmin --query --scope crm_config CRM_CONFIG_DEFAULTS: cluster-infrastructure: corosync - priority-fencing-delay: '30' + priority-fencing-delay: ['30', '30s'] stonith-action: reboot stonith-enabled: 'true' concurrent-fencing: 'true' @@ -20,7 +20,7 @@ CRM_CONFIG_DEFAULTS: # cibadmin --query --scope op_defaults OP_DEFAULTS: record-pending: 'true' - timeout: '600' + timeout: ['600', '600s'] # === Resource Defaults === # cibadmin --query --scope rsc_defaults @@ -43,14 +43,14 @@ CONSTRAINTS: # Specify the properties that are different for different OS versions VALID_CONFIGS: REDHAT: - priority-fencing-delay: '15s' + priority-fencing-delay: ['15', '15s'] SUSE: {} AFA: have-watchdog: "false" - stonith-timeout: "900s" + stonith-timeout: ["900s", "900"] ISCSI: have-watchdog: "true" - stonith-timeout: "144s" + stonith-timeout: ["144", "144s"] # === Resource Defaults === @@ -62,26 +62,26 @@ RESOURCE_DEFAULTS: pcmk_delay_max: "15" pcmk_monitor_retries: "4" pcmk_action_limit: "3" - pcmk_reboot_timeout: "900" - power_timeout: "240" - pcmk_monitor_timeout: "120" + pcmk_reboot_timeout: ["900", "900s"] + power_timeout: ["240", "240s"] + pcmk_monitor_timeout: ["120", "120s"] operations: monitor: - interval: "3600" - timeout: "120" + interval: ["3600", "3600s"] + timeout: ["120", "120s"] sbd_stonith: instance_attributes: 
pcmk_delay_max: "15" pcmk_monitor_retries: "4" pcmk_action_limit: "3" - pcmk_reboot_timeout: "900" - power_timeout: "240" - pcmk_monitor_timeout: "120" + pcmk_reboot_timeout: ["900", "900s"] + power_timeout: ["240", "240s"] + pcmk_monitor_timeout: ["120", "120s"] operations: monitor: - interval: "600" - timeout: "15" + interval: ["600", "600s"] + timeout: ["15", "15s"] topology: meta_attributes: @@ -90,14 +90,14 @@ RESOURCE_DEFAULTS: interleave: "true" operations: monitor: - interval: "10" - timeout: "600" + interval: ["10", "10s"] + timeout: ["600", "600s"] start: - interval: "0" - timeout: "600" + interval: ["0", "0s"] + timeout: ["600", "600s"] stop: - interval: "0" - timeout: "300" + interval: ["0", "0s"] + timeout: ["300", "300s"] hana: meta_attributes: @@ -113,24 +113,24 @@ RESOURCE_DEFAULTS: AUTOMATED_REGISTER: "true" operations: start: - interval: "0" - timeout: "3600" + interval: ["0", "0s"] + timeout: ["3600", "3600s"] stop: - interval: "0" - timeout: "3600" + interval: ["0", "0s"] + timeout: ["3600", "3600s"] promote: - interval: "0" - timeout: "3600" + interval: ["0", "0s"] + timeout: ["3600", "3600s"] monitor: - timeout: "700" + timeout: ["700", "700s"] ipaddr: meta_attributes: target-role: "Started" operations: monitor: - interval: "10" - timeout: "20" + interval: ["10", "10s"] + timeout: ["20", "20s"] filesystem: meta_attributes: @@ -138,14 +138,14 @@ RESOURCE_DEFAULTS: interleave: "true" operations: monitor: - interval: "120" - timeout: "120" + interval: ["120", "120s"] + timeout: ["120", "120s"] start: - interval: "0" - timeout: "120" + interval: ["0", "0s"] + timeout: ["120", "120s"] stop: - interval: "0" - timeout: "120" + interval: ["0", "0s"] + timeout: ["120", "120s"] azurelb: meta_attributes: @@ -162,19 +162,19 @@ RESOURCE_DEFAULTS: pcmk_monitor_timeout: "120" operations: monitor: - interval: "3600" + interval: ["3600", "3600s"] sbd_stonith: instance_attributes: pcmk_monitor_retries: "4" pcmk_action_limit: "3" - pcmk_reboot_timeout: "900" - power_timeout: "240" - pcmk_monitor_timeout: "120" + pcmk_reboot_timeout: ["900", "900s"] + power_timeout: ["240", "240s"] + pcmk_monitor_timeout: ["120", "120s"] operations: monitor: - interval: "600" - timeout: "15" + interval: ["600", "600s"] + timeout: ["15", "15s"] topology: meta_attributes: @@ -185,20 +185,20 @@ RESOURCE_DEFAULTS: failure-timeout: "120s" operations: monitor: - interval: "10" - timeout: "600" + interval: ["10", "10s"] + timeout: ["600", "600s"] start: - interval: "0" - timeout: "600" + interval: ["0", "0s"] + timeout: ["600", "600s"] stop: - interval: "0" - timeout: "300" + interval: ["0", "0s"] + timeout: ["300", "300s"] methods: - timeout: "5" - interval: "0" + timeout: ["5", "5s"] + interval: ["0", "0s"] reload: - timeout: "5" - interval: "0" + timeout: ["5", "5s"] + interval: ["0", "0s"] hana: meta_attributes: @@ -214,30 +214,30 @@ RESOURCE_DEFAULTS: AUTOMATED_REGISTER: "true" operations: start: - interval: "0" - timeout: "3600" + interval: ["0", "0s"] + timeout: ["3600", "3600s"] stop: - interval: "0" - timeout: "3600" + interval: ["0", "0s"] + timeout: ["3600", "3600s"] promote: - interval: "0" - timeout: "3600" + interval: ["0", "0s"] + timeout: ["3600", "3600s"] monitor: - timeout: "700" + timeout: ["700", "700s"] ipaddr: meta_attributes: target-role: "Started" operations: monitor: - interval: "10" - timeout: "20" + interval: ["10", "10s"] + timeout: ["20", "20s"] start: - interval: "0" - timeout: "20" + interval: ["0", "0s"] + timeout: ["20", "20s"] stop: - interval: "0" - timeout: "20" + interval: 
["0", "0s"] + timeout: ["20", "20s"] filesystem: meta_attributes: @@ -245,28 +245,28 @@ RESOURCE_DEFAULTS: interleave: "true" operations: monitor: - interval: "20" - timeout: "120" + interval: ["20", "20s"] + timeout: ["120", "120s"] start: - interval: "0" - timeout: "60" + interval: ["0", "0s"] + timeout: ["60", "60s"] stop: - interval: "0" - timeout: "60" + interval: ["0", "0s"] + timeout: ["60", "60s"] azurelb: meta_attributes: resource-stickiness: "0" operations: monitor: - interval: "10" - timeout: "20" + interval: ["10", "10s"] + timeout: ["20", "20s"] start: - interval: "0" - timeout: "20" + interval: ["0", "0s"] + timeout: ["20", "20s"] stop: - interval: "0" - timeout: "20" + interval: ["0", "0s"] + timeout: ["20", "20s"] # === OS Parameters === @@ -291,7 +291,7 @@ GLOBAL_INI: REDHAT: provider: "SAPHanaSR" - path: "/hana/shared/myHooks" + path: ["/usr/share/SAPHanaSR/srHook", "/hana/shared/myHooks"] execution_order: "1" @@ -301,7 +301,6 @@ AZURE_LOADBALANCER: PROBES: probe_threshold: 2 interval_in_seconds: 5 - number_of_probes: 2 RULES: idle_timeout_in_minutes: 30 diff --git a/src/roles/ha_db_hana/tasks/fs-freeze.yml b/src/roles/ha_db_hana/tasks/fs-freeze.yml index 8787083c..038a1075 100644 --- a/src/roles/ha_db_hana/tasks/fs-freeze.yml +++ b/src/roles/ha_db_hana/tasks/fs-freeze.yml @@ -59,8 +59,8 @@ database_sid: "{{ db_sid | lower }}" ansible_os_family: "{{ ansible_os_family | upper }}" register: cluster_status_test_execution - retries: 50 - delay: 10 + retries: "{{ default_retries }}" + delay: "{{ default_delay }}" until: | cluster_status_test_execution.primary_node == cluster_status_pre.secondary_node and cluster_status_test_execution.secondary_node == cluster_status_pre.primary_node @@ -74,8 +74,8 @@ database_sid: "{{ db_sid | lower }}" ansible_os_family: "{{ ansible_os_family | upper }}" register: cluster_status_post - retries: 50 - delay: 10 + retries: "{{ default_retries }}" + delay: "{{ default_delay }}" until: | cluster_status_post.primary_node == cluster_status_pre.secondary_node and cluster_status_post.secondary_node == cluster_status_pre.primary_node diff --git a/src/roles/ha_db_hana/tasks/primary-crash-index.yml b/src/roles/ha_db_hana/tasks/primary-crash-index.yml index 792f9438..e94c5123 100644 --- a/src/roles/ha_db_hana/tasks/primary-crash-index.yml +++ b/src/roles/ha_db_hana/tasks/primary-crash-index.yml @@ -58,8 +58,8 @@ database_sid: "{{ db_sid | lower }}" ansible_os_family: "{{ ansible_os_family | upper }}" register: cluster_status_test_execution - retries: 50 - delay: 10 + retries: "{{ default_retries }}" + delay: "{{ default_delay }}" until: | cluster_status_test_execution.primary_node == cluster_status_pre.secondary_node and cluster_status_test_execution.secondary_node == cluster_status_pre.primary_node @@ -71,8 +71,8 @@ database_sid: "{{ db_sid | lower }}" ansible_os_family: "{{ ansible_os_family | upper }}" register: cluster_status_test_execution - retries: 50 - delay: 10 + retries: "{{ default_retries }}" + delay: "{{ default_delay }}" until: | cluster_status_test_execution.primary_node == cluster_status_pre.secondary_node and cluster_status_test_execution.secondary_node == "" @@ -119,8 +119,8 @@ database_sid: "{{ db_sid | lower }}" ansible_os_family: "{{ ansible_os_family | upper }}" register: cluster_status_post - retries: 50 - delay: 10 + retries: "{{ default_retries }}" + delay: "{{ default_delay }}" until: | cluster_status_post.primary_node == cluster_status_pre.secondary_node and cluster_status_post.secondary_node == cluster_status_pre.primary_node 
diff --git a/src/roles/ha_db_hana/tasks/primary-echo-b.yml b/src/roles/ha_db_hana/tasks/primary-echo-b.yml index 66d18924..76588e11 100644 --- a/src/roles/ha_db_hana/tasks/primary-echo-b.yml +++ b/src/roles/ha_db_hana/tasks/primary-echo-b.yml @@ -48,8 +48,8 @@ operation_step: "test_execution" database_sid: "{{ db_sid | lower }}" ansible_os_family: "{{ ansible_os_family | upper }}" - retries: 50 - delay: 10 + retries: "{{ default_retries }}" + delay: "{{ default_delay }}" register: cluster_status_test_execution until: | cluster_status_test_execution.primary_node == cluster_status_pre.secondary_node and @@ -61,8 +61,8 @@ operation_step: "test_execution" database_sid: "{{ db_sid | lower }}" ansible_os_family: "{{ ansible_os_family | upper }}" - retries: 50 - delay: 10 + retries: "{{ default_retries }}" + delay: "{{ default_delay }}" register: cluster_status_test_execution until: | cluster_status_test_execution.primary_node == cluster_status_pre.secondary_node and @@ -106,8 +106,8 @@ database_sid: "{{ db_sid | lower }}" ansible_os_family: "{{ ansible_os_family | upper }}" register: cluster_status_post - retries: 50 - delay: 10 + retries: "{{ default_retries }}" + delay: "{{ default_delay }}" until: | cluster_status_post.primary_node == cluster_status_pre.secondary_node and cluster_status_post.secondary_node == cluster_status_pre.primary_node diff --git a/src/roles/ha_db_hana/tasks/primary-node-crash.yml b/src/roles/ha_db_hana/tasks/primary-node-crash.yml index 73076a7f..616a09bb 100644 --- a/src/roles/ha_db_hana/tasks/primary-node-crash.yml +++ b/src/roles/ha_db_hana/tasks/primary-node-crash.yml @@ -45,8 +45,8 @@ database_sid: "{{ db_sid | lower }}" ansible_os_family: "{{ ansible_os_family | upper }}" register: cluster_status_test_execution - retries: 50 - delay: 10 + retries: "{{ default_retries }}" + delay: "{{ default_delay }}" until: | cluster_status_test_execution.primary_node == cluster_status_pre.secondary_node and cluster_status_test_execution.secondary_node == "" @@ -87,8 +87,8 @@ database_sid: "{{ db_sid | lower }}" ansible_os_family: "{{ ansible_os_family | upper }}" register: cluster_status_post - retries: 50 - delay: 10 + retries: "{{ default_retries }}" + delay: "{{ default_delay }}" until: | cluster_status_post.primary_node == cluster_status_pre.secondary_node and cluster_status_post.secondary_node == cluster_status_pre.primary_node diff --git a/src/roles/ha_db_hana/tasks/primary-node-kill.yml b/src/roles/ha_db_hana/tasks/primary-node-kill.yml index 9343474c..d727fa88 100644 --- a/src/roles/ha_db_hana/tasks/primary-node-kill.yml +++ b/src/roles/ha_db_hana/tasks/primary-node-kill.yml @@ -46,8 +46,8 @@ database_sid: "{{ db_sid | lower }}" ansible_os_family: "{{ ansible_os_family | upper }}" register: cluster_status_test_execution - retries: 50 - delay: 10 + retries: "{{ default_retries }}" + delay: "{{ default_delay }}" until: | cluster_status_test_execution.primary_node == cluster_status_pre.secondary_node and cluster_status_test_execution.secondary_node == cluster_status_pre.primary_node @@ -59,10 +59,10 @@ get_cluster_status_db: operation_step: "test_execution" database_sid: "{{ db_sid | lower }}" - ansible_os_family: "{{ ansible_os_family | upper }}" + ansible_os_family: "{{ ansible_os_family | upper }}" register: cluster_status_test_execution - retries: 50 - delay: 10 + retries: "{{ default_retries }}" + delay: "{{ default_delay }}" until: | cluster_status_test_execution.primary_node == cluster_status_pre.secondary_node and cluster_status_test_execution.secondary_node == "" 
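Note: the `until` expressions in these HANA fault-injection tests encode a two-phase recovery check. Immediately after the fault, the old secondary should be promoted while the crashed node is still absent (`secondary_node == ""`); once the node reboots and re-registers, the roles should be fully swapped. Stated as plain predicates over the fields that `get_cluster_status_db` registers (a hedged sketch for readability, not code from the repository):

```python
def failover_in_progress(cur: dict, pre: dict) -> bool:
    """Intermediate state: old secondary promoted, crashed node not yet back."""
    return cur["primary_node"] == pre["secondary_node"] and cur["secondary_node"] == ""

def failover_complete(cur: dict, pre: dict) -> bool:
    """Final state: roles swapped, crashed node re-registered as the new secondary."""
    return (cur["primary_node"] == pre["secondary_node"]
            and cur["secondary_node"] == pre["primary_node"])
```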
@@ -104,8 +104,8 @@ database_sid: "{{ db_sid | lower }}" ansible_os_family: "{{ ansible_os_family | upper }}" register: cluster_status_post - retries: 50 - delay: 10 + retries: "{{ default_retries }}" + delay: "{{ default_delay }}" until: | cluster_status_post.primary_node == cluster_status_pre.secondary_node and cluster_status_post.secondary_node == cluster_status_pre.primary_node diff --git a/src/roles/ha_db_hana/tasks/resource-migration.yml b/src/roles/ha_db_hana/tasks/resource-migration.yml index 51eae262..d35c3f9e 100644 --- a/src/roles/ha_db_hana/tasks/resource-migration.yml +++ b/src/roles/ha_db_hana/tasks/resource-migration.yml @@ -83,8 +83,8 @@ database_sid: "{{ db_sid | lower }}" ansible_os_family: "{{ ansible_os_family | upper }}" register: cluster_status_test_execution - retries: 50 - delay: 10 + retries: "{{ default_retries }}" + delay: "{{ default_delay }}" failed_when: false until: | cluster_status_test_execution.primary_node == cluster_status_pre.secondary_node @@ -138,8 +138,8 @@ database_sid: "{{ db_sid | lower }}" ansible_os_family: "{{ ansible_os_family | upper }}" register: cluster_status_test_execution_1 - retries: 50 - delay: 10 + retries: "{{ default_retries }}" + delay: "{{ default_delay }}" until: | cluster_status_test_execution_1.primary_node == cluster_status_pre.secondary_node and cluster_status_test_execution_1.secondary_node == cluster_status_pre.primary_node diff --git a/src/roles/ha_db_hana/tasks/sbd-fencing.yml b/src/roles/ha_db_hana/tasks/sbd-fencing.yml index 1456f900..1ea81653 100644 --- a/src/roles/ha_db_hana/tasks/sbd-fencing.yml +++ b/src/roles/ha_db_hana/tasks/sbd-fencing.yml @@ -59,8 +59,8 @@ operation_step: "test_execution" database_sid: "{{ db_sid | lower }}" ansible_os_family: "{{ ansible_os_family | upper }}" - retries: 50 - delay: 10 + retries: "{{ default_retries }}" + delay: "{{ default_delay }}" register: cluster_status_test_execution until: | cluster_status_test_execution.primary_node == cluster_status_pre.secondary_node and @@ -75,8 +75,8 @@ database_sid: "{{ db_sid | lower }}" ansible_os_family: "{{ ansible_os_family | upper }}" register: cluster_status_test_post - retries: 50 - delay: 10 + retries: "{{ default_retries }}" + delay: "{{ default_delay }}" until: | cluster_status_test_post.primary_node == cluster_status_pre.secondary_node and cluster_status_test_post.secondary_node == cluster_status_pre.primary_node diff --git a/src/roles/ha_db_hana/tasks/secondary-crash-index.yml b/src/roles/ha_db_hana/tasks/secondary-crash-index.yml index fa560092..c7d0a0bf 100644 --- a/src/roles/ha_db_hana/tasks/secondary-crash-index.yml +++ b/src/roles/ha_db_hana/tasks/secondary-crash-index.yml @@ -58,8 +58,8 @@ ansible_os_family: "{{ ansible_os_family | upper }}" database_sid: "{{ db_sid | lower }}" register: cluster_status_test_execution - retries: 50 - delay: 10 + retries: "{{ default_retries }}" + delay: "{{ default_delay }}" until: | cluster_status_test_execution.primary_node == cluster_status_pre.primary_node and cluster_status_test_execution.secondary_node == "" @@ -70,8 +70,8 @@ database_sid: "{{ db_sid | lower }}" ansible_os_family: "{{ ansible_os_family | upper }}" register: cluster_status_post - retries: 50 - delay: 10 + retries: "{{ default_retries }}" + delay: "{{ default_delay }}" until: | cluster_status_post.primary_node == cluster_status_pre.primary_node and cluster_status_post.secondary_node == cluster_status_pre.secondary_node diff --git a/src/roles/ha_db_hana/tasks/secondary-echo-b.yml b/src/roles/ha_db_hana/tasks/secondary-echo-b.yml 
index 56c8d15f..b6dbb560 100644 --- a/src/roles/ha_db_hana/tasks/secondary-echo-b.yml +++ b/src/roles/ha_db_hana/tasks/secondary-echo-b.yml @@ -52,8 +52,8 @@ operation_step: "test_execution" database_sid: "{{ db_sid | lower }}" ansible_os_family: "{{ ansible_os_family | upper }}" - retries: 50 - delay: 10 + retries: "{{ default_retries }}" + delay: "{{ default_delay }}" register: cluster_status_test_execution until: | cluster_status_test_execution.primary_node == cluster_status_pre.primary_node and @@ -65,8 +65,8 @@ database_sid: "{{ db_sid | lower }}" ansible_os_family: "{{ ansible_os_family | upper }}" register: cluster_status_post - retries: 50 - delay: 10 + retries: "{{ default_retries }}" + delay: "{{ default_delay }}" until: | cluster_status_post.primary_node == cluster_status_pre.primary_node and cluster_status_post.secondary_node == cluster_status_pre.secondary_node diff --git a/src/roles/ha_db_hana/tasks/secondary-node-kill.yml b/src/roles/ha_db_hana/tasks/secondary-node-kill.yml index e595ece5..86e24ad5 100644 --- a/src/roles/ha_db_hana/tasks/secondary-node-kill.yml +++ b/src/roles/ha_db_hana/tasks/secondary-node-kill.yml @@ -51,8 +51,8 @@ ansible_os_family: "{{ ansible_os_family | upper }}" database_sid: "{{ db_sid | lower }}" register: cluster_status_test_execution - retries: 50 - delay: 10 + retries: "{{ default_retries }}" + delay: "{{ default_delay }}" until: | cluster_status_test_execution.primary_node == cluster_status_pre.primary_node and cluster_status_test_execution.secondary_node == "" @@ -63,8 +63,8 @@ ansible_os_family: "{{ ansible_os_family | upper }}" database_sid: "{{ db_sid | lower }}" register: cluster_status_post - retries: 50 - delay: 10 + retries: "{{ default_retries }}" + delay: "{{ default_delay }}" until: | cluster_status_post.primary_node == cluster_status_pre.primary_node and cluster_status_post.secondary_node == cluster_status_pre.secondary_node diff --git a/src/roles/ha_scs/tasks/ascs-migration.yml b/src/roles/ha_scs/tasks/ascs-migration.yml index 4715674d..6844c7b8 100644 --- a/src/roles/ha_scs/tasks/ascs-migration.yml +++ b/src/roles/ha_scs/tasks/ascs-migration.yml @@ -20,9 +20,7 @@ # +--------------------------------------------------------------------------*/ - name: "Test Execution: Manual ASCS Migration" become: true - when: - - node_tier == "scs" or node_tier == "ers" - - pre_validations_status == "PASSED" + when: pre_validations_status == "PASSED" block: - name: "Test Execution: Start timer" ansible.builtin.set_fact: @@ -44,9 +42,8 @@ sap_sid: "{{ sap_sid | lower }}" ansible_os_family: "{{ ansible_os_family | upper }}" register: cluster_status_test_execution - retries: 50 - delay: 10 - failed_when: false + retries: "{{ default_retries }}" + delay: "{{ default_delay }}" until: | cluster_status_test_execution.ascs_node == cluster_status_pre.ers_node and cluster_status_test_execution.ers_node == cluster_status_pre.ascs_node diff --git a/src/roles/ha_scs/tasks/ascs-node-crash.yml b/src/roles/ha_scs/tasks/ascs-node-crash.yml index 8007daca..46325d7d 100644 --- a/src/roles/ha_scs/tasks/ascs-node-crash.yml +++ b/src/roles/ha_scs/tasks/ascs-node-crash.yml @@ -20,14 +20,17 @@ # +--------------------------------------------------------------------------*/ - name: "Test Execution: ASCS Node Crash" become: true - when: - - node_tier == "scs" or node_tier == "ers" - - pre_validations_status == "PASSED" + when: pre_validations_status == "PASSED" block: - name: "Test Execution: Simulate ASCS Node Crash" when: ansible_hostname == cluster_status_pre.ascs_node 
become: true block: + - name: "Test Execution: Check for ENSA version" + ansible.builtin.shell: pgrep -f 'enq.sap{{ sap_sid | upper }}' + register: ensa2_check + failed_when: false + - name: "Test Execution: Start timer" ansible.builtin.set_fact: test_execution_start: "{{ now(utc=true, fmt='%Y-%m-%d %H:%M:%S') }}" @@ -43,18 +46,41 @@ when: ansible_hostname == cluster_status_pre.ers_node become: true block: - - name: "Test Execution: Validate SCS cluster status" + - name: "Test Execution: Validate ASCS node has stopped" + get_cluster_status_scs: + sap_sid: "{{ sap_sid | lower }}" + ansible_os_family: "{{ ansible_os_family | upper }}" + register: cluster_status_test_execution_pre + retries: "{{ default_retries }}" + delay: "{{ default_delay }}" + until: cluster_status_test_execution_pre.ascs_node == "" + + - name: "Test Execution: Validate SCS cluster status ENSA1" + when: hostvars[cluster_status_pre.ascs_node].ensa2_check.stdout == "" get_cluster_status_scs: sap_sid: "{{ sap_sid | lower }}" ansible_os_family: "{{ ansible_os_family | upper }}" register: cluster_status_test_execution - retries: 50 - delay: 10 - failed_when: false + retries: "{{ default_retries }}" + delay: "{{ default_delay }}" until: | cluster_status_test_execution.ascs_node == cluster_status_pre.ers_node and cluster_status_test_execution.ers_node == cluster_status_pre.ascs_node + - name: "Test Execution: Validate SCS cluster status ENSA2" + when: hostvars[cluster_status_pre.ascs_node].ensa2_check.stdout != "" + get_cluster_status_scs: + sap_sid: "{{ sap_sid | lower }}" + ansible_os_family: "{{ ansible_os_family | upper }}" + register: cluster_status_test_execution + retries: "{{ default_retries }}" + delay: "{{ default_delay }}" + until: | + (cluster_status_test_execution.ascs_node == cluster_status_pre.ers_node + and cluster_status_test_execution.ers_node == cluster_status_pre.ascs_node) + or (cluster_status_test_execution.ascs_node == cluster_status_pre.ascs_node + and cluster_status_test_execution.ers_node == cluster_status_pre.ers_node) + - name: "Test Execution: Simulate ASCS Node Crash" when: ansible_hostname == cluster_status_pre.ascs_node become: true @@ -72,9 +98,9 @@ ansible.builtin.set_fact: test_case_message_from_test_case: | Old ASCS: {{ cluster_status_pre.ascs_node }} - New ASCS: {{ hostvars[cluster_status_pre.ers_node].cluster_status_test_execution.ascs_node }} + New ASCS: {{ hostvars[cluster_status_pre.ers_node].cluster_status_test_execution.ascs_node | default('N/A') }} Old ERS: {{ cluster_status_pre.ers_node }} - New ERS: {{ hostvars[cluster_status_pre.ers_node].cluster_status_test_execution.ers_node }} + New ERS: {{ hostvars[cluster_status_pre.ers_node].cluster_status_test_execution.ers_node | default('N/A') }} test_case_details_from_test_case: { "Pre Validations: Validate HANA DB cluster status": "{{ cluster_status_pre }}", "Pre Validations: CleanUp any failed resource": "{{ cleanup_failed_resource_pre }}", diff --git a/src/roles/ha_scs/tasks/azure-lb.yml b/src/roles/ha_scs/tasks/azure-lb.yml new file mode 100644 index 00000000..54edfdb7 --- /dev/null +++ b/src/roles/ha_scs/tasks/azure-lb.yml @@ -0,0 +1,9 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ +- name: Read constants file and set the facts + ansible.builtin.set_fact: + all_constants: "{{ lookup('file', 'constants.yaml') | from_yaml }}" + +- name: Include Load Balancer tasks + ansible.builtin.include_tasks: "roles/misc/tasks/loadbalancer.yml" diff --git a/src/roles/ha_scs/tasks/block-network.yml b/src/roles/ha_scs/tasks/block-network.yml new file mode 100644 index 00000000..a561a16a --- /dev/null +++ b/src/roles/ha_scs/tasks/block-network.yml @@ -0,0 +1,175 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +--- +# /*--------------------------------------------------------------------------- +# | Block Network Communication Test | +# +--------------------------------------------------------------------------*/ +- name: "Test Setup Tasks" + ansible.builtin.include_tasks: "roles/misc/tasks/test-case-setup.yml" + run_once: true + +# /*--------------------------------------------------------------------------- +# | Pre Validations | +# +--------------------------------------------------------------------------*/ +- name: "Pre Validations: SCS Nodes" + ansible.builtin.include_tasks: "roles/misc/tasks/pre-validations-scs.yml" + +# /*--------------------------------------------------------------------------- +# | Test Execution | +# +--------------------------------------------------------------------------*/ +- name: "Test Execution: Block Network Communication" + become: true + when: pre_validations_status == "PASSED" + block: + - name: "Test Execution: Block Network on ASCS Node" + when: ansible_hostname == cluster_status_pre.ascs_node + become: true + block: + - name: "Test Execution: Start timer" + ansible.builtin.set_fact: + test_execution_start: "{{ now(utc=true, fmt='%Y-%m-%d %H:%M:%S') }}" + test_execution_hostname: "{{ hostvars[cluster_status_pre.ascs_node].ansible_hostname }}" + + - name: "Test Execution: Get peer ERS node IP address" + ansible.builtin.set_fact: + ers_ip: "{{ hostvars[cluster_status_pre.ers_node].ansible_host }}" + register: ers_ip_result + + - name: "Test Execution: Block network communication" + ansible.builtin.shell: | + iptables -A INPUT -s {{ ers_ip }} -j DROP; + iptables -A OUTPUT -d {{ ers_ip }} -j DROP + register: block_network_result + + - name: "Test Execution: Wait for the cluster to be in a stable state" + ansible.builtin.wait_for: + timeout: "{{ default_timeout }}" + + + - name: "Test Execution: Check node status on primary and secondary" + block: + - name: "Test Execution: Check node connectivity from controller" + delegate_to: localhost + ansible.builtin.shell: | + for i in $(seq 1 30); do + if ! nc -zv -w1 {{ ansible_host }} {{ sap_port_to_ping }}; then + echo "Connection failed on attempt $i" + exit 1 + fi + sleep 1 + done + register: node_status + ignore_errors: true + + - name: "Set Node Status Facts" + ansible.builtin.set_fact: + ascs_node_down: "{{ hostvars[cluster_status_pre.ascs_node].node_status is failed }}" + ers_node_down: "{{ hostvars[cluster_status_pre.ers_node].node_status is failed }}" + + - name: "Fail if both nodes are unavailable" + ansible.builtin.fail: + msg: | + "Error: Both ASCS and ERS nodes are unreachable. + This may be caused by a fence race condition triggered during the block network + test case execution, where both nodes potentially fenced each other. + Please verify if the priority-fencing-delay cluster property is configured. + If not, refer to the SAP on Azure high availability documentation for + instructions on how to configure it." 
+ when: ascs_node_down and ers_node_down + + - name: "Test Execution: Monitor failover on ERS Node" + when: + - ansible_hostname == cluster_status_pre.ers_node + - ascs_node_down and not ers_node_down + become: true + block: + - name: "Test Execution: Validate SCS cluster status" + get_cluster_status_scs: + sap_sid: "{{ sap_sid | lower }}" + ansible_os_family: "{{ ansible_os_family | upper }}" + register: cluster_status_test_execution + retries: "{{ default_retries }}" + delay: "{{ default_delay }}" + until: | + cluster_status_test_execution.ascs_node == cluster_status_pre.ers_node + and cluster_status_test_execution.ers_node == cluster_status_pre.ascs_node + + - name: "Test Execution: Restore Network on ASCS Node" + when: + - ansible_hostname == cluster_status_pre.ascs_node + - not ascs_node_down and ers_node_down + become: true + block: + - name: "Test Execution: Restore network communication" + ansible.builtin.shell: | + iptables -D INPUT -s {{ ers_ip }} -j DROP; + iptables -D OUTPUT -d {{ ers_ip }} -j DROP + register: unblock_network_result + + - name: "Test Execution: Restore Network on ASCS Node" + when: ansible_hostname == cluster_status_pre.ascs_node + become: true + block: + - name: "Test Execution: Wait for the cluster to be in a stable state" + ansible.builtin.wait_for: + timeout: "{{ ascs_stonith_timeout }}" + + - name: "Test Execution: Validate SCS cluster status" + get_cluster_status_scs: + sap_sid: "{{ sap_sid | lower }}" + ansible_os_family: "{{ ansible_os_family | upper }}" + register: cluster_status_test_execution + retries: "{{ default_retries }}" + delay: "{{ default_delay }}" + until: | + cluster_status_test_execution.ascs_node == cluster_status_pre.ascs_node + and cluster_status_test_execution.ers_node == cluster_status_pre.ers_node + + - name: "Test Execution: Cleanup resources" + ansible.builtin.command: crm_resource --cleanup + register: cleanup_failed_resource_test_execution + changed_when: cleanup_failed_resource_test_execution.rc == 0 + + - name: "Test Execution: Stop timer" + ansible.builtin.set_fact: + test_execution_end: "{{ now(utc=true, fmt='%Y-%m-%d %H:%M:%S') }}" + + - name: "Set test case message and details" + ansible.builtin.set_fact: + test_case_message_from_test_case: | + Old ASCS: {{ cluster_status_pre.ascs_node }} + New ASCS: {{ cluster_status_test_execution.ascs_node }} + Old ERS: {{ cluster_status_pre.ers_node }} + New ERS: {{ cluster_status_test_execution.ers_node }} + test_case_details_from_test_case: { + "Pre Validations: Validate SCS cluster status": "{{ cluster_status_pre }}", + "Pre Validations: CleanUp any failed resource": "{{ cleanup_failed_resource_pre }}", + "Test Execution: Block network": "{{ block_network_result }}", + "Test Execution: Unblock network": "{{ unblock_network_result }}", + "Test Execution: Cleanup resources": "{{ cleanup_failed_resource_test_execution }}", + "Post Validations Result": "{{ cluster_status_test_execution }}", + } +# /*--------------------------------------------------------------------------- +# | Post Validations | +# +--------------------------------------------------------------------------*/ + - name: "Post Validations Tasks" + ansible.builtin.include_tasks: "roles/misc/tasks/post-validations.yml" + + rescue: + - name: "Rescue operation" + ansible.builtin.include_tasks: "roles/misc/tasks/rescue.yml" + +- name: "Pre Validations: Test Case Skipped" + become: true + when: pre_validations_status == "FAILED" + block: + - name: "Set test case message and details" + ansible.builtin.set_fact: + 
test_case_message_from_test_case: "Pre Validations for SCS cluster status checks failed. Please check the details." + test_case_details_from_test_case: { + "Pre Validations: Validate SCS cluster status": "{{ cluster_status_pre }}", + } + + - name: "Post Validations Tasks" + ansible.builtin.include_tasks: "roles/misc/tasks/post-validations.yml" diff --git a/src/roles/ha_scs/tasks/files/constants.yaml b/src/roles/ha_scs/tasks/files/constants.yaml index ce933c3a..7b1b89f4 100644 --- a/src/roles/ha_scs/tasks/files/constants.yaml +++ b/src/roles/ha_scs/tasks/files/constants.yaml @@ -8,26 +8,26 @@ # cibadmin --query --scope crm_config CRM_CONFIG_DEFAULTS: cluster-infrastructure: corosync - priority-fencing-delay: '30' + priority-fencing-delay: ["30", "30s"] stonith-action: reboot - stonith-enabled: 'false' - concurrent-fencing: 'true' - maintenance-mode: 'false' - node-health-strategy: 'custom' - azure-events-az_globalPullState: 'IDLE' + stonith-enabled: "true" + concurrent-fencing: "true" + maintenance-mode: "false" + node-health-strategy: "custom" + azure-events-az_globalPullState: "IDLE" # === Operation Defaults === # cibadmin --query --scope op_defaults OP_DEFAULTS: - record-pending: 'true' - timeout: '600' + record-pending: "true" + timeout: ["600", "600s"] # === Resource Defaults === # cibadmin --query --scope rsc_defaults RSC_DEFAULTS: - migration-threshold: '3' - priority: '1' - resource-stickiness: '1' + migration-threshold: "3" + priority: "1" + resource-stickiness: "1" # === Constraints === # cibadmin --query --scope constraints @@ -51,36 +51,49 @@ CONSTRAINTS: # Specify the properties that are different for different OS versions VALID_CONFIGS: REDHAT: - priority-fencing-delay: '15s' + priority-fencing-delay: "15s" SUSE: {} AFA: have-watchdog: "false" - stonith-timeout: "900" + stonith-timeout: ["900", "900s"] ISCSI: have-watchdog: "true" - stonith-timeout: "144s" + stonith-timeout: ["144", "144s"] # === Resource Defaults === # cibadmin --query --scope resources RESOURCE_DEFAULTS: SUSE: - stonith: + fence_agent: instance_attributes: pcmk_delay_max: "15" pcmk_monitor_retries: "4" pcmk_action_limit: "3" - pcmk_reboot_timeout: "900" - power_timeout: "240" + pcmk_reboot_timeout: ["900", "900s"] + power_timeout: ["240", "240s"] operations: monitor: - interval: "3600" - timeout: "120" + interval: ["3600", "3600s"] + timeout: ["120", "120s"] start: - interval: "0" - timeout: "20" + interval: ["0", "0s"] + timeout: ["20", "20s"] stop: - interval: "0" - timeout: "20" + interval: ["0", "0s"] + timeout: ["20", "20s"] + + sbd_stonith: + instance_attributes: + pcmk_delay_max: "15" + pcmk_monitor_retries: "4" + pcmk_action_limit: "3" + pcmk_reboot_timeout: ["900", "900s"] + power_timeout: ["240", "240s"] + pcmk_monitor_timeout: ["120", "120s"] + operations: + monitor: + interval: ["600", "600s"] + timeout: ["15", "15s"] ascs: instance_attributes: @@ -91,20 +104,20 @@ RESOURCE_DEFAULTS: priority: "100" operations: monitor: - interval: "11" - timeout: "60" + interval: ["11", "11s"] + timeout: ["105", "105s"] start: - interval: "0" - timeout: "180" + interval: ["0", "0s"] + timeout: ["180", "180s"] stop: - interval: "0" - timeout: "240" + interval: ["0", "0s"] + timeout: ["240", "240s"] promote: - interval: "0" - timeout: "320" + interval: ["0", "0s"] + timeout: ["320", "320s"] demote: - interval: "0" - timeout: "320" + interval: ["0", "0s"] + timeout: ["320", "320s"] ers: instance_attributes: @@ -116,48 +129,48 @@ RESOURCE_DEFAULTS: priority: "100" operations: monitor: - interval: "11" - timeout: 
"60" + interval: ["11", "11s"] + timeout: ["105", "105s"] start: - interval: "0" - timeout: "180" + interval: ["0", "0s"] + timeout: ["180", "180s"] stop: - interval: "0" - timeout: "240" + interval: ["0", "0s"] + timeout: ["240", "240s"] promote: - interval: "0" - timeout: "320" + interval: ["0", "0s"] + timeout: ["320", "320s"] demote: - interval: "0" - timeout: "320" + interval: ["0", "0s"] + timeout: ["320", "320s"] ipaddr: meta_attributes: target-role: "Started" operations: monitor: - interval: "10" - timeout: "20" + interval: ["10", "10s"] + timeout: ["20", "20s"] start: - interval: "0" - timeout: "20" + interval: ["0", "0s"] + timeout: ["20", "20s"] stop: - interval: "0" - timeout: "20" + interval: ["0", "0s"] + timeout: ["20", "20s"] azurelb: meta_attributes: resource-stickiness: "0" operations: monitor: - interval: "10" - timeout: "20" + interval: ["10", "10s"] + timeout: ["20", "20s"] start: - interval: "0" - timeout: "20" + interval: ["0", "0s"] + timeout: ["20", "20s"] stop: - interval: "0" - timeout: "20" + interval: ["0", "0s"] + timeout: ["20", "20s"] azureevents: meta_attributes: @@ -165,14 +178,150 @@ RESOURCE_DEFAULTS: failure-timeout: "120s" operations: monitor: - interval: "10" - timeout: "240" + interval: ["10", "10s"] + start: + interval: ["0", "0s"] + + REDHAT: + fence_agent: + instance_attributes: + pcmk_delay_max: "15" + pcmk_monitor_retries: "4" + pcmk_action_limit: "3" + pcmk_reboot_timeout: ["900", "900s"] + power_timeout: ["240", "240s"] + operations: + monitor: + interval: "3600" + timeout: ["120", "120s"] + start: + interval: ["0", "0s"] + timeout: ["20", "20s"] + stop: + interval: ["0", "0s"] + timeout: ["20", "20s"] + + sbd_stonith: + instance_attributes: + pcmk_delay_max: "15" + pcmk_monitor_retries: "4" + pcmk_action_limit: "3" + pcmk_reboot_timeout: ["900", "900s"] + power_timeout: ["240", "240s"] + pcmk_monitor_timeout: ["120", "120s"] + operations: + monitor: + interval: "600" + timeout: ["15", "15s"] + + ascs: + instance_attributes: + AUTOMATIC_RECOVER: "false" + MINIMAL_PROBE: "true" + meta_attributes: + resource-stickiness: "5000" + priority: "10" + operations: + monitor: + interval: ["20", "20s"] + timeout: + ANF: ["105", "105s"] + AFS: ["60", "60s"] start: - interval: "0" - timeout: "10" + interval: ["0", "0s"] + timeout: ["600", "600s"] stop: - interval: "0" - timeout: "10" + interval: ["0", "0s"] + timeout: ["600", "600s"] + promote: + interval: ["0", "0s"] + timeout: ["320", "320s"] + demote: + interval: ["0", "0s"] + timeout: ["320", "320s"] + methods: + timeout: ["5", "5s"] + interval: ["0", "0s"] + reload: + timeout: ["320", "320s"] + interval: ["0", "0s"] + + ers: + instance_attributes: + AUTOMATIC_RECOVER: "false" + MINIMAL_PROBE: "true" + IS_ERS: "true" + meta_attributes: + resource-stickiness: "3000" + priority: "100" + operations: + monitor: + interval: ["20", "20s"] + timeout: + ANF: ["105", "105s"] + AFS: ["60", "60s"] + start: + interval: ["0", "0s"] + timeout: ["600", "600s"] + stop: + interval: ["0", "0s"] + timeout: ["600", "600s"] + promote: + interval: ["0", "0s"] + timeout: ["320", "320s"] + demote: + interval: ["0", "0s"] + timeout: ["320", "320s"] + methods: + timeout: ["5", "5s"] + interval: ["0", "0s"] + reload: + timeout: ["320", "320s"] + interval: ["0", "0s"] + + ipaddr: + meta_attributes: + target-role: "Started" + operations: + monitor: + interval: ["10", "10s"] + timeout: ["20", "20s"] + start: + interval: ["0", "0s"] + timeout: ["20", "20s"] + stop: + interval: ["0", "0s"] + timeout: ["20", "20s"] + + azurelb: + 
meta_attributes:
+        resource-stickiness: "0"
+      operations:
+        monitor:
+          interval: ["10", "10s"]
+          timeout: ["20", "20s"]
+        start:
+          interval: ["0", "0s"]
+          timeout: ["20", "20s"]
+        stop:
+          interval: ["0", "0s"]
+          timeout: ["20", "20s"]
+
+    azureevents:
+      meta_attributes:
+        allow-unhealthy-nodes: "true"
+        failure-timeout: ["120", "120s"]
+      operations:
+        monitor:
+          interval: ["10", "10s"]
+          timeout: ["240", "240s"]
+        start:
+          interval: ["0", "0s"]
+          timeout: ["10", "10s"]
+        stop:
+          interval: ["0", "0s"]
+          timeout: ["10", "10s"]
+
 # === OS Parameters ===
 # Run command as root. Format of command is: "parent_key child_key"
@@ -192,7 +341,6 @@ AZURE_LOADBALANCER:
   PROBES:
     probe_threshold: 2
     interval_in_seconds: 5
-    number_of_probes: 2
   RULES:
     idle_timeout_in_minutes: 30
diff --git a/src/roles/ha_scs/tasks/ha-config.yml b/src/roles/ha_scs/tasks/ha-config.yml
index cb87d903..6a846045 100644
--- a/src/roles/ha_scs/tasks/ha-config.yml
+++ b/src/roles/ha_scs/tasks/ha-config.yml
@@ -28,6 +28,7 @@
         virtual_machine_name: "{{ azure_instance_metadata.json.compute.name }}"
         pcmk_constants: "{{ lookup('file', 'constants.yaml') | from_yaml }}"
         fencing_mechanism: "{{ scs_cluster_type }}"
+        nfs_provider: "{{ NFS_provider }}"
       register: test_result

     - name: "Set the test case status to PASSED"
diff --git a/src/roles/ha_scs/tasks/ha-failover-to-node.yml b/src/roles/ha_scs/tasks/ha-failover-to-node.yml
new file mode 100644
index 00000000..a14c47c3
--- /dev/null
+++ b/src/roles/ha_scs/tasks/ha-failover-to-node.yml
@@ -0,0 +1,108 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+---
+# /*---------------------------------------------------------------------------
+# |                          HAFailoverToNode Test                             |
+# +--------------------------------------------------------------------------*/
+- name: "Test Setup Tasks"
+  ansible.builtin.include_tasks: "roles/misc/tasks/test-case-setup.yml"
+  run_once: true
+
+# /*---------------------------------------------------------------------------
+# |                             Pre Validations                                |
+# +--------------------------------------------------------------------------*/
+- name: "Pre Validations: SCS Nodes"
+  ansible.builtin.include_tasks: "roles/misc/tasks/pre-validations-scs.yml"
+
+# /*---------------------------------------------------------------------------
+# |                             Test Execution                                 |
+# +--------------------------------------------------------------------------*/
+- name: "Test Execution: HAFailoverToNode"
+  become: true
+  when: pre_validations_status == "PASSED" and (ansible_os_family | upper) == "SUSE"
+  block:
+    - name: "Test Execution: Execute HAFailoverToNode on ASCS Node"
+      when: ansible_hostname == cluster_status_pre.ascs_node
+      become: true
+      block:
+        - name: "Test Execution: Start timer"
+          ansible.builtin.set_fact:
+            test_execution_start: "{{ now(utc=true, fmt='%Y-%m-%d %H:%M:%S') }}"
+            test_execution_hostname: "{{ hostvars[cluster_status_pre.ascs_node].ansible_hostname }}"
+
+        - name: "Test Execution: Execute HAFailoverToNode command"
+          become: true
+          become_user: "{{ sap_sid | lower }}adm"
+          ansible.builtin.shell: sapcontrol -nr {{ scs_instance_number }} -function HAFailoverToNode {{ cluster_status_pre.ers_node }}
+          environment:
+            PATH: /usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/usr/sap/{{ sap_sid | upper }}/SYS/exe/uc/linuxx86_64:/usr/sap/{{ sap_sid | upper }}/SYS/exe/run:/home/{{ sap_sid | lower }}adm
+            DIR_LIBRARY: /usr/sap/{{ sap_sid | upper }}/SYS/exe/run
+            LD_LIBRARY_PATH: /usr/sap/{{ sap_sid | upper }}/SYS/exe/run:/usr/sap/{{ sap_sid | upper }}/SYS/exe/uc/linuxx86_64
+ SAPSYSTEMNAME: "{{ sap_sid | upper }}" + register: ha_failover_result + failed_when: false + + - name: "Test Execution: Validate SCS cluster status" + get_cluster_status_scs: + sap_sid: "{{ sap_sid | lower }}" + ansible_os_family: "{{ ansible_os_family | upper }}" + register: cluster_status_test_execution + retries: "{{ default_retries }}" + delay: "{{ default_delay }}" + until: | + cluster_status_test_execution.ascs_node == cluster_status_pre.ers_node + and cluster_status_test_execution.ers_node == cluster_status_pre.ascs_node + + - name: "Test Execution: Remove location constraints" + ansible.builtin.command: "{{ commands | selectattr( + 'name', 'equalto', 'ascs_resource_unmigrate_cmd') | map( + attribute=(ansible_os_family | upper)) | first }}" + register: unmigrate_result + + - name: "Test Execution: Cleanup resources" + ansible.builtin.command: crm_resource --cleanup + register: cleanup_failed_resource_test_execution + changed_when: cleanup_failed_resource_test_execution.rc == 0 + + - name: "Test Execution: Stop timer" + ansible.builtin.set_fact: + test_execution_end: "{{ now(utc=true, fmt='%Y-%m-%d %H:%M:%S') }}" + + - name: "Set test case message and details" + ansible.builtin.set_fact: + test_case_message_from_test_case: | + Old ASCS: {{ cluster_status_pre.ascs_node }} + New ASCS: {{ cluster_status_test_execution.ascs_node }} + Old ERS: {{ cluster_status_pre.ers_node }} + New ERS: {{ cluster_status_test_execution.ers_node }} + test_case_details_from_test_case: { + "Pre Validations: Validate SCS cluster status": "{{ cluster_status_pre }}", + "Pre Validations: CleanUp any failed resource": "{{ cleanup_failed_resource_pre }}", + "Test Execution: HAFailoverToNode Command": "{{ ha_failover_result }}", + "Test Execution: Cleanup resources": "{{ cleanup_failed_resource_test_execution }}", + "Post Validations Result": "{{ cluster_status_test_execution }}", + } +# /*--------------------------------------------------------------------------- +# | Post Validations | +# +--------------------------------------------------------------------------*/ + - name: "Post Validations Tasks" + ansible.builtin.include_tasks: "roles/misc/tasks/post-validations.yml" + + rescue: + - name: "Rescue operation" + ansible.builtin.include_tasks: "roles/misc/tasks/rescue.yml" + +- name: "Pre Validations: Test Case Skipped" + become: true + when: pre_validations_status == "FAILED" + block: + - name: "Set test case message and details" + ansible.builtin.set_fact: + test_case_message_from_test_case: "Pre Validations for SCS cluster status checks failed. Please check the details." + test_case_details_from_test_case: { + "Pre Validations: Validate SCS cluster status": "{{ cluster_status_pre }}", + } + + - name: "Post Validations Tasks" + ansible.builtin.include_tasks: "roles/misc/tasks/post-validations.yml" diff --git a/src/roles/ha_scs/tasks/kill-enqueue-replication.yml b/src/roles/ha_scs/tasks/kill-enqueue-replication.yml new file mode 100644 index 00000000..b181f4cb --- /dev/null +++ b/src/roles/ha_scs/tasks/kill-enqueue-replication.yml @@ -0,0 +1,117 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
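+#
+# Note: ENSA detection below assumes the usual SAP kernel process names, e.g.
+#   pgrep -f 'enqr.sap<SID>'   # match -> ENSA2 replicator (enqr); no match -> ENSA1 (er)
+# Verify the 'er.sap<SID>' / 'enqr.sap<SID>' patterns against the kernel release in
+# use before relying on this check.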
+ +--- +# /*--------------------------------------------------------------------------- +# | Kill Enqueue Replication Server Process | +# +--------------------------------------------------------------------------*/ +- name: "Test Setup Tasks" + ansible.builtin.include_tasks: "roles/misc/tasks/test-case-setup.yml" + run_once: true + +# /*--------------------------------------------------------------------------- +# | Pre Validations | +# +--------------------------------------------------------------------------*/ +- name: "Pre Validations: SCS Nodes" + ansible.builtin.include_tasks: "roles/misc/tasks/pre-validations-scs.yml" + +# /*--------------------------------------------------------------------------- +# | Test Execution | +# +--------------------------------------------------------------------------*/ +- name: "Test Execution: Kill Enqueue Replication Server Process" + become: true + when: pre_validations_status == "PASSED" + block: + - name: "Test Execution: Kill Enqueue Replication Server on ERS Node" + when: ansible_hostname == cluster_status_pre.ers_node + become: true + block: + - name: "Test Execution: Start timer" + ansible.builtin.set_fact: + test_execution_start: "{{ now(utc=true, fmt='%Y-%m-%d %H:%M:%S') }}" + test_execution_hostname: "{{ hostvars[cluster_status_pre.ers_node].ansible_hostname }}" + + - name: "Test Execution: Check for ENSA version" + ansible.builtin.shell: pgrep -f 'enqr.sap{{ sap_sid | upper }}' + register: ensa2_check + failed_when: false + + - name: "Test Execution: Kill Enqueue Replication Server Process (ENSA1)" + when: ensa2_check.stdout == "" + ansible.builtin.shell: set -o pipefail && pgrep -f 'er.sap{{ sap_sid | upper }}' | xargs kill -9 + register: kill_er_result + failed_when: false + + - name: "Test Execution: Kill Enqueue Replication Server Process (ENSA2)" + when: ensa2_check.stdout != "" + ansible.builtin.shell: set -o pipefail && pgrep -f 'enqr.sap{{ sap_sid | upper }}' | xargs kill -9 + register: kill_enqr_result + failed_when: false + + - name: "Test Execution: Validate ERS node is not running" + get_cluster_status_scs: + sap_sid: "{{ sap_sid | lower }}" + ansible_os_family: "{{ ansible_os_family | upper }}" + register: cluster_status_test_execution_pre + retries: "{{ default_retries }}" + delay: "{{ default_delay }}" + until: cluster_status_test_execution_pre.ers_node == "" + + - name: "Test Execution: Cleanup resources" + ansible.builtin.command: crm_resource --cleanup + register: cleanup_failed_resource_test_execution + changed_when: cleanup_failed_resource_test_execution.rc == 0 + + - name: "Test Execution: Validate SCS cluster status" + get_cluster_status_scs: + sap_sid: "{{ sap_sid | lower }}" + ansible_os_family: "{{ ansible_os_family | upper }}" + register: cluster_status_test_execution + retries: "{{ default_retries }}" + delay: "{{ default_delay }}" + until: | + cluster_status_test_execution.ascs_node == cluster_status_pre.ascs_node + and cluster_status_test_execution.ers_node == cluster_status_pre.ers_node + + - name: "Test Execution: Stop timer" + ansible.builtin.set_fact: + test_execution_end: "{{ now(utc=true, fmt='%Y-%m-%d %H:%M:%S') }}" + + - name: "Set test case message and details" + ansible.builtin.set_fact: + test_case_message_from_test_case: | + ENSA Version: {{ "ENSA1" if ensa2_check.stdout == "" else "ENSA2" }} + ASCS Node: {{ cluster_status_pre.ascs_node }} + ERS Node: {{ cluster_status_pre.ers_node }} + test_case_details_from_test_case: { + "Pre Validations: Validate SCS cluster status": "{{ cluster_status_pre 
}}", + "Pre Validations: CleanUp any failed resource": "{{ cleanup_failed_resource_pre }}", + "Test Execution: ENSA Version Check": "{{ ensa2_check }}", + "Test Execution: Kill Enqueue Replication Server Process (ENSA1)": "{{ kill_er_result | default(omit) }}", + "Test Execution: Kill Enqueue Replication Server Process (ENSA2)": "{{ kill_enqr_result | default(omit) }}", + "Test Execution: Cleanup resources": "{{ cleanup_failed_resource_test_execution }}", + "Post Validations Result": "{{ cluster_status_test_execution }}", + } +# /*--------------------------------------------------------------------------- +# | Post Validations | +# +--------------------------------------------------------------------------*/ + - name: "Post Validations Tasks" + ansible.builtin.include_tasks: "roles/misc/tasks/post-validations.yml" + + rescue: + - name: "Rescue operation" + ansible.builtin.include_tasks: "roles/misc/tasks/rescue.yml" + +- name: "Pre Validations: Test Case Skipped" + become: true + when: pre_validations_status == "FAILED" + block: + - name: "Set test case message and details" + ansible.builtin.set_fact: + test_case_message_from_test_case: "Pre Validations for SCS cluster status checks failed. Please check the details." + test_case_details_from_test_case: { + "Pre Validations: Validate SCS cluster status": "{{ cluster_status_pre }}", + } + + - name: "Post Validations Tasks" + ansible.builtin.include_tasks: "roles/misc/tasks/post-validations.yml" diff --git a/src/roles/ha_scs/tasks/kill-enqueue-server.yml b/src/roles/ha_scs/tasks/kill-enqueue-server.yml new file mode 100644 index 00000000..8c0d811d --- /dev/null +++ b/src/roles/ha_scs/tasks/kill-enqueue-server.yml @@ -0,0 +1,132 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+
+---
+# /*---------------------------------------------------------------------------
+# |                        Kill Enqueue Server Process                         |
+# +--------------------------------------------------------------------------*/
+- name: "Test Setup Tasks"
+  ansible.builtin.include_tasks: "roles/misc/tasks/test-case-setup.yml"
+  run_once: true
+
+# /*---------------------------------------------------------------------------
+# |                            Pre Validations                                 |
+# +--------------------------------------------------------------------------*/
+- name: "Pre Validations: SCS Nodes"
+  ansible.builtin.include_tasks: "roles/misc/tasks/pre-validations-scs.yml"
+
+# /*---------------------------------------------------------------------------
+# |                            Test Execution                                  |
+# +--------------------------------------------------------------------------*/
+- name: "Test Execution: Kill Enqueue Server Process"
+  become: true
+  when: pre_validations_status == "PASSED"
+  block:
+    - name: "Test Execution: Kill Enqueue Server on ASCS Node"
+      when: ansible_hostname == cluster_status_pre.ascs_node
+      become: true
+      block:
+        - name: "Test Execution: Start timer"
+          ansible.builtin.set_fact:
+            test_execution_start: "{{ now(utc=true, fmt='%Y-%m-%d %H:%M:%S') }}"
+            test_execution_hostname: "{{ hostvars[cluster_status_pre.ascs_node].ansible_hostname }}"
+
+        - name: "Test Execution: Check for ENSA version"
+          ansible.builtin.shell: pgrep -f 'enq.sap{{ sap_sid | upper }}'
+          register: ensa2_check
+          failed_when: false
+
+        - name: "Test Execution: Kill Enqueue Server Process (ENSA1)"
+          ansible.builtin.shell: set -o pipefail && pgrep -f 'en.sap{{ sap_sid | upper }}' | xargs kill -9
+          register: kill_er_result
+          failed_when: false
+          when: ensa2_check.stdout == ""
+
+        - name: "Test Execution: Kill Enqueue Server Process (ENSA2)"
+          ansible.builtin.shell: set -o pipefail && pgrep -f 'enq.sap{{ sap_sid | upper }}' | xargs kill -9
+          register: kill_en_result
+          failed_when: false
+          when: ensa2_check.stdout != ""
+
+        - name: "Test Execution: Validate ASCS node has stopped"
+          get_cluster_status_scs:
+            sap_sid: "{{ sap_sid | lower }}"
+            ansible_os_family: "{{ ansible_os_family | upper }}"
+          register: cluster_status_test_execution_pre
+          retries: "{{ default_retries }}"
+          delay: "{{ default_delay }}"
+          until: cluster_status_test_execution_pre.ascs_node == ""
+
+        - name: "Test Execution: Cleanup resources"
+          ansible.builtin.command: crm_resource --cleanup
+          register: cleanup_failed_resource_test_execution
+          changed_when: cleanup_failed_resource_test_execution.rc == 0
+
+        - name: "Test Execution: Validate SCS cluster status for ENSA1"
+          when: ensa2_check.stdout == ""
+          get_cluster_status_scs:
+            sap_sid: "{{ sap_sid | lower }}"
+            ansible_os_family: "{{ ansible_os_family | upper }}"
+          register: cluster_status_test_execution
+          retries: "{{ default_retries }}"
+          delay: "{{ default_delay }}"
+          until: |
+            cluster_status_test_execution.ascs_node == cluster_status_pre.ers_node
+            and cluster_status_test_execution.ers_node == cluster_status_pre.ascs_node
+
+        - name: "Test Execution: Validate SCS cluster status for ENSA2"
+          when: ensa2_check.stdout != ""
+          get_cluster_status_scs:
+            sap_sid: "{{ sap_sid | lower }}"
+            ansible_os_family: "{{ ansible_os_family | upper }}"
+          register: cluster_status_test_execution
+          retries: "{{ default_retries }}"
+          delay: "{{ default_delay }}"
+          until: |
+            (cluster_status_test_execution.ascs_node == cluster_status_pre.ers_node
+            and cluster_status_test_execution.ers_node == cluster_status_pre.ascs_node)
+            or (cluster_status_test_execution.ascs_node == cluster_status_pre.ascs_node
+            and cluster_status_test_execution.ers_node == cluster_status_pre.ers_node)
+
+        - name: "Test Execution: Stop timer"
+          ansible.builtin.set_fact:
+            test_execution_end: "{{ now(utc=true, fmt='%Y-%m-%d %H:%M:%S') }}"
+
+        - name: "Set test case message and details"
+          ansible.builtin.set_fact:
+            test_case_message_from_test_case: |
+              ENSA Version: {{ "ENSA1" if ensa2_check.stdout == "" else "ENSA2" }}
+              Old ASCS: {{ cluster_status_pre.ascs_node }}
+              New ASCS: {{ cluster_status_test_execution.ascs_node }}
+              Old ERS: {{ cluster_status_pre.ers_node }}
+              New ERS: {{ cluster_status_test_execution.ers_node }}
+            test_case_details_from_test_case: {
+              "Pre Validations: Validate SCS cluster status": "{{ cluster_status_pre }}",
+              "Pre Validations: CleanUp any failed resource": "{{ cleanup_failed_resource_pre }}",
+              "Test Execution: Kill Enqueue Server Process (ENSA1)": "{{ kill_er_result | default(omit) }}",
+              "Test Execution: Kill Enqueue Server Process (ENSA2)": "{{ kill_en_result | default(omit) }}",
+              "Test Execution: Cleanup resources": "{{ cleanup_failed_resource_test_execution }}",
+              "Post Validations Result": "{{ cluster_status_test_execution }}",
+            }
+# /*---------------------------------------------------------------------------
+# |                            Post Validations                                |
+# +--------------------------------------------------------------------------*/
+    - name: "Post Validations Tasks"
+      ansible.builtin.include_tasks: "roles/misc/tasks/post-validations.yml"
+
+  rescue:
+    - name: "Rescue operation"
+      ansible.builtin.include_tasks: "roles/misc/tasks/rescue.yml"
+
+- name: "Pre Validations: Test Case Skipped"
+  become: true
+  when: pre_validations_status == "FAILED"
+  block:
+    - name: "Set test case message and details"
+      ansible.builtin.set_fact:
+        test_case_message_from_test_case: "Pre Validations for SCS cluster status checks failed. Please check the details."
+        test_case_details_from_test_case: {
+          "Pre Validations: Validate SCS cluster status": "{{ cluster_status_pre }}",
+        }
+
+    - name: "Post Validations Tasks"
+      ansible.builtin.include_tasks: "roles/misc/tasks/post-validations.yml"
diff --git a/src/roles/ha_scs/tasks/kill-message-server.yml b/src/roles/ha_scs/tasks/kill-message-server.yml
new file mode 100644
index 00000000..dcda2d70
--- /dev/null
+++ b/src/roles/ha_scs/tasks/kill-message-server.yml
@@ -0,0 +1,128 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
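+#
+# Note: a single kill of the message server can be absorbed by sapstartsrv's local
+# restart without any cluster event. The kill task below therefore retries until
+# pgrep finds no 'ms.sap<SID>' process left to kill (xargs then reports
+# "kill: not enough arguments"), so the failure is sustained long enough for
+# Pacemaker to react.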
+
+---
+# /*---------------------------------------------------------------------------
+# |                        Kill Message Server Process                         |
+# +--------------------------------------------------------------------------*/
+- name: "Test Setup Tasks"
+  ansible.builtin.include_tasks: "roles/misc/tasks/test-case-setup.yml"
+  run_once: true
+
+# /*---------------------------------------------------------------------------
+# |                            Pre Validations                                 |
+# +--------------------------------------------------------------------------*/
+- name: "Pre Validations: SCS Nodes"
+  ansible.builtin.include_tasks: "roles/misc/tasks/pre-validations-scs.yml"
+
+# /*---------------------------------------------------------------------------
+# |                            Test Execution                                  |
+# +--------------------------------------------------------------------------*/
+- name: "Test Execution: Kill Message Server Process"
+  become: true
+  when: pre_validations_status == "PASSED"
+  block:
+    - name: "Test Execution: Kill MS Process on ASCS Node"
+      when: ansible_hostname == cluster_status_pre.ascs_node
+      become: true
+      block:
+        - name: "Test Execution: Start timer"
+          ansible.builtin.set_fact:
+            test_execution_start: "{{ now(utc=true, fmt='%Y-%m-%d %H:%M:%S') }}"
+            test_execution_hostname: "{{ hostvars[cluster_status_pre.ascs_node].ansible_hostname }}"
+
+        - name: "Test Execution: Check for ENSA version"
+          ansible.builtin.shell: pgrep -f 'enq.sap{{ sap_sid | upper }}'
+          register: ensa2_check
+          failed_when: false
+
+        - name: "Test Execution: Kill Message Server Process"
+          ansible.builtin.shell: set -o pipefail && pgrep -f 'ms.sap{{ sap_sid | upper }}' | xargs kill -9
+          register: kill_ms_result
+          until: "'kill: not enough arguments' in kill_ms_result.stderr"
+          ignore_errors: true
+          retries: 10
+          delay: 1
+
+        - name: "Test Execution: Validate ASCS node has stopped"
+          get_cluster_status_scs:
+            sap_sid: "{{ sap_sid | lower }}"
+            ansible_os_family: "{{ ansible_os_family | upper }}"
+          register: cluster_status_test_execution_pre
+          retries: "{{ default_retries }}"
+          delay: "{{ default_delay }}"
+          until: cluster_status_test_execution_pre.ascs_node == ""
+
+        - name: "Test Execution: Validate SCS cluster status for ENSA1"
+          when: ensa2_check.stdout == ""
+          get_cluster_status_scs:
+            sap_sid: "{{ sap_sid | lower }}"
+            ansible_os_family: "{{ ansible_os_family | upper }}"
+          register: cluster_status_test_execution
+          retries: "{{ default_retries }}"
+          delay: "{{ default_delay }}"
+          until: |
+            cluster_status_test_execution.ascs_node == cluster_status_pre.ers_node
+            and cluster_status_test_execution.ers_node == cluster_status_pre.ascs_node
+
+        - name: "Test Execution: Validate SCS cluster status for ENSA2"
+          when: ensa2_check.stdout != ""
+          get_cluster_status_scs:
+            sap_sid: "{{ sap_sid | lower }}"
+            ansible_os_family: "{{ ansible_os_family | upper }}"
+          register: cluster_status_test_execution
+          retries: "{{ default_retries }}"
+          delay: "{{ default_delay }}"
+          until: |
+            (cluster_status_test_execution.ascs_node == cluster_status_pre.ers_node
+            and cluster_status_test_execution.ers_node == cluster_status_pre.ascs_node)
+            or (cluster_status_test_execution.ascs_node == cluster_status_pre.ascs_node
+            and cluster_status_test_execution.ers_node == cluster_status_pre.ers_node)
+
+        - name: "Test Execution: Cleanup resources"
+          ansible.builtin.command: crm_resource --cleanup
+          register: cleanup_failed_resource_test_execution
+          changed_when: cleanup_failed_resource_test_execution.rc == 0
+
+        - name: "Test Execution: Stop timer"
+          ansible.builtin.set_fact:
+            test_execution_end: "{{ 
now(utc=true, fmt='%Y-%m-%d %H:%M:%S') }}" + + - name: "Set test case message and details" + ansible.builtin.set_fact: + test_case_message_from_test_case: | + ENSA Version: {{ "ENSA1" if ensa2_check.stdout == "" else "ENSA2" }} + Old ASCS: {{ cluster_status_pre.ascs_node }} + New ASCS: {{ cluster_status_test_execution.ascs_node }} + Old ERS: {{ cluster_status_pre.ers_node }} + New ERS: {{ cluster_status_test_execution.ers_node }} + test_case_details_from_test_case: { + "Pre Validations: Validate SCS cluster status": "{{ cluster_status_pre }}", + "Pre Validations: CleanUp any failed resource": "{{ cleanup_failed_resource_pre }}", + "Test Execution: Kill Message Server Process": "{{ kill_ms_result }}", + "Test Execution: Cleanup resources": "{{ cleanup_failed_resource_test_execution }}", + "Post Validations Result": "{{ cluster_status_test_execution }}", + } +# /*--------------------------------------------------------------------------- +# | Post Validations | +# +--------------------------------------------------------------------------*/ + - name: "Post Validations Tasks" + ansible.builtin.include_tasks: "roles/misc/tasks/post-validations.yml" + + rescue: + - name: "Rescue operation" + ansible.builtin.include_tasks: "roles/misc/tasks/rescue.yml" + +- name: "Pre Validations: Test Case Skipped" + become: true + when: pre_validations_status == "FAILED" + block: + - name: "Set test case message and details" + ansible.builtin.set_fact: + test_case_message_from_test_case: "Pre Validations for SCS cluster status checks failed. Please check the details." + test_case_details_from_test_case: { + "Pre Validations: Validate SCS cluster status": "{{ cluster_status_pre }}", + } + + - name: "Post Validations Tasks" + ansible.builtin.include_tasks: "roles/misc/tasks/post-validations.yml" diff --git a/src/roles/ha_scs/tasks/kill-sapstartsrv-process.yml b/src/roles/ha_scs/tasks/kill-sapstartsrv-process.yml new file mode 100644 index 00000000..f1a63db8 --- /dev/null +++ b/src/roles/ha_scs/tasks/kill-sapstartsrv-process.yml @@ -0,0 +1,117 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
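+#
+# Note: sapstartsrv is expected to be respawned outside Pacemaker's control (systemd
+# or sapinit, depending on the kernel and OS setup), so the pass criterion here is a
+# new PID for the process and an unchanged ASCS/ERS placement, not a failover.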
+ +--- +# /*--------------------------------------------------------------------------- +# | Kill sapstartsrv Process | +# +--------------------------------------------------------------------------*/ +- name: "Test Setup Tasks" + ansible.builtin.include_tasks: "roles/misc/tasks/test-case-setup.yml" + run_once: true + +# /*--------------------------------------------------------------------------- +# | Pre Validations | +# +--------------------------------------------------------------------------*/ +- name: "Pre Validations: SCS Nodes" + ansible.builtin.include_tasks: "roles/misc/tasks/pre-validations-scs.yml" + +# /*--------------------------------------------------------------------------- +# | Test Execution | +# +--------------------------------------------------------------------------*/ +- name: "Test Execution: Kill sapstartsrv Process" + become: true + when: pre_validations_status == "PASSED" + block: + - name: "Test Execution: Kill sapstartsrv on ASCS Node" + when: ansible_hostname == cluster_status_pre.ascs_node + become: true + block: + - name: "Test Execution: Start timer" + ansible.builtin.set_fact: + test_execution_start: "{{ now(utc=true, fmt='%Y-%m-%d %H:%M:%S') }}" + test_execution_hostname: "{{ hostvars[cluster_status_pre.ascs_node].ansible_hostname }}" + + - name: "Test Execution: Find sapstartsrv PID" + ansible.builtin.shell: set -o pipefail && pgrep -fl 'ASCS{{ scs_instance_number }}.*sapstartsrv' | grep -v pgrep | awk '{print $1}' + environment: + PATH: /usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/usr/sap/{{ sap_sid | upper }}/SYS/exe/uc/linuxx86_64:/usr/sap/{{ sap_sid | upper }}/SYS/exe/run:/home/{{ sap_sid | lower }}adm + LD_LIBRARY_PATH: /usr/sap/{{ sap_sid | upper }}/SYS/exe/run:/usr/sap/{{ sap_sid | upper }}/SYS/exe/uc/linuxx86_64 + register: sapstartsrv_pid + failed_when: sapstartsrv_pid.stdout == "" + + - name: "Test Execution: Kill sapstartsrv Process" + ansible.builtin.shell: set -o pipefail && echo '{{ sapstartsrv_pid.stdout_lines | join(" ") }}' | xargs -r kill -9 + register: kill_sapstartsrv_result + failed_when: false + + - name: "Test Execution: Wait for the cluster to be in a stable state" + ansible.builtin.wait_for: + timeout: 30 + + - name: "Test Execution: Find sapstartsrv PID after killing the process" + ansible.builtin.shell: set -o pipefail && pgrep -fl 'ASCS{{ scs_instance_number }}.*sapstartsrv' | grep -v pgrep | awk '{print $1}' + environment: + PATH: /usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/usr/sap/{{ sap_sid | upper }}/SYS/exe/uc/linuxx86_64:/usr/sap/{{ sap_sid | upper }}/SYS/exe/run:/home/{{ sap_sid | lower }}adm + LD_LIBRARY_PATH: /usr/sap/{{ sap_sid | upper }}/SYS/exe/run:/usr/sap/{{ sap_sid | upper }}/SYS/exe/uc/linuxx86_64 + register: sapstartsrv_pid_after_kill + failed_when: sapstartsrv_pid_after_kill.stdout == "" + + - name: "Test Execution: Validate the process restarted with new PID" + ansible.builtin.assert: + that: sapstartsrv_pid_after_kill.stdout != sapstartsrv_pid.stdout + msg: "The sapstartsrv process did not restart with a new PID after being killed." 
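+        # Note: comparing raw pgrep stdout is sufficient only while a single
+        # sapstartsrv instance matches the pattern; if several could match, comparing
+        # sets of stdout_lines would be the more robust restart check.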
+ + - name: "Test Execution: Validate SCS cluster status" + get_cluster_status_scs: + sap_sid: "{{ sap_sid | lower }}" + ansible_os_family: "{{ ansible_os_family | upper }}" + register: cluster_status_test_execution + retries: "{{ default_retries }}" + delay: "{{ default_delay }}" + until: | + cluster_status_test_execution.ascs_node == cluster_status_pre.ascs_node + and cluster_status_test_execution.ers_node == cluster_status_pre.ers_node + + - name: "Test Execution: Stop timer" + ansible.builtin.set_fact: + test_execution_end: "{{ now(utc=true, fmt='%Y-%m-%d %H:%M:%S') }}" + + - name: "Set test case message and details" + ansible.builtin.set_fact: + test_case_message_from_test_case: | + Old ASCS: {{ cluster_status_pre.ascs_node }} + New ASCS: {{ cluster_status_test_execution.ascs_node }} + Old ERS: {{ cluster_status_pre.ers_node }} + New ERS: {{ cluster_status_test_execution.ers_node }} + Old PID: {{ sapstartsrv_pid.stdout }} + New PID: {{ sapstartsrv_pid_after_kill.stdout }} + test_case_details_from_test_case: { + "Pre Validations: Validate SCS cluster status": "{{ cluster_status_pre }}", + "Pre Validations: CleanUp any failed resource": "{{ cleanup_failed_resource_pre }}", + "Test Execution: Find sapstartsrv PID": "{{ sapstartsrv_pid }}", + "Test Execution: Find sapstartsrv PID after killing the process": "{{ sapstartsrv_pid_after_kill }}", + "Post Validations Result": "{{ cluster_status_test_execution }}", + } +# /*--------------------------------------------------------------------------- +# | Post Validations | +# +--------------------------------------------------------------------------*/ + - name: "Post Validations Tasks" + ansible.builtin.include_tasks: "roles/misc/tasks/post-validations.yml" + + rescue: + - name: "Rescue operation" + ansible.builtin.include_tasks: "roles/misc/tasks/rescue.yml" + +- name: "Pre Validations: Test Case Skipped" + become: true + when: pre_validations_status == "FAILED" + block: + - name: "Set test case message and details" + ansible.builtin.set_fact: + test_case_message_from_test_case: "Pre Validations for SCS cluster status checks failed. Please check the details." + test_case_details_from_test_case: { + "Pre Validations: Validate SCS cluster status": "{{ cluster_status_pre }}", + } + + - name: "Post Validations Tasks" + ansible.builtin.include_tasks: "roles/misc/tasks/post-validations.yml" diff --git a/src/roles/ha_scs/tasks/manual-restart.yml b/src/roles/ha_scs/tasks/manual-restart.yml new file mode 100644 index 00000000..580d3f77 --- /dev/null +++ b/src/roles/ha_scs/tasks/manual-restart.yml @@ -0,0 +1,126 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
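+#
+# Note: "StopWait 600 2" / "StartWait 600 2" wait for the operation to complete,
+# with a timeout (600s) and a polling interval (2s). Because the instance is stopped
+# through sapcontrol rather than through the cluster, the first validation below
+# expects the ASCS resource to report no running node until StartWait brings it back.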
+
+---
+# /*---------------------------------------------------------------------------
+# |                      Manual Restart of ASCS Instance                       |
+# +--------------------------------------------------------------------------*/
+- name: "Test Setup Tasks"
+  ansible.builtin.include_tasks: "roles/misc/tasks/test-case-setup.yml"
+  run_once: true
+
+# /*---------------------------------------------------------------------------
+# |                            Pre Validations                                 |
+# +--------------------------------------------------------------------------*/
+- name: "Pre Validations: SCS Nodes"
+  ansible.builtin.include_tasks: "roles/misc/tasks/pre-validations-scs.yml"
+
+# /*---------------------------------------------------------------------------
+# |                            Test Execution                                  |
+# +--------------------------------------------------------------------------*/
+- name: "Test Execution: Manual Restart of ASCS Instance"
+  become: true
+  when: pre_validations_status == "PASSED" and (ansible_os_family | upper) == "SUSE"
+  block:
+    - name: "Test Execution: Stop ASCS on ASCS Node"
+      when: ansible_hostname == cluster_status_pre.ascs_node
+      become: true
+      block:
+        - name: "Test Execution: Start timer"
+          ansible.builtin.set_fact:
+            test_execution_start: "{{ now(utc=true, fmt='%Y-%m-%d %H:%M:%S') }}"
+            test_execution_hostname: "{{ hostvars[cluster_status_pre.ascs_node].ansible_hostname }}"
+
+        - name: "Test Execution: Stop ASCS Instance"
+          become: true
+          become_user: "{{ sap_sid | lower }}adm"
+          ansible.builtin.shell: /usr/sap/{{ sap_sid | upper }}/SYS/exe/run/sapcontrol -nr {{ scs_instance_number }} -function StopWait 600 2
+          environment:
+            PATH: /usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/usr/sap/{{ sap_sid | upper }}/SYS/exe/uc/linuxx86_64:/usr/sap/{{ sap_sid | upper }}/SYS/exe/run:/home/{{ sap_sid | lower }}adm
+            DIR_LIBRARY: /usr/sap/{{ sap_sid | upper }}/SYS/exe/run
+            LD_LIBRARY_PATH: /usr/sap/{{ sap_sid | upper }}/SYS/exe/run:/usr/sap/{{ sap_sid | upper }}/SYS/exe/uc/linuxx86_64
+            SAPSYSTEMNAME: "{{ sap_sid | upper }}"
+          register: stop_ascs_result
+          failed_when: false
+
+        - name: "Test Execution: Validate SCS cluster status 1"
+          get_cluster_status_scs:
+            sap_sid: "{{ sap_sid | lower }}"
+            ansible_os_family: "{{ ansible_os_family | upper }}"
+          register: cluster_status_test_execution
+          retries: "{{ default_retries }}"
+          delay: "{{ default_delay }}"
+          until: |
+            cluster_status_test_execution.ascs_node == ""
+            and cluster_status_test_execution.ers_node == cluster_status_pre.ers_node
+
+        - name: "Test Execution: Start ASCS Instance"
+          become: true
+          become_user: "{{ sap_sid | lower }}adm"
+          ansible.builtin.shell: /usr/sap/{{ sap_sid | upper }}/SYS/exe/run/sapcontrol -nr {{ scs_instance_number }} -function StartWait 600 2
+          environment:
+            PATH: /usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/usr/sap/{{ sap_sid | upper }}/SYS/exe/uc/linuxx86_64:/usr/sap/{{ sap_sid | upper }}/SYS/exe/run:/home/{{ sap_sid | lower }}adm
+            DIR_LIBRARY: /usr/sap/{{ sap_sid | upper }}/SYS/exe/run
+            LD_LIBRARY_PATH: /usr/sap/{{ sap_sid | upper }}/SYS/exe/run:/usr/sap/{{ sap_sid | upper }}/SYS/exe/uc/linuxx86_64
+            SAPSYSTEMNAME: "{{ sap_sid | upper }}"
+          register: start_ascs_result
+          failed_when: false
+
+        - name: "Test Execution: Cleanup resources"
+          ansible.builtin.command: crm_resource --cleanup
+          register: cleanup_failed_resource_test_execution
+          changed_when: cleanup_failed_resource_test_execution.rc == 0
+
+        - name: "Test Execution: Validate SCS cluster status 2"
+          get_cluster_status_scs:
+            sap_sid: "{{ sap_sid | lower }}"
+            ansible_os_family: "{{ ansible_os_family | 
upper }}" + register: cluster_status_test_execution_2 + retries: "{{ default_retries }}" + delay: "{{ default_delay }}" + until: | + cluster_status_test_execution_2.ascs_node == cluster_status_pre.ascs_node + and cluster_status_test_execution_2.ers_node == cluster_status_pre.ers_node + + - name: "Test Execution: Stop timer" + ansible.builtin.set_fact: + test_execution_end: "{{ now(utc=true, fmt='%Y-%m-%d %H:%M:%S') }}" + + - name: "Set test case message and details" + ansible.builtin.set_fact: + test_case_message_from_test_case: | + Old ASCS: {{ cluster_status_pre.ascs_node }} + New ASCS: {{ cluster_status_test_execution_2.ascs_node }} + Old ERS: {{ cluster_status_pre.ers_node }} + New ERS: {{ cluster_status_test_execution_2.ers_node }} + test_case_details_from_test_case: { + "Pre Validations: Validate SCS cluster status": "{{ cluster_status_pre }}", + "Pre Validations: CleanUp any failed resource": "{{ cleanup_failed_resource_pre }}", + "Test Execution: Stop ASCS Instance": "{{ stop_ascs_result }}", + "Test Execution: Start ASCS Instance": "{{ start_ascs_result }}", + "Test Execution: Cleanup resources": "{{ cleanup_failed_resource_test_execution }}", + "Post Validations Result": "{{ cluster_status_test_execution_2 }}", + } +# /*--------------------------------------------------------------------------- +# | Post Validations | +# +--------------------------------------------------------------------------*/ + - name: "Post Validations Tasks" + ansible.builtin.include_tasks: "roles/misc/tasks/post-validations.yml" + + rescue: + - name: "Rescue operation" + ansible.builtin.include_tasks: "roles/misc/tasks/rescue.yml" + +- name: "Pre Validations: Test Case Skipped" + become: true + when: pre_validations_status == "FAILED" + block: + - name: "Set test case message and details" + ansible.builtin.set_fact: + test_case_message_from_test_case: "Pre Validations for SCS cluster status checks failed. Please check the details." + test_case_details_from_test_case: { + "Pre Validations: Validate SCS cluster status": "{{ cluster_status_pre }}", + } + + - name: "Post Validations Tasks" + ansible.builtin.include_tasks: "roles/misc/tasks/post-validations.yml" diff --git a/src/roles/ha_scs/tasks/sapcontrol-config.yml b/src/roles/ha_scs/tasks/sapcontrol-config.yml new file mode 100644 index 00000000..70f04fb2 --- /dev/null +++ b/src/roles/ha_scs/tasks/sapcontrol-config.yml @@ -0,0 +1,96 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
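+#
+# Note: the three probes used below are read-only checks from SAP's HA interface:
+#   HAGetFailoverConfig   - reports the detected HA solution and failover nodes
+#   HACheckFailoverConfig - validates the failover configuration
+#   HACheckConfig         - checks the overall HA setup (redundancy, agents)
+# Non-zero return codes are captured rather than fatal (failed_when: false), so all
+# three results always reach the test report.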
+
+---
+# /*---------------------------------------------------------------------------
+# |                     sapcontrol commands to validate scs                    |
+# +--------------------------------------------------------------------------*/
+- name: "Test Setup Tasks"
+  ansible.builtin.include_tasks: "roles/misc/tasks/test-case-setup.yml"
+  run_once: true
+
+# /*---------------------------------------------------------------------------
+# |                            Pre Validations                                 |
+# +--------------------------------------------------------------------------*/
+- name: "Pre Validations: SCS Nodes"
+  ansible.builtin.include_tasks: "roles/misc/tasks/pre-validations-scs.yml"
+
+# /*---------------------------------------------------------------------------
+# |                            Test Execution                                  |
+# +--------------------------------------------------------------------------*/
+- name: "Test Execution: SAPControl Config"
+  become: true
+  when: pre_validations_status == "PASSED" and (ansible_os_family | upper) == "SUSE"
+  block:
+    - name: "Test Execution: Run sapcontrol command"
+      when: ansible_hostname == cluster_status_pre.ascs_node
+      become: true
+      block:
+        - name: "Test Execution: Start timer"
+          ansible.builtin.set_fact:
+            test_execution_start: "{{ now(utc=true, fmt='%Y-%m-%d %H:%M:%S') }}"
+            test_execution_hostname: "{{ hostvars[cluster_status_pre.ascs_node].ansible_hostname }}"
+
+        - name: "Test Execution: Run sapcontrol commands {{ sapcontrol_method }}"
+          become: true
+          become_user: "{{ sap_sid | lower }}adm"
+          ansible.builtin.shell: sapcontrol -nr {{ scs_instance_number }} -function {{ sapcontrol_method }}
+          environment:
+            PATH: /usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/usr/sap/{{ sap_sid | upper }}/SYS/exe/uc/linuxx86_64:/usr/sap/{{ sap_sid | upper }}/SYS/exe/run:/home/{{ sap_sid | lower }}adm
+            DIR_LIBRARY: /usr/sap/{{ sap_sid | upper }}/SYS/exe/run
+            LD_LIBRARY_PATH: /usr/sap/{{ sap_sid | upper }}/SYS/exe/run:/usr/sap/{{ sap_sid | upper }}/SYS/exe/uc/linuxx86_64
+            SAPSYSTEMNAME: "{{ sap_sid | upper }}"
+          register: sapcontrol_results
+          failed_when: false
+          loop:
+            - "HAGetFailoverConfig"
+            - "HACheckFailoverConfig"
+            - "HACheckConfig"
+          loop_control:
+            loop_var: sapcontrol_method
+
+        - name: "Test Execution: Stop timer and summarize sapcontrol results"
+          ansible.builtin.set_fact:
+            test_execution_end: "{{ now(utc=true, fmt='%Y-%m-%d %H:%M:%S') }}"
+            test_case_message_from_test_case: >-
+              Commands executed:
+              {% for res in sapcontrol_results.results %}
+              - {{ res.sapcontrol_method }}: {{ 'Success' if res.rc == 0 else 'Failed' }}
+              {% endfor %}
+
+        - name: "Test Execution: Record sapcontrol command details"
+          ansible.builtin.set_fact:
+            test_case_details_from_test_case: >-
+              {{ (test_case_details_from_test_case | default([])) + [{
+                'command': sapcontrol_result.cmd,
+                'returnCode': sapcontrol_result.rc,
+                'stdout': sapcontrol_result.stdout_lines,
+                'stderr': sapcontrol_result.stderr_lines
+              }] }}
+          loop: "{{ sapcontrol_results.results }}"
+          loop_control:
+            loop_var: sapcontrol_result
+
+# /*---------------------------------------------------------------------------
+# |                            Post Validations                                |
+# +--------------------------------------------------------------------------*/
+    - name: "Post Validations Tasks"
+      ansible.builtin.include_tasks: "roles/misc/tasks/post-validations.yml"
+
+  rescue:
+    - name: "Rescue operation"
+      ansible.builtin.include_tasks: "roles/misc/tasks/rescue.yml"
+
+- name: "Pre Validations: Test Case Skipped"
+  become: true
+  when: pre_validations_status == "FAILED"
+  block:
+    - name: "Set test case message and details"
+      ansible.builtin.set_fact:
+        
test_case_message_from_test_case: "Pre Validations for SCS cluster status checks failed. Please check the details." + test_case_details_from_test_case: { + "Pre Validations: Validate SCS cluster status": "{{ cluster_status_pre }}", + } + + - name: "Post Validations Tasks" + ansible.builtin.include_tasks: "roles/misc/tasks/post-validations.yml" diff --git a/src/roles/misc/tasks/loadbalancer.yml b/src/roles/misc/tasks/loadbalancer.yml new file mode 100644 index 00000000..bd1d955e --- /dev/null +++ b/src/roles/misc/tasks/loadbalancer.yml @@ -0,0 +1,60 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +# /*--------------------------------------------------------------------------- +# | Azure Load Balancer Validation | +# +--------------------------------------------------------------------------*/ +- name: "Azure LB Validation" + run_once: true + block: + - name: "Test Setup Tasks" + ansible.builtin.include_tasks: "roles/misc/tasks/test-case-setup.yml" + + - name: "Pre Validations: Validate the Azure Load Balancer config" + become: true + block: + - name: "Retrieve Subscription ID and Resource Group Name" + ansible.builtin.uri: + url: http://169.254.169.254/metadata/instance?api-version=2021-02-01 + use_proxy: false + headers: + Metadata: true + register: azure_instance_metadata + + - name: "Get the Azure Load Balancer IP" + ansible.builtin.uri: + url: http://169.254.169.254:80/metadata/loadbalancer?api-version=2020-10-01 + use_proxy: false + headers: + Metadata: true + register: azure_loadbalancer_metadata + + - name: "Azure Load Balancer check for the DB nodes" + delegate_to: localhost + get_azure_lb: + subscription_id: "{{ azure_instance_metadata.json.compute.subscriptionId }}" + region: "{{ azure_instance_metadata.json.compute.location }}" + inbound_rules: "{{ azure_loadbalancer_metadata.json.loadbalancer.inboundRules }}" + constants: "{{ all_constants }}" + msi_client_id: "{{ user_assigned_identity_client_id | default('') }}" + register: test_result + + - name: "Set the test case status" + ansible.builtin.set_fact: + test_case_name: "{{ item.name }}: {{ virtual_host }}" + test_case_message: "{{ test_result.message }}" + test_case_details: "{{ test_result.details }}" + test_case_hostname: "{{ virtual_host }}" + test_case_status: "{{ test_result.status }}" + + rescue: + - name: "Test case failed" + ansible.builtin.set_fact: + test_case_name: "{{ item.name }}: {{ virtual_host }}" + test_case_status: "FAILED" + test_case_details: "{{ test_result }}" + test_case_message: "{{ ansible_failed_result }}" + test_case_hostname: "{{ virtual_host }}" + + - name: "Post Telemetry Data" + ansible.builtin.include_tasks: "roles/misc/tasks/post-telemetry-data.yml" diff --git a/src/roles/misc/tasks/post-validations.yml b/src/roles/misc/tasks/post-validations.yml index c31e09e6..f3ea15d9 100644 --- a/src/roles/misc/tasks/post-validations.yml +++ b/src/roles/misc/tasks/post-validations.yml @@ -31,22 +31,18 @@ - is_primary_node | bool - cluster_status_pre is defined block: - - name: "Combine logs from both Nodes" + - name: "Merge and sort logs from all nodes by timestamp" + log_parser: + function: "merge_logs" + ansible_os_family: "{{ ansible_os_family | upper }}" + logs: + - "{{ hostvars[primary_node]['var_log_messages_output'].filtered_logs | default('[]') }}" + - "{{ hostvars[secondary_node]['var_log_messages_output'].filtered_logs | default('[]') }}" + register: merged_logs_output + + - name: "Set combined logs variable" ansible.builtin.set_fact: - combined_logs: >- - {{ - ( - 
hostvars[primary_node]['var_log_messages_output'].filtered_logs - | default('[]') - | from_json - ) - + - ( - hostvars[secondary_node]['var_log_messages_output'].filtered_logs - | default('[]') - | from_json - ) - }} + combined_logs: "{{ merged_logs_output.filtered_logs | from_json }}" - name: "Post Validations: Set test case status" delegate_to: localhost diff --git a/src/roles/misc/tasks/pre-validations-db.yml b/src/roles/misc/tasks/pre-validations-db.yml index 2ff428bf..2f1d1033 100644 --- a/src/roles/misc/tasks/pre-validations-db.yml +++ b/src/roles/misc/tasks/pre-validations-db.yml @@ -28,7 +28,7 @@ ansible.builtin.command: crm_resource --cleanup ignore_errors: true register: cleanup_failed_resource_pre - timeout: 60 + timeout: "{{ default_timeout }}" retries: 3 until: cleanup_failed_resource_pre.rc == 0 changed_when: cleanup_failed_resource_pre.rc == 0 diff --git a/src/roles/misc/tasks/pre-validations-scs.yml b/src/roles/misc/tasks/pre-validations-scs.yml index af9dbe93..660a8044 100644 --- a/src/roles/misc/tasks/pre-validations-scs.yml +++ b/src/roles/misc/tasks/pre-validations-scs.yml @@ -20,7 +20,7 @@ ansible.builtin.command: crm_resource --cleanup ignore_errors: true register: cleanup_failed_resource_pre - timeout: 60 + timeout: "{{ default_timeout }}" retries: 3 until: cleanup_failed_resource_pre.rc == 0 changed_when: cleanup_failed_resource_pre.rc == 0 diff --git a/src/roles/misc/tasks/rescue.yml b/src/roles/misc/tasks/rescue.yml index c3e04643..01e4f788 100644 --- a/src/roles/misc/tasks/rescue.yml +++ b/src/roles/misc/tasks/rescue.yml @@ -36,22 +36,19 @@ ) }} - - name: "Combine logs from both Nodes" + - name: "Merge and sort logs from all nodes by timestamp" + delegate_to: localhost + log_parser: + function: "merge_logs" + ansible_os_family: "{{ ansible_os_family | upper }}" + logs: + - "{{ hostvars[first_node]['var_log_messages_output'].filtered_logs | default('[]') }}" + - "{{ hostvars[second_node]['var_log_messages_output'].filtered_logs | default('[]') }}" + register: merged_logs_output + + - name: "Set combined logs variable" ansible.builtin.set_fact: - combined_logs: >- - {{ - ( - hostvars[first_node]['var_log_messages_output'].filtered_logs - | default('[]') - | from_json - ) - + - ( - hostvars[second_node]['var_log_messages_output'].filtered_logs - | default('[]') - | from_json - ) - }} + combined_logs: "{{ merged_logs_output.filtered_logs | from_json }}" - name: "Post Validations: Rescure operation set test case status" delegate_to: localhost diff --git a/src/roles/misc/tasks/var-log-messages.yml b/src/roles/misc/tasks/var-log-messages.yml index fe6b14a5..bbda82b2 100644 --- a/src/roles/misc/tasks/var-log-messages.yml +++ b/src/roles/misc/tasks/var-log-messages.yml @@ -13,4 +13,5 @@ start_time: "{{ test_execution_start | default(test_case_start_time_epoch) }}" end_time: "{{ now(utc=true, fmt='%Y-%m-%d %H:%M:%S') }}" ansible_os_family: "{{ ansible_os_family | upper }}" + function: "parse_logs" register: var_log_messages_output diff --git a/src/vars/input-api.yaml b/src/vars/input-api.yaml index 9b2d9e28..ce6e527c 100644 --- a/src/vars/input-api.yaml +++ b/src/vars/input-api.yaml @@ -130,25 +130,37 @@ test_groups: mechanisms, and ensures system replication resumes properly after service restoration. 
enabled: true
-
   - name: HA_SCS
     test_cases:
       - name: "HA Parameters Validation"
         task_name: ha-config
         description: |
-          The HA parameter validation test validates HA configuration,
-          including Corosync settings, Pacemaker resources, SBD device configuration,
-          and HANA system replication setup.
+          The HA parameter validation test validates HA configuration
+          including Corosync settings, Pacemaker resources, SBD device configuration, and SCS
+          enqueue replication setup.
+        enabled: true
+
+      - name: Azure Load Balancer Validation
+        task_name: azure-lb
+        description: |
+          The Azure LB configuration test validates Azure Load Balancer setup including health probe
+          configuration, backend pool settings, load balancing rules, and frontend IP configuration.
+        enabled: true
+
+      - name: "SAPControl Config Validation"
+        task_name: sapcontrol-config
+        description: |
+          The SAPControl Config Validation test runs multiple sapcontrol commands to validate the
+          SCS configuration. It executes commands like HAGetFailoverConfig,
+          HACheckFailoverConfig, and HACheckConfig, capturing their outputs and statuses to
+          ensure proper configuration and functionality.
         enabled: true

       - name: "Manual ASCS Migration"
         task_name: ascs-migration
         description: |
-          The Resource Migration test validates planned failover scenarios by executing controlled
-          resource movement between ASCS and ERS nodes. It performs a graceful migration of the ASCS
-          resources to the ERS node, verifies proper role changes, ensures cluster maintains
-          stability throughout the transition, and validates complete data synchronization after
-          migration.
+          The Resource Migration test validates planned failover scenarios by controlling resource
+          movement between SCS nodes, ensuring proper role changes.
         enabled: true

       - name: "ASCS Node Crash"
@@ -157,7 +169,66 @@ test_groups:
           The ASCS Node Crash test simulates cluster behavior when the ASCS node crashes. It
           simulates an ASCS node failure by forcefully terminating the process, then verifies
           automatic failover to the ERS node, monitors system replication status, and confirms
-          service recovery without data loss.
+          service recovery.
+        enabled: true
+
+      - name: "Block Network Communication"
+        task_name: block-network
+        description: |
+          The Block Network test validates cluster behavior during network partition scenarios by
+          implementing iptables rules to block communication between ASCS and ERS nodes. It
+          verifies split-brain prevention mechanisms, validates proper failover execution when
+          nodes become isolated, and ensures cluster stability after network connectivity is
+          restored.
+        enabled: true
+
+      - name: "Kill Message Server Process"
+        task_name: kill-message-server
+        description: |
+          The Message Server Process Kill test simulates failure of the message server process on
+          the ASCS node by forcefully terminating it using the kill -9 signal. It verifies proper
+          cluster reaction, automatic failover to the ERS node, and ensures service continuity
+          after the process failure.
+        enabled: true
+
+      - name: "Kill Enqueue Server Process"
+        task_name: kill-enqueue-server
+        description: |
+          The Enqueue Server Process Kill test simulates failure of the enqueue server process on
+          the ASCS node by forcefully terminating it using the kill -9 signal. It validates proper
+          cluster behavior and automatic failover execution.
+        enabled: true
+
+      - name: "Kill Enqueue Replication Server Process"
+        task_name: kill-enqueue-replication
+        description: |
+          The Enqueue Replication Server Process Kill test simulates failure of the enqueue
+          replication server process on the ERS node by forcefully terminating it using the
+          kill -9 signal. It detects the ENSA version in use and validates that the cluster
+          recovers the replication server without disturbing the ASCS placement.
+        enabled: true
+
+      - name: "Kill sapstartsrv Process for ASCS"
+        task_name: kill-sapstartsrv-process
+        description: |
+          The sapstartsrv Process Kill test simulates failure of the sapstartsrv process of the
+          ASCS instance by forcefully terminating it using the kill -9 signal. It validates that
+          the process is restarted with a new PID and that the ASCS and ERS placement in the
+          cluster remains unchanged.
+        enabled: true
+
+      - name: "Manual Restart of ASCS Instance"
+        task_name: manual-restart
+        description: |
+          The Manual Restart test stops the ASCS instance with sapcontrol StopWait, verifies that
+          the cluster registers the instance as stopped, starts it again with StartWait, and
+          validates that the cluster returns to its original ASCS and ERS placement.
+        enabled: true
+
+      - name: "HAFailoverToNode Test"
+        task_name: ha-failover-to-node
+        description: |
+          The HAFailoverToNode test validates SAP's built-in high availability functionality by
+          running the sapcontrol HAFailoverToNode command on the ASCS node. It verifies that the
+          ASCS and ERS roles swap between the nodes and that the location constraints created by
+          the failover are removed afterwards.
         enabled: true


@@ -170,6 +241,12 @@
 ers_instance_number: "01"
 NFS_provider: "AFS"
 sap_port_to_ping: "1128"

+# Default values for retries, delay, timeout
+default_retries: 50
+default_delay: 10
+default_timeout: 60
+ascs_stonith_timeout: 120
+
 # Commands for HANA DB HA Test Cases based on OS family
 commands:
   - name: get_hana_resource_id
@@ -184,10 +261,14 @@ commands:
     SUSE: "crm_report -f '{{ test_group_start_time }}' /tmp/{{ test_group_invocation_id }}"
     REDHAT: "crm_report -f '{{ test_group_start_time }}' --dest /tmp/{{ test_group_invocation_id }} -yes"

+  - name: get_sap_instance_resource_id
+    SUSE: "cibadmin --query --xpath \"//primitive[@type='SAPInstance']\" --node-path | grep -oP \"primitive\\[@id='\\K[^']+\" | grep ASCS"
+    REDHAT: "cibadmin --query --xpath \"//primitive[@type='SAPInstance']\" --node-path | grep ASCS | grep -oP \"primitive\\[@id='\\K[^']+\""
+
   - name: ascs_resource_migration_cmd
-    SUSE: "crm resource move rsc_sap_{{ sap_sid }}_ASCS{{ scs_instance_number }} force"
-    REDHAT: "pcs resource move rsc_sap_{{ sap_sid }}_ASCS{{ scs_instance_number }}"
+    SUSE: "crm resource move {{ cluster_status_pre.ascs_resource_id | default('rsc_sap_' ~ sap_sid ~ '_ASCS' ~ scs_instance_number) }} force"
+    REDHAT: "pcs resource move {{ cluster_status_pre.ascs_resource_id | default('rsc_sap_' ~ sap_sid ~ '_ASCS' ~ scs_instance_number) }}"

   - name: ascs_resource_unmigrate_cmd
-    SUSE: "crm resource clear rsc_sap_{{ sap_sid }}_ASCS{{ scs_instance_number }}"
-    REDHAT: "pcs resource clear rsc_sap_{{ sap_sid }}_ASCS{{ scs_instance_number }}"
+    SUSE: "crm resource clear {{ cluster_status_pre.ascs_resource_id | default('rsc_sap_' ~ sap_sid ~ '_ASCS' ~ scs_instance_number) }}"
+    REDHAT: "pcs resource clear {{ cluster_status_pre.ascs_resource_id | default('rsc_sap_' ~ sap_sid ~ '_ASCS' ~ scs_instance_number) }}"
diff --git a/tests/modules/get_cluster_status_db_test.py b/tests/modules/get_cluster_status_db_test.py
index 4075252e..42f36490 100644
--- a/tests/modules/get_cluster_status_db_test.py
+++ b/tests/modules/get_cluster_status_db_test.py
@@ -69,20 +69,22 @@ def test_process_node_attributes_primary_only(self, hana_checker):
         :param hana_checker: 
Instance of HanaClusterStatusChecker.
         :type hana_checker: HanaClusterStatusChecker
         """
+
         xml_str = """
-        <node_attributes>
-            <node name="node1">
-                <attribute name="hana_tst_clone_state" value="PROMOTED"/>
-                <attribute name="hana_tst_sync_state" value="PRIM"/>
-            </node>
-            <node name="node2">
-                <attribute name="hana_tst_clone_state" value="UNDEFINED"/>
-            </node>
-        </node_attributes>
+        <node_attributes>
+            <node name="node1">
+                <attribute name="hana_tst_clone_state" value="PROMOTED"/>
+                <attribute name="hana_tst_sync_state" value="PRIM"/>
+            </node>
+            <node name="node2">
+                <attribute name="hana_tst_clone_state" value="UNDEFINED"/>
+                <attribute name="hana_tst_sync_state" value="SFAIL"/>
+            </node>
+        </node_attributes>
         """
-        node_attributes = ET.fromstring(xml_str)
-        result = hana_checker._process_node_attributes(node_attributes)
+        result = hana_checker._process_node_attributes(ET.fromstring(xml_str))

         assert result["primary_node"] == "node1"
         assert result["secondary_node"] == ""
@@ -98,24 +100,24 @@ def test_process_node_attributes_both_nodes(self, hana_checker):
         :type hana_checker: HanaClusterStatusChecker
         """
         xml_str = """
-        <node_attributes>
-            <node name="node1">
-                <attribute name="hana_tst_clone_state" value="PROMOTED"/>
-                <attribute name="hana_tst_sync_state" value="PRIM"/>
-            </node>
-            <node name="node2">
-                <attribute name="hana_tst_clone_state" value="DEMOTED"/>
-                <attribute name="hana_tst_sync_state" value="SOK"/>
-            </node>
-        </node_attributes>
-        """
+        <node_attributes>
+            <node name="node1">
+                <attribute name="hana_tst_clone_state" value="PROMOTED"/>
+                <attribute name="hana_tst_sync_state" value="PRIM"/>
+                <attribute name="hana_tst_op_mode" value="logreplay"/>
+            </node>
+            <node name="node2">
+                <attribute name="hana_tst_clone_state" value="DEMOTED"/>
+                <attribute name="hana_tst_sync_state" value="SOK"/>
+                <attribute name="hana_tst_op_mode" value="logreplay"/>
+            </node>
+        </node_attributes>
+        """
-        node_attributes = ET.fromstring(xml_str)
-
-        result = hana_checker._process_node_attributes(node_attributes)
+        result = hana_checker._process_node_attributes(ET.fromstring(xml_str))

         assert result["primary_node"] == "node1"
         assert result["secondary_node"] == "node2"
diff --git a/tests/modules/get_cluster_status_scs_test.py b/tests/modules/get_cluster_status_scs_test.py
index 186ea0ca..1fe334d3 100644
--- a/tests/modules/get_cluster_status_scs_test.py
+++ b/tests/modules/get_cluster_status_scs_test.py
@@ -25,23 +25,72 @@ def scs_checker(self):
         """
         return SCSClusterStatusChecker(sap_sid="TST", ansible_os_family="REDHAT")

-    def test_process_node_attributes(self, scs_checker):
+    def test_get_resource_ids(self, mocker, scs_checker):
+        """
+        Test the _get_resource_ids method to ensure ASCS and ERS resource IDs are
+        correctly identified.
+
+        :param mocker: Mocking library to patch methods.
+        :type mocker: pytest_mock.MockerFixture
+        :param scs_checker: Instance of SCSClusterStatusChecker.
+        :type scs_checker: SCSClusterStatusChecker
+        """
+        mock_resources_xml = """
+        <resources>
+            <resource id="rsc_sap_TST_ASCS00" resource_agent="ocf::heartbeat:SAPInstance" role="Started">
+                <node name="node1" id="1" cached="true"/>
+            </resource>
+            <resource id="rsc_sap_TST_ERS01" resource_agent="ocf::heartbeat:SAPInstance" role="Started">
+                <node name="node2" id="2" cached="true"/>
+            </resource>
+        </resources>
+        """
+
+        mocker.patch.object(
+            scs_checker,
+            "execute_command_subprocess",
+            return_value=mock_resources_xml,
+        )
+
+        scs_checker._get_resource_ids()
+        assert scs_checker.ascs_resource_id == "rsc_sap_TST_ASCS00"
+        assert scs_checker.ers_resource_id == "rsc_sap_TST_ERS01"
+
+    def test_process_node_attributes(self, mocker, scs_checker):
         """
         Test processing node attributes to identify ASCS and ERS nodes.

+        :param mocker: Mocker fixture for mocking functions.
+        :type mocker: pytest_mock.MockerFixture
         :param scs_checker: Instance of SCSClusterStatusChecker.
:type scs_checker: SCSClusterStatusChecker
         """
         xml_str = """
-        <node_attributes>
-            <node name="node1">
-                <attribute name="runs_ers_TST" value="0"/>
-            </node>
-            <node name="node2">
-                <attribute name="runs_ers_TST" value="1"/>
-            </node>
-        </node_attributes>
+        <crm_mon>
+            <node_attributes>
+                <node name="node1">
+                    <attribute name="runs_ers_TST" value="0"/>
+                </node>
+                <node name="node2">
+                    <attribute name="runs_ers_TST" value="1"/>
+                </node>
+            </node_attributes>
+            <resources>
+                <resource id="rsc_sap_TST_ASCS00" role="Started" active="true">
+                    <node name="node1" id="1" cached="true"/>
+                </resource>
+                <resource id="rsc_sap_TST_ERS01" role="Started" active="true">
+                    <node name="node2" id="2" cached="true"/>
+                </resource>
+            </resources>
+        </crm_mon>
         """
+        scs_checker.ascs_resource_id = "rsc_sap_TST_ASCS00"
+        scs_checker.ers_resource_id = "rsc_sap_TST_ERS01"
         node_attributes = ET.fromstring(xml_str)
         scs_checker._process_node_attributes(node_attributes)
@@ -57,20 +106,29 @@ def test_process_node_attributes_incomplete(self, scs_checker):
         :type scs_checker: SCSClusterStatusChecker
         """
         xml_str = """
-        <node_attributes>
-            <node name="node1">
-                <attribute name="runs_ers_TST" value="0"/>
-            </node>
-            <node name="node2">
-                <attribute name="runs_ers_TST" value="1"/>
-            </node>
-        </node_attributes>
+        <crm_mon>
+            <node_attributes>
+                <node name="node1">
+                    <attribute name="runs_ers_TST" value="0"/>
+                </node>
+                <node name="node2">
+                    <attribute name="runs_ers_TST" value="1"/>
+                </node>
+            </node_attributes>
+            <resources>
+                <resource id="rsc_sap_TST_ASCS00" role="Stopped" active="false"/>
+                <resource id="rsc_sap_TST_ERS01" role="Started" active="true">
+                    <node name="node2" id="2" cached="true"/>
+                </resource>
+            </resources>
+        </crm_mon>
         """
+        scs_checker.ascs_resource_id = "rsc_sap_TST_ASCS00"
+        scs_checker.ers_resource_id = "rsc_sap_TST_ERS01"
         node_attributes = ET.fromstring(xml_str)
         scs_checker._process_node_attributes(node_attributes)

-        assert scs_checker.result["ascs_node"] == "node1"
+        assert scs_checker.result["ascs_node"] == ""
         assert scs_checker.result["ers_node"] == "node2"

     def test_is_cluster_ready(self, scs_checker):
@@ -119,7 +177,12 @@ def test_run_module(self, mocker):
         :type mocker: mocker.MockerFixture
         """
         mock_ansible_module = mocker.MagicMock()
-        mock_ansible_module.params = {"sap_sid": "TST", "ansible_os_family": "REDHAT"}
+        mock_ansible_module.params = {
+            "sap_sid": "TST",
+            "ansible_os_family": "REDHAT",
+            "scs_instance_number": "00",
+            "ers_instance_number": "01",
+        }
         mocker.patch(
             "src.modules.get_cluster_status_scs.AnsibleModule", return_value=mock_ansible_module
         )
diff --git a/tests/modules/get_pcmk_properties_db_test.py b/tests/modules/get_pcmk_properties_db_test.py
index 97d8ce66..58b11860 100644
--- a/tests/modules/get_pcmk_properties_db_test.py
+++ b/tests/modules/get_pcmk_properties_db_test.py
@@ -112,9 +112,13 @@
     "CRM_CONFIG_DEFAULTS": {"stonith-enabled": "true"},
     "RESOURCE_DEFAULTS": {
         "REDHAT": {
-            "stonith": {
-                "meta_attributes": {"priority": "10"},
-                "operations": {"monitor": {"timeout": "30"}},
+            "fence_agent": {
+                "meta_attributes": {"pcmk_delay_max": "15"},
+                "operations": {"monitor": {"timeout": ["700", "700s"]}},
+            },
+            "sbd_stonith": {
+                "meta_attributes": {"pcmk_delay_max": "15"},
+                "operations": {"monitor": {"timeout": ["30", "30s"]}},
             },
             "hana": {"meta_attributes": {"clone-max": "2"}},
         }
diff --git a/tests/modules/log_parser_test.py b/tests/modules/log_parser_test.py
index 5ee16063..15f984d2 100644
--- a/tests/modules/log_parser_test.py
+++ b/tests/modules/log_parser_test.py
@@ -145,6 +145,7 @@ def __init__(self, argument_spec, supports_check_mode):
             "end_time": "2023-01-01 23:59:59",
             "log_file": "test_log_file.log",
             "ansible_os_family": "REDHAT",
+            "function": "parse_logs",
         }
         self.check_mode = False

@@ -155,3 +156,62 @@ def exit_json(self, **kwargs):
         monkey_patch.setattr("src.modules.log_parser.AnsibleModule", MockAnsibleModule)
         main()
         assert mock_result["status"] == "FAILED"
+
+    def test_merge_logs_success(self, log_parser_redhat):
+        """
+        Test the merge_logs method for successful log merging.
+
+        :param log_parser_redhat: LogParser instance.
+ :type log_parser_redhat: LogParser + """ + log_parser_redhat.logs = [ + '["Jan 01 12:34:56 server1 pacemaker-controld: Notice: ' + 'Resource SAPHana_HDB_00 started"]', + '["Jan 01 12:35:00 server2 pacemaker-controld: Notice: ' + 'Resource SAPHana_HDB_01 started"]', + '["Jan 01 12:36:00 server3 pacemaker-controld: Notice: ' + 'Resource SAPHana_HDB_02 started"]', + ] + + log_parser_redhat.merge_logs() + result = log_parser_redhat.get_result() + + filtered_logs = [log.strip() for log in json.loads(result["filtered_logs"])] + assert len(filtered_logs) == len(log_parser_redhat.logs) + assert result["status"] == "PASSED" + + def test_merge_logs_empty_input(self, log_parser_redhat): + """ + Test the merge_logs method with empty input. + + :param log_parser_redhat: LogParser instance. + :type log_parser_redhat: LogParser + """ + log_parser_redhat.logs = [] + + log_parser_redhat.merge_logs() + result = log_parser_redhat.get_result() + + assert json.loads(result["filtered_logs"]) == [] + assert result["status"] == "PASSED" + assert result["message"] == "No logs provided to merge" + + def test_merge_logs_invalid_json(self, log_parser_redhat): + """ + Test the merge_logs method with invalid JSON strings. + + :param log_parser_redhat: LogParser instance. + :type log_parser_redhat: LogParser + """ + log_parser_redhat.logs = [ + '["Jan 01 12:34:56 server1 pacemaker-controld: Notice: ' + 'Resource SAPHana_HDB_00 started"]', + "Invalid JSON string", + ] + + log_parser_redhat.merge_logs() + result = log_parser_redhat.get_result() + + filtered_logs = [log.strip() for log in json.loads(result["filtered_logs"])] + assert len(filtered_logs) == 2 + assert result["status"] == "PASSED" diff --git a/tests/roles/ha_db_hana/az_lb_test.py b/tests/roles/ha_db_hana/az_lb_test.py index 7f6d4f64..2fba9f52 100644 --- a/tests/roles/ha_db_hana/az_lb_test.py +++ b/tests/roles/ha_db_hana/az_lb_test.py @@ -71,7 +71,7 @@ def test_environment(self, ansible_inventory): yield temp_dir shutil.rmtree(temp_dir) - def test_ha_config_validation_success(self, test_environment, ansible_inventory): + def test_az_lb_validation_success(self, test_environment, ansible_inventory): """ Test the Azure LB configuration validation tasks using Ansible Runner. diff --git a/tests/roles/ha_scs/ascs_migration_test.py b/tests/roles/ha_scs/ascs_migration_test.py index 4ad9d623..9d48f6ae 100644 --- a/tests/roles/ha_scs/ascs_migration_test.py +++ b/tests/roles/ha_scs/ascs_migration_test.py @@ -9,6 +9,7 @@ mocks necessary Python modules and commands, and verifies the execution of the tasks. 
""" +import os import shutil from pathlib import Path import pytest @@ -45,7 +46,13 @@ def test_environment(self, ansible_inventory): :ytype: str """ + os.environ["TASK_NAME"] = "ascs-migration" + task_counter_file = "/tmp/get_cluster_status_counter_ascs-migration" + if os.path.exists(task_counter_file): + os.remove(task_counter_file) + commands = [ + {"name": "get_sap_instance_resource_id", "SUSE": "cibadmin --query --scope resources"}, { "name": "ascs_resource_migration_cmd", "SUSE": "crm resource migrate SAP_ASCS00_ascs00 scs02", @@ -67,6 +74,7 @@ def test_environment(self, ansible_inventory): "project/library/send_telemetry_data", "bin/crm_resource", "bin/crm", + "bin/cibadmin", ], extra_vars_override={"commands": commands, "node_tier": "scs"}, ) diff --git a/tests/roles/ha_scs/ascs_node_crash_test.py b/tests/roles/ha_scs/ascs_node_crash_test.py index 1090479f..47783941 100644 --- a/tests/roles/ha_scs/ascs_node_crash_test.py +++ b/tests/roles/ha_scs/ascs_node_crash_test.py @@ -9,6 +9,7 @@ mocks necessary Python modules and commands, and verifies the execution of the tasks. """ +import os import shutil from pathlib import Path import pytest @@ -44,6 +45,10 @@ def test_environment(self, ansible_inventory): :yield temp_dir: Path to the temporary test environment. :ytype: str """ + os.environ["TASK_NAME"] = "ascs-node-crash" + task_counter_file = "/tmp/get_cluster_status_counter_ascs-node-crash" + if os.path.exists(task_counter_file): + os.remove(task_counter_file) temp_dir = self.setup_test_environment( role_type="ha_scs", ansible_inventory=ansible_inventory, diff --git a/tests/roles/ha_scs/ha_failover_to_node_test.py b/tests/roles/ha_scs/ha_failover_to_node_test.py new file mode 100644 index 00000000..5cb00b79 --- /dev/null +++ b/tests/roles/ha_scs/ha_failover_to_node_test.py @@ -0,0 +1,133 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +""" +Test class for HAFailoverToNode tasks. + +This test class uses pytest to run functional tests on the HAFailoverToNode tasks +defined in roles/ha_scs/tasks/ha-failover-to-node.yml. It sets up a temporary test environment, +mocks necessary Python modules and commands, and verifies the execution of the tasks. +""" + +import os +import shutil +from pathlib import Path +import pytest +from tests.roles.ha_scs.roles_testing_base_scs import RolesTestingBaseSCS + + +class TestHAFailoverToNode(RolesTestingBaseSCS): + """ + Test class for HAFailoverToNode tasks. + """ + + @pytest.fixture + def ha_failover_to_node_tasks(self): + """ + Load the HAFailoverToNode tasks from the YAML file. + + :return: Parsed YAML content of the tasks file. + :rtype: dict + """ + return self.file_operations( + operation="read", + file_path=Path(__file__).parent.parent.parent + / "src/roles/ha_scs/tasks/ha-failover-to-node.yml", + ) + + @pytest.fixture + def test_environment(self, ansible_inventory): + """ + Set up a temporary test environment for the HAFailoverToNode tasks. + + :param ansible_inventory: Path to the Ansible inventory file. + :type ansible_inventory: str + :yield temp_dir: Path to the temporary test environment. 
+ :ytype: str + """ + os.environ["TASK_NAME"] = "ha-failover-to-node" + task_counter_file = "/tmp/get_cluster_status_counter_ha-failover-to-node" + if os.path.exists(task_counter_file): + os.remove(task_counter_file) + + temp_dir = self.setup_test_environment( + role_type="ha_scs", + ansible_inventory=ansible_inventory, + task_name="ha-failover-to-node", + task_description="The HAFailoverToNode test validates SAP's built-in " + "high availability functionality", + module_names=[ + "project/library/get_cluster_status_scs", + "project/library/log_parser", + "project/library/send_telemetry_data", + "bin/crm_resource", + "bin/crm", + "bin/sapcontrol", + ], + extra_vars_override={ + "node_tier": "scs", + "commands": [ + { + "name": "ascs_resource_unmigrate_cmd", + "SUSE": "crm resource clear SAP_ASCS00_ascs00", + }, + ], + }, + ) + + yield temp_dir + shutil.rmtree(temp_dir) + + def test_functional_ha_failover_to_node_success(self, test_environment, ansible_inventory): + """ + Test the HAFailoverToNode tasks using Ansible Runner. + + :param test_environment: Path to the temporary test environment. + :type test_environment: str + :param ansible_inventory: Path to the Ansible inventory file. + :type ansible_inventory: str + """ + result = self.run_ansible_playbook( + test_environment=test_environment, inventory_file_name="inventory_scs.txt" + ) + + assert result.rc == 0, ( + f"Playbook failed with status: {result.rc}\n" + f"STDOUT: {result.stdout.read() if result.stdout else 'No output'}\n" + f"STDERR: {result.stderr.read() if result.stderr else 'No errors'}\n" + f"Events: {[e.get('event') for e in result.events if 'event' in e]}" + ) + + ok_events, failed_events = [], [] + for event in result.events: + if event.get("event") == "runner_on_ok": + ok_events.append(event) + elif event.get("event") == "runner_on_failed": + failed_events.append(event) + + assert len(ok_events) > 0 + assert len(failed_events) == 0 + + failover_executed = False + validate_executed = False + cleanup_executed = False + post_status = {} + pre_status = {} + + for event in ok_events: + task = event.get("event_data", {}).get("task") + if task and "Execute HAFailoverToNode" in task: + failover_executed = True + elif task and "Test Execution: Validate SCS cluster status" in task: + validate_executed = True + post_status = event.get("event_data", {}).get("res") + elif task and "Cleanup resources" in task: + cleanup_executed = True + elif task and "Pre Validation: Validate SCS" in task: + pre_status = event.get("event_data", {}).get("res") + + assert post_status.get("ascs_node") == pre_status.get("ers_node") + assert post_status.get("ers_node") == pre_status.get("ascs_node") + assert failover_executed, "HAFailoverToNode command was not executed" + assert validate_executed, "SCS cluster status validation task was not executed" + assert cleanup_executed, "Cleanup resources task was not executed" diff --git a/tests/roles/ha_scs/kill_enqueue_replication_test.py b/tests/roles/ha_scs/kill_enqueue_replication_test.py new file mode 100644 index 00000000..001bbd30 --- /dev/null +++ b/tests/roles/ha_scs/kill_enqueue_replication_test.py @@ -0,0 +1,126 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +""" +Test class for Kill Enqueue Replication Server Process tasks. + +This test class uses pytest to run functional tests on the kill-enqueue-replication tasks +defined in roles/ha_scs/tasks/kill-enqueue-replication.yml. 
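+Because only the ERS process is killed, no node swap is expected: the pre- and
+post-test cluster states should report the same ASCS and ERS nodes.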
It sets up a temporary test environment, +mocks necessary Python modules and commands, and verifies the execution of the tasks. +""" + +import os +import shutil +from pathlib import Path +import pytest +from tests.roles.ha_scs.roles_testing_base_scs import RolesTestingBaseSCS + + +class TestKillEnqueueReplicationServer(RolesTestingBaseSCS): + """ + Test class for Kill Enqueue Replication Server Process tasks. + """ + + @pytest.fixture + def kill_enqueue_replication_tasks(self): + """ + Load the Kill Enqueue Replication Server tasks from the YAML file. + + :return: Parsed YAML content of the tasks file. + :rtype: dict + """ + return self.file_operations( + operation="read", + file_path=Path(__file__).parent.parent.parent + / "src/roles/ha_scs/tasks/kill-enqueue-replication.yml", + ) + + @pytest.fixture + def test_environment(self, ansible_inventory): + """ + Set up a temporary test environment for the Kill Enqueue Replication Server tasks. + + :param ansible_inventory: Path to the Ansible inventory file. + :type ansible_inventory: str + :yield temp_dir: Path to the temporary test environment. + :ytype: str + """ + os.environ["TASK_NAME"] = "kill-enqueue-replication" + task_counter_file = "/tmp/get_cluster_status_counter_kill-enqueue-replication" + if os.path.exists(task_counter_file): + os.remove(task_counter_file) + + temp_dir = self.setup_test_environment( + role_type="ha_scs", + ansible_inventory=ansible_inventory, + task_name="kill-enqueue-replication", + task_description="The Enqueue Replication Server Process Kill test " + "simulates failure of the replication server process", + module_names=[ + "project/library/get_cluster_status_scs", + "project/library/log_parser", + "project/library/send_telemetry_data", + "bin/crm_resource", + "bin/pgrep", + "bin/kill", + ], + extra_vars_override={"node_tier": "ers"}, + ) + + yield temp_dir + shutil.rmtree(temp_dir) + + def test_functional_kill_enqueue_replication_success(self, test_environment, ansible_inventory): + """ + Test the Kill Enqueue Replication Server tasks using Ansible Runner. + + :param test_environment: Path to the temporary test environment. + :type test_environment: str + :param ansible_inventory: Path to the Ansible inventory file. 
+ :type ansible_inventory: str + """ + result = self.run_ansible_playbook( + test_environment=test_environment, inventory_file_name="inventory_scs.txt" + ) + + assert result.rc == 0, ( + f"Playbook failed with status: {result.rc}\n" + f"STDOUT: {result.stdout.read() if result.stdout else 'No output'}\n" + f"STDERR: {result.stderr.read() if result.stderr else 'No errors'}\n" + f"Events: {[e.get('event') for e in result.events if 'event' in e]}" + ) + + ok_events, failed_events = [], [] + for event in result.events: + if event.get("event") == "runner_on_ok": + ok_events.append(event) + elif event.get("event") == "runner_on_failed": + failed_events.append(event) + + assert len(ok_events) > 0 + assert len(failed_events) == 0 + + kill_executed = False + validate_executed = False + cleanup_executed = False + post_status = {} + pre_status = {} + + for event in ok_events: + task = event.get("event_data", {}).get("task") + if task and "Kill Enqueue Replication Server Process" in task: + kill_executed = True + elif task and "Test Execution: Validate SCS cluster status" in task: + validate_executed = True + post_status = event.get("event_data", {}).get("res") + elif task and "Cleanup resources" in task: + cleanup_executed = True + elif task and "Pre Validation: Validate SCS" in task: + pre_status = event.get("event_data", {}).get("res") + + assert post_status.get("ascs_node") == pre_status.get("ascs_node") + assert post_status.get("ers_node") == pre_status.get("ers_node") + + assert kill_executed, "Kill enqueue replication server process task was not executed" + assert validate_executed, "SCS cluster status validation task was not executed" + assert cleanup_executed, "Cleanup resources task was not executed" diff --git a/tests/roles/ha_scs/kill_enqueue_server_test.py b/tests/roles/ha_scs/kill_enqueue_server_test.py new file mode 100644 index 00000000..5845ac02 --- /dev/null +++ b/tests/roles/ha_scs/kill_enqueue_server_test.py @@ -0,0 +1,126 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +""" +Test class for Kill Enqueue Server Process tasks. + +This test class uses pytest to run functional tests on the kill-enqueue-server tasks +defined in roles/ha_scs/tasks/kill-enqueue-server.yml. It sets up a temporary test environment, +mocks necessary Python modules and commands, and verifies the execution of the tasks. +""" + +import os +import shutil +from pathlib import Path +import pytest +from tests.roles.ha_scs.roles_testing_base_scs import RolesTestingBaseSCS + + +class TestKillEnqueueServer(RolesTestingBaseSCS): + """ + Test class for Kill Enqueue Server Process tasks. + """ + + @pytest.fixture + def kill_enqueue_server_tasks(self): + """ + Load the Kill Enqueue Server tasks from the YAML file. + + :return: Parsed YAML content of the tasks file. + :rtype: dict + """ + return self.file_operations( + operation="read", + file_path=Path(__file__).parent.parent.parent + / "src/roles/ha_scs/tasks/kill-enqueue-server.yml", + ) + + @pytest.fixture + def test_environment(self, ansible_inventory): + """ + Set up a temporary test environment for the Kill Enqueue Server tasks. + + :param ansible_inventory: Path to the Ansible inventory file. + :type ansible_inventory: str + :yield temp_dir: Path to the temporary test environment. 
+ :ytype: str + """ + + os.environ["TASK_NAME"] = "kill-enqueue-server" + task_counter_file = "/tmp/get_cluster_status_counter_kill-enqueue-server" + if os.path.exists(task_counter_file): + os.remove(task_counter_file) + + temp_dir = self.setup_test_environment( + role_type="ha_scs", + ansible_inventory=ansible_inventory, + task_name="kill-enqueue-server", + task_description="The Enqueue Server Process Kill test simulates failure of the enqueue server process", + module_names=[ + "project/library/get_cluster_status_scs", + "project/library/log_parser", + "project/library/send_telemetry_data", + "bin/crm_resource", + "bin/pgrep", + "bin/kill", + ], + extra_vars_override={"node_tier": "scs"}, + ) + + yield temp_dir + shutil.rmtree(temp_dir) + + def test_functional_kill_enqueue_server_success(self, test_environment, ansible_inventory): + """ + Test the Kill Enqueue Server tasks using Ansible Runner. + + :param test_environment: Path to the temporary test environment. + :type test_environment: str + :param ansible_inventory: Path to the Ansible inventory file. + :type ansible_inventory: str + """ + result = self.run_ansible_playbook( + test_environment=test_environment, inventory_file_name="inventory_scs.txt" + ) + + assert result.rc == 0, ( + f"Playbook failed with status: {result.rc}\n" + f"STDOUT: {result.stdout.read() if result.stdout else 'No output'}\n" + f"STDERR: {result.stderr.read() if result.stderr else 'No errors'}\n" + f"Events: {[e.get('event') for e in result.events if 'event' in e]}" + ) + + ok_events, failed_events = [], [] + for event in result.events: + if event.get("event") == "runner_on_ok": + ok_events.append(event) + elif event.get("event") == "runner_on_failed": + failed_events.append(event) + + assert len(ok_events) > 0 + assert len(failed_events) == 0 + + kill_executed = False + validate_executed = False + cleanup_executed = False + post_status = {} + pre_status = {} + + for event in ok_events: + task = event.get("event_data", {}).get("task") + if task and "Kill Enqueue Server Process" in task: + kill_executed = True + elif task and "Test Execution: Validate SCS cluster status" in task: + validate_executed = True + post_status = event.get("event_data", {}).get("res") + elif task and "Cleanup resources" in task: + cleanup_executed = True + elif task and "Pre Validation: Validate SCS" in task: + pre_status = event.get("event_data", {}).get("res") + + assert post_status.get("ascs_node") == pre_status.get("ers_node") + assert post_status.get("ers_node") == pre_status.get("ascs_node") + + assert kill_executed, "Kill enqueue server process task was not executed" + assert validate_executed, "SCS cluster status validation task was not executed" + assert cleanup_executed, "Cleanup resources task was not executed" diff --git a/tests/roles/ha_scs/kill_message_server_test.py b/tests/roles/ha_scs/kill_message_server_test.py new file mode 100644 index 00000000..28e944da --- /dev/null +++ b/tests/roles/ha_scs/kill_message_server_test.py @@ -0,0 +1,130 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +""" +Test class for Kill Message Server Process tasks. + +This test class uses pytest to run functional tests on the kill-message-server tasks +defined in roles/ha_scs/tasks/kill-message-server.yml. It sets up a temporary test environment, +mocks necessary Python modules and commands, and verifies the execution of the tasks. 
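+
+One failed task event is expected in this scenario, because the kill command
+itself reports failure; the assertions below account for it.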
+""" + +import os +import shutil +from pathlib import Path +import pytest +from tests.roles.ha_scs.roles_testing_base_scs import RolesTestingBaseSCS + + +class TestKillMessageServer(RolesTestingBaseSCS): + """ + Test class for Kill Message Server Process tasks. + """ + + @pytest.fixture + def kill_message_server_tasks(self): + """ + Load the Kill Message Server tasks from the YAML file. + + :return: Parsed YAML content of the tasks file. + :rtype: dict + """ + return self.file_operations( + operation="read", + file_path=Path(__file__).parent.parent.parent + / "src/roles/ha_scs/tasks/kill-message-server.yml", + ) + + @pytest.fixture + def test_environment(self, ansible_inventory): + """ + Set up a temporary test environment for the Kill Message Server tasks. + + :param ansible_inventory: Path to the Ansible inventory file. + :type ansible_inventory: str + :yield temp_dir: Path to the temporary test environment. + :ytype: str + """ + + os.environ["TASK_NAME"] = "kill-message-server" + task_counter_file = "/tmp/get_cluster_status_counter_kill-message-server" + if os.path.exists(task_counter_file): + os.remove(task_counter_file) + + temp_dir = self.setup_test_environment( + role_type="ha_scs", + ansible_inventory=ansible_inventory, + task_name="kill-message-server", + task_description="The Message Server Process Kill test simulates failure of the message server process", + module_names=[ + "project/library/get_cluster_status_scs", + "project/library/log_parser", + "project/library/send_telemetry_data", + "bin/crm_resource", + "bin/pgrep", + "bin/kill", + ], + extra_vars_override={"node_tier": "scs"}, + ) + + yield temp_dir + shutil.rmtree(temp_dir) + + def test_functional_kill_message_server_success(self, test_environment, ansible_inventory): + """ + Test the Kill Message Server tasks using Ansible Runner. + + :param test_environment: Path to the temporary test environment. + :type test_environment: str + :param ansible_inventory: Path to the Ansible inventory file. 
+ :type ansible_inventory: str + """ + result = self.run_ansible_playbook( + test_environment=test_environment, inventory_file_name="inventory_scs.txt" + ) + + assert result.rc == 0, ( + f"Playbook failed with status: {result.rc}\n" + f"STDOUT: {result.stdout.read() if result.stdout else 'No output'}\n" + f"STDERR: {result.stderr.read() if result.stderr else 'No errors'}\n" + f"Events: {[e.get('event') for e in result.events if 'event' in e]}" + ) + + ok_events, failed_events = [], [] + for event in result.events: + if event.get("event") == "runner_on_ok": + ok_events.append(event) + elif event.get("event") == "runner_on_failed": + failed_events.append(event) + + assert len(ok_events) > 0 + # One failed event is expected due to the kill command + assert len(failed_events) == 1 + + kill_executed = False + validate_executed = False + cleanup_executed = False + post_status = {} + pre_status = {} + + for event in failed_events: + task = event.get("event_data", {}).get("task") + if task and "Kill Message Server Process" in task: + kill_executed = True + + for event in ok_events: + task = event.get("event_data", {}).get("task") + if task and "Test Execution: Validate SCS cluster status" in task: + validate_executed = True + post_status = event.get("event_data", {}).get("res") + elif task and "Cleanup resources" in task: + cleanup_executed = True + elif task and "Pre Validation: Validate SCS" in task: + pre_status = event.get("event_data", {}).get("res") + + assert post_status.get("ascs_node") == pre_status.get("ers_node") + assert post_status.get("ers_node") == pre_status.get("ascs_node") + + assert kill_executed, "Kill message server process task was not executed" + assert validate_executed, "SCS cluster status validation task was not executed" + assert cleanup_executed, "Cleanup resources task was not executed" diff --git a/tests/roles/ha_scs/kill_sapstartsrv_test.py b/tests/roles/ha_scs/kill_sapstartsrv_test.py new file mode 100644 index 00000000..331fd26a --- /dev/null +++ b/tests/roles/ha_scs/kill_sapstartsrv_test.py @@ -0,0 +1,125 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +""" +Test class for Kill SAPStartsrv tasks. + +This test class uses pytest to run functional tests on the kill-sapstartsrv tasks +defined in roles/ha_scs/tasks/kill-sapstartsrv-process.yml. It sets up a temporary test environment, +mocks necessary Python modules and commands, and verifies the execution of the tasks. +""" + +import os +import shutil +from pathlib import Path +import pytest +from tests.roles.ha_scs.roles_testing_base_scs import RolesTestingBaseSCS + + +class TestKillSapStartSrv(RolesTestingBaseSCS): + """ + Test class for Kill SAPStartsrv tasks. + """ + + @pytest.fixture + def kill_sapstartsrv_tasks(self): + """ + Load the Kill SAPStartsrv tasks from the YAML file. + + :return: Parsed YAML content of the tasks file. + :rtype: dict + """ + return self.file_operations( + operation="read", + file_path=Path(__file__).parent.parent.parent + / "src/roles/ha_scs/tasks/kill-sapstartsrv-process.yml", + ) + + @pytest.fixture + def test_environment(self, ansible_inventory): + """ + Set up a temporary test environment for the Kill SAPStartsrv tasks. + + :param ansible_inventory: Path to the Ansible inventory file. + :type ansible_inventory: str + :yield temp_dir: Path to the temporary test environment. 
+ :ytype: str + """ + + os.environ["TASK_NAME"] = "kill-sapstartsrv-process" + task_counter_file = "/tmp/get_cluster_status_counter_kill-sapstartsrv-process" + if os.path.exists(task_counter_file): + os.remove(task_counter_file) + + temp_dir = self.setup_test_environment( + role_type="ha_scs", + ansible_inventory=ansible_inventory, + task_name="kill-sapstartsrv-process", + task_description="The SAP startsrv Process Kill test simulates " + + "failure of the sapstartsrv process", + module_names=[ + "project/library/get_cluster_status_scs", + "project/library/log_parser", + "project/library/send_telemetry_data", + "bin/crm_resource", + "bin/pgrep", + "bin/kill", + ], + extra_vars_override={"node_tier": "scs"}, + ) + + playbook_content = self.file_operations( + operation="read", + file_path=f"{temp_dir}/project/roles/ha_scs/tasks/kill-sapstartsrv-process.yml", + ) + self.file_operations( + operation="write", + file_path=f"{temp_dir}/project/roles/ha_scs/tasks/kill-sapstartsrv-process.yml", + content=playbook_content.replace("set -o pipefail &&", ""), + ) + + yield temp_dir + shutil.rmtree(temp_dir) + + def test_functional_kill_sapstartsrv_success(self, test_environment, ansible_inventory): + """ + Test the Kill SAPStartsrv tasks using ansible Runner. + + :param test_environment: Path to the temporary test environment. + :type test_environment: str + :param ansible_inventory: Path to the Ansible inventory file. + :type ansible_inventory: str + """ + result = self.run_ansible_playbook( + test_environment=test_environment, inventory_file_name="inventory_scs.txt" + ) + + assert result.rc == 0, ( + f"Playbook failed with status: {result.rc}\n" + f"STDOUT: {result.stdout.read() if result.stdout else 'No output'}\n" + f"STDERR: {result.stderr.read() if result.stderr else 'No errors'}\n" + f"Events: {[e.get('event') for e in result.events if 'event' in e]}" + ) + + ok_events, failed_events = [], [] + for event in result.events: + if event.get("event") == "runner_on_ok": + ok_events.append(event) + elif event.get("event") == "runner_on_failed": + failed_events.append(event) + + assert len(ok_events) > 0 + assert len(failed_events) == 0 + + sapstartsrv_executed = False + sapstartsrv_executed_post = False + for event in ok_events: + task = event.get("event_data", {}).get("task") + print(task) + if "Test Execution: Kill sapstartsrv Process" in task: + sapstartsrv_executed = True + elif "Find sapstartsrv PID after killing the process" in task: + sapstartsrv_executed_post = True + + assert sapstartsrv_executed, "SAPStartsrv process was not killed" + assert sapstartsrv_executed_post, "SAPStartsrv process was not found after killing it" diff --git a/tests/roles/ha_scs/manual_restart_test.py b/tests/roles/ha_scs/manual_restart_test.py new file mode 100644 index 00000000..fc881356 --- /dev/null +++ b/tests/roles/ha_scs/manual_restart_test.py @@ -0,0 +1,128 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +""" +Test class for Manual Restart of ASCS instance tasks. + +This test class uses pytest to run functional tests on the Manual Restart tasks +defined in roles/ha_scs/tasks/manual-restart.yml. It sets up a temporary test environment, +mocks necessary Python modules and commands, and verifies the execution of the tasks. +""" +import os +import shutil +from pathlib import Path +import pytest +from tests.roles.ha_scs.roles_testing_base_scs import RolesTestingBaseSCS + + +class TestManualRestart(RolesTestingBaseSCS): + """ + Test class for Manual Restart of ASCS instance tasks. 
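+
+    Unlike the other SCS scenarios, the fixture here also removes the TASK_NAME
+    environment variable on teardown.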
+ """ + + @pytest.fixture + def manual_restart_tasks(self): + """ + Load the Manual Restart tasks from the YAML file. + + :return: Parsed YAML content of the tasks file. + :rtype: dict + """ + return self.file_operations( + operation="read", + file_path=Path(__file__).parent.parent.parent + / "src/roles/ha_scs/tasks/manual-restart.yml", + ) + + @pytest.fixture + def test_environment(self, ansible_inventory): + """ + Set up a temporary test environment for the Manual Restart tasks. + + :param ansible_inventory: Path to the Ansible inventory file. + :type ansible_inventory: str + :yield temp_dir: Path to the temporary test environment. + :ytype: str + """ + os.environ["TASK_NAME"] = "manual-restart" + task_counter_file = "/tmp/get_cluster_status_counter_manual-restart" + if os.path.exists(task_counter_file): + os.remove(task_counter_file) + + temp_dir = self.setup_test_environment( + role_type="ha_scs", + ansible_inventory=ansible_inventory, + task_name="manual-restart", + task_description="The Manual Restart test validates cluster " + "behavior when the ASCS instance is manually stopped", + module_names=[ + "project/library/get_cluster_status_scs", + "project/library/log_parser", + "project/library/send_telemetry_data", + "bin/crm_resource", + "bin/sapcontrol", + ], + extra_vars_override={"node_tier": "scs"}, + ) + + yield temp_dir + if "TASK_NAME" in os.environ: + del os.environ["TASK_NAME"] + shutil.rmtree(temp_dir) + + def test_functional_manual_restart_success(self, test_environment, ansible_inventory): + """ + Test the Manual Restart tasks using Ansible Runner. + + :param test_environment: Path to the temporary test environment. + :type test_environment: str + :param ansible_inventory: Path to the Ansible inventory file. + :type ansible_inventory: str + """ + result = self.run_ansible_playbook( + test_environment=test_environment, inventory_file_name="inventory_scs.txt" + ) + + assert result.rc == 0, ( + f"Playbook failed with status: {result.rc}\n" + f"STDOUT: {result.stdout.read() if result.stdout else 'No output'}\n" + f"STDERR: {result.stderr.read() if result.stderr else 'No errors'}\n" + f"Events: {[e.get('event') for e in result.events if 'event' in e]}" + ) + + ok_events, failed_events = [], [] + for event in result.events: + if event.get("event") == "runner_on_ok": + ok_events.append(event) + elif event.get("event") == "runner_on_failed": + failed_events.append(event) + + assert len(ok_events) > 0 + assert len(failed_events) == 0 + + stop_executed = False + validate_executed = False + start_executed = False + cleanup_executed = False + post_status = {} + pre_status = {} + + for event in ok_events: + task = event.get("event_data", {}).get("task") + if task and "Stop ASCS Instance" in task: + stop_executed = True + elif task and "Test Execution: Validate SCS cluster status" in task: + validate_executed = True + post_status = event.get("event_data", {}).get("res") + elif task and "Start ASCS Instance" in task: + start_executed = True + elif task and "Cleanup resources" in task: + cleanup_executed = True + elif task and "Pre Validation: Validate SCS" in task: + pre_status = event.get("event_data", {}).get("res") + + assert post_status.get("ascs_node") == pre_status.get("ascs_node") + assert stop_executed, "Stop ASCS Instance task was not executed" + assert validate_executed, "SCS cluster status validation task was not executed" + assert start_executed, "Start ASCS Instance task was not executed" + assert cleanup_executed, "Cleanup resources task was not executed" diff --git 
a/tests/roles/ha_scs/sapcontrol_config_test.py b/tests/roles/ha_scs/sapcontrol_config_test.py new file mode 100644 index 00000000..1138b06b --- /dev/null +++ b/tests/roles/ha_scs/sapcontrol_config_test.py @@ -0,0 +1,144 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +""" +Test class for SAPControl Config Validation tasks. + +This test class uses pytest to run functional tests on the sapcontrol-config tasks +defined in roles/ha_scs/tasks/sapcontrol-config.yml. It sets up a temporary test environment, +mocks necessary Python modules and commands, and verifies the execution of the tasks. +""" + +import os +import sys +import shutil +from pathlib import Path +import pytest +from tests.roles.ha_scs.roles_testing_base_scs import RolesTestingBaseSCS + + +class TestSAPControlConfig(RolesTestingBaseSCS): + """ + Test class for SAPControl Config Validation tasks. + """ + + @pytest.fixture + def sapcontrol_config_tasks(self): + """ + Load the SAPControl Config Validation tasks from the YAML file. + + :return: Parsed YAML content of the tasks file. + :rtype: dict + """ + return self.file_operations( + operation="read", + file_path=Path(__file__).parent.parent.parent + / "src/roles/ha_scs/tasks/sapcontrol-config.yml", + ) + + @pytest.fixture + def test_environment(self, ansible_inventory): + """ + Set up a temporary test environment for the SAPControl Config Validation tasks. + + :param ansible_inventory: Path to the Ansible inventory file. + :type ansible_inventory: str + :yield temp_dir: Path to the temporary test environment. + :ytype: str + """ + + class Expression: + def __init__(self, expression): + self.expression = expression + + def search(self, data): + return [] + + class Functions: + pass + + class JMESPath: + def search(self, expression, data): + return [] + + def compile(self, expression): + return Expression(expression) + + sys.modules["jmespath"] = JMESPath() + sys.modules["jmespath.functions"] = Functions() + + import subprocess + + subprocess.check_call([sys.executable, "-m", "pip", "install", "jmespath"]) + sys.modules["jmespath"] = JMESPath() + os.environ["TASK_NAME"] = "sapcontrol-config" + task_counter_file = "/tmp/get_cluster_status_counter_sapcontrol-config" + if os.path.exists(task_counter_file): + os.remove(task_counter_file) + + temp_dir = self.setup_test_environment( + role_type="ha_scs", + ansible_inventory=ansible_inventory, + task_name="sapcontrol-config", + task_description="The SAPControl Config Validation test runs multiple sapcontrol commands", + module_names=[ + "project/library/get_cluster_status_scs", + "project/library/log_parser", + "project/library/send_telemetry_data", + "bin/crm_resource", + "bin/sapcontrol", + "bin/jmespath", + ], + extra_vars_override={"node_tier": "scs"}, + ) + + yield temp_dir + shutil.rmtree(temp_dir) + + def test_functional_sapcontrol_config_success(self, test_environment, ansible_inventory): + """ + Test the SAPControl Config Validation tasks using Ansible Runner. + + :param test_environment: Path to the temporary test environment. + :type test_environment: str + :param ansible_inventory: Path to the Ansible inventory file. 
+ :type ansible_inventory: str + """ + result = self.run_ansible_playbook( + test_environment=test_environment, inventory_file_name="inventory_scs.txt" + ) + + assert result.rc == 0, ( + f"Playbook failed with status: {result.rc}\n" + f"STDOUT: {result.stdout.read() if result.stdout else 'No output'}\n" + f"STDERR: {result.stderr.read() if result.stderr else 'No errors'}\n" + f"Events: {[e.get('event') for e in result.events if 'event' in e]}" + ) + + ok_events, failed_events = [], [] + for event in result.events: + if event.get("event") == "runner_on_ok": + ok_events.append(event) + elif event.get("event") == "runner_on_failed": + failed_events.append(event) + + assert len(ok_events) > 0 + assert len(failed_events) == 0 + + sapcontrol_executed = False + test_fact_set = False + pre_validate_executed = False + + for event in ok_events: + task = event.get("event_data", {}).get("task") + print(task) + if task and "Run sapcontrol commands" in task: + sapcontrol_executed = True + elif task and "Test Execution: Validate sapcontrol commands" in task: + test_fact_set = True + elif task and "Pre Validation: Validate SCS" in task: + pre_validate_executed = True + + assert sapcontrol_executed, "SAPControl commands were not executed" + assert test_fact_set, "Test execution facts were not set" + assert pre_validate_executed, "Pre-validation task was not executed" diff --git a/tests/roles/mock_data/get_azure_lb.txt b/tests/roles/mock_data/get_azure_lb.txt index e89e86c5..7f070eae 100644 --- a/tests/roles/mock_data/get_azure_lb.txt +++ b/tests/roles/mock_data/get_azure_lb.txt @@ -8,6 +8,7 @@ def main(): region=dict(type="str", required=True), inbound_rules=dict(type="str", required=True), constants=dict(type="dict", required=True), + msi_client_id=dict(type="str", required=False) ) ) diff --git a/tests/roles/mock_data/get_cluster_status_scs.txt b/tests/roles/mock_data/get_cluster_status_scs.txt index e44a4602..a868a4ab 100644 --- a/tests/roles/mock_data/get_cluster_status_scs.txt +++ b/tests/roles/mock_data/get_cluster_status_scs.txt @@ -3,16 +3,19 @@ from ansible.module_utils.basic import AnsibleModule import os +import json def main(): module = AnsibleModule( argument_spec=dict( sap_sid=dict(type='str', required=True), - ansible_os_family=dict(type='str', required=True) + ansible_os_family=dict(type='str', required=True), ) ) - counter_file = "/tmp/get_cluster_status_counter" + task_name = os.environ.get('TASK_NAME', '') + + counter_file = f"/tmp/get_cluster_status_counter_{task_name}" if task_name else "/tmp/get_cluster_status_counter" if os.path.exists(counter_file): with open(counter_file, "r") as f: @@ -23,24 +26,121 @@ def main(): counter += 1 with open(counter_file, "w") as f: f.write(str(counter)) + + # Define initial and final states that are common + initial_state = { + "changed": False, + "ascs_node": "scs01", + "ers_node": "scs02", + "status": "PASSED", + "pacemaker_status": "running" + } + + swapped_state = { + "changed": False, + "ascs_node": "scs02", + "ers_node": "scs01", + "status": "PASSED", + "pacemaker_status": "running" + } - if counter == 3: - result = { - "changed": False, - "ascs_node": "scs01", - "ers_node": "scs02", - "status": "PASSED", - "pacemaker_status": "running" - } - else: - result = { - "changed": False, - "ascs_node": "scs02", - "ers_node": "scs01", - "status": "PASSED", - "pacemaker_status": "running" - } + # Define intermediate states for specific test cases + empty_ascs_state = { + "changed": False, + "ascs_node": "", + "ers_node": "scs02", + "status": "PASSED", + 
"pacemaker_status": "running" + } + empty_ers_state = { + "changed": False, + "ascs_node": "scs01", + "ers_node": "", + "status": "PASSED", + "pacemaker_status": "running" + } + + # Define the sequence of states for each test case + # Each task has an array of states that will be returned in sequence + # as the mock is called multiple times during the test + test_sequences = { + "manual-restart": [ + initial_state, # Initial call - pre-validation + initial_state, # Initial call - pre-validation + empty_ascs_state, # After stopping ASCS - ASCS node is empty + initial_state, # Final state after restart (roles swapped) + ], + "ascs-migration": [ + initial_state, # Initial call - pre-validation + initial_state, # During migration + swapped_state, # After migration validation (first check) + ], + "ascs-node-crash": [ + initial_state, # Initial pre-validation + initial_state, # Initial pre-validation + empty_ascs_state, # After node crash + swapped_state, # Additional validation checks + ], + "kill-message-server": [ + initial_state, # Initial pre-validation + initial_state, # Initial pre-validation + empty_ascs_state, # After process kill + swapped_state, # Final Check + ], + "kill-enqueue-server": [ + initial_state, # Initial pre-validation + initial_state, # Initial pre-validation + empty_ascs_state, # After process kill + swapped_state, # Additional validation checks + ], + "kill-enqueue-replication": [ + initial_state, # Initial pre-validation + initial_state, # After killing ERS process - no node swap expected + empty_ers_state, # Should remain the same (no node swap for this test) + initial_state, # Final check + ], + "kill-sapstartsrv-process": [ + initial_state, # Initial pre-validation + initial_state, # Initial pre-validation + initial_state, # Final check + ], + "sapcontrol-config": [ + initial_state, + initial_state, # Further calls - should remain the same + ], + "ha-failover-to-node": [ + initial_state, # Initial pre-validation + initial_state, # Initial pre-validation + swapped_state, # After HAFailoverToNode + ], + "block-network": [ + initial_state, # Initial pre-validation + swapped_state, # After network block + swapped_state, # Additional validation checks + swapped_state, # Final check + ] + } + + # Define fallback sequence if test case isn't defined + default_sequence = [ + initial_state, # First call + swapped_state, # Second call + swapped_state, # Third call onwards + ] + + # Get the correct sequence for this test case + sequence = test_sequences.get(task_name, default_sequence) + + # Get the appropriate state based on counter + # If we've reached the end of the defined sequence, keep returning the last state + index = min(counter - 1, len(sequence) - 1) + result = sequence[index] + + # Log the request and response for debugging + with open("/tmp/get_cluster_status_debug.log", "a") as f: + f.write(f"Task: {task_name}, Call #: {counter}, Index: {index}, Result: {json.dumps(result)}\n") + module.exit_json(**result) if __name__ == '__main__': diff --git a/tests/roles/mock_data/get_pcmk_properties_scs.txt b/tests/roles/mock_data/get_pcmk_properties_scs.txt index 5657dfb7..57927b71 100644 --- a/tests/roles/mock_data/get_pcmk_properties_scs.txt +++ b/tests/roles/mock_data/get_pcmk_properties_scs.txt @@ -10,7 +10,8 @@ def main(): ansible_os_family=dict(type='str', required=True), virtual_machine_name=dict(type='str', required=True), pcmk_constants=dict(type='dict', required=True), - fencing_mechanism=dict(type='str', required=True) + fencing_mechanism=dict(type='str', 
required=True), + nfs_provider=dict(type='str', required=True) ) ) diff --git a/tests/roles/mock_data/jmespath.txt b/tests/roles/mock_data/jmespath.txt new file mode 100644 index 00000000..6942d697 --- /dev/null +++ b/tests/roles/mock_data/jmespath.txt @@ -0,0 +1,42 @@ +#!/usr/bin/python3 +# -*- coding: utf-8 -*- + +""" +Mock implementation of the jmespath module for testing purposes. +""" + +def search(expression, data): + """ + Mock implementation of jmespath.search that returns a default value for any expression. + + :param expression: JMESPath expression to evaluate + :param data: Data to search + :return: Empty list as a safe default value + """ + return [] + +def compile(expression): + """ + Mock implementation of jmespath.compile. + + :param expression: JMESPath expression to compile + :return: A callable object with a search method + """ + class Expression: + def search(self, data): + return [] + + return Expression() + +def options(**kwargs): + """ + Mock implementation of jmespath.options. + + :param kwargs: Option parameters + :return: Options object + """ + class Options: + def __init__(self, **kwargs): + self.options = kwargs + + return Options(**kwargs) diff --git a/tests/roles/mock_data/log_parser.txt b/tests/roles/mock_data/log_parser.txt index 3e4841fb..762bd3bb 100644 --- a/tests/roles/mock_data/log_parser.txt +++ b/tests/roles/mock_data/log_parser.txt @@ -7,11 +7,13 @@ from ansible.module_utils.basic import AnsibleModule def main(): module = AnsibleModule( argument_spec=dict( - start_time=dict(type='str', required=True), - end_time=dict(type='str', required=True), + start_time=dict(type='str', required=False), + end_time=dict(type='str', required=False), log_file=dict(type='str', required=False, default='/var/log/messages'), keywords=dict(type='list', required=False, default=[]), - ansible_os_family=dict(type='str', required=True) + ansible_os_family=dict(type='str', required=True), + function=dict(type='str', required=True), + logs=dict(type='list', required=False) ) ) diff --git a/tests/roles/mock_data/sapcontrol.txt b/tests/roles/mock_data/sapcontrol.txt new file mode 100644 index 00000000..3db3fa72 --- /dev/null +++ b/tests/roles/mock_data/sapcontrol.txt @@ -0,0 +1,38 @@ +#!/bin/bash + +echo "MOCK SAPCONTROL CALLED: $@" >> /tmp/sapcontrol_calls.log + +# Check if we're calling HAFailoverToNode +if [[ "$*" == *"HAFailoverToNode"* ]]; then + echo "Failover to node successfully triggered." + exit 0 +# Check if we're calling Stop +elif [[ "$*" == *"Stop"* ]]; then + echo "Instance stopped successfully." + exit 0 +# Check if we're calling Start +elif [[ "$*" == *"Start"* ]]; then + echo "Instance started successfully." + exit 0 +# Check if we're calling HAGetFailoverConfig +elif [[ "$*" == *"HAGetFailoverConfig"* ]]; then + echo "HAActive: TRUE" + echo "HAProductVersion: 1.0" + echo "HASAPInterfaceVersion: 1.0" + echo "ok" + exit 0 +# Check if we're calling HACheckFailoverConfig +elif [[ "$*" == *"HACheckFailoverConfig"* ]]; then + echo "Check of HA failover configuration successful." + echo "ok" + exit 0 +# Check if we're calling HACheckConfig +elif [[ "$*" == *"HACheckConfig"* ]]; then + echo "SAP HA configuration check successful." 
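+  # close with the same "ok" trailer used by the other HA* branches above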
+ echo "ok" + exit 0 +# For any other command, return failure +else + echo "Unknown sapcontrol command: $@" >> /tmp/sapcontrol_calls.log + exit 1 +fi diff --git a/tests/roles/roles_testing_base.py b/tests/roles/roles_testing_base.py index 5d009433..d9f43c8d 100644 --- a/tests/roles/roles_testing_base.py +++ b/tests/roles/roles_testing_base.py @@ -161,6 +161,7 @@ def setup_test_environment( "misc/tasks/rescue.yml", "misc/tasks/var-log-messages.yml", "misc/tasks/post-telemetry-data.yml", + "misc/tasks/loadbalancer.yml", ] task_file = f"{role_type}/tasks/{task_name}.yml" @@ -203,6 +204,9 @@ def setup_test_environment( "_workspace_directory": temp_dir, "ansible_distribution": "SUSE", "ansible_distribution_version": "15", + "default_retries": 2, + "default_timeout": 2, + "default_delay": 2, } if extra_vars_override: