|
5 | 5 | import os |
6 | 6 | import boto3 |
7 | 7 |
|
8 | | -LOCALSTACK_ENDPOINT_URL = os.environ.get("LOCALSTACK_ENDPOINT_URL", "http://localhost:4566") |
| 8 | +LOCALSTACK_ENDPOINT_URL = os.environ.get( |
| 9 | + "LOCALSTACK_ENDPOINT_URL", "http://localhost:4566" |
| 10 | +) |
9 | 11 | CHAOS_ENDPOINT = f"{LOCALSTACK_ENDPOINT_URL}/_localstack/chaos/faults" |
10 | 12 |
|
11 | 13 | HOSTED_ZONE_NAME = "hello-localstack.com" |
|
17 | 19 | HEALTH_CHECK_RESOURCE_PATH = "/dev/healthcheck" |
18 | 20 |
|
19 | 21 | PRIMARY_API_GATEWAY_FQDN = f"{PRIMARY_API_ID}.execute-api.localhost.localstack.cloud" |
20 | | -SECONDARY_API_GATEWAY_FQDN = f"{SECONDARY_API_ID}.execute-api.localhost.localstack.cloud" |
| 22 | +SECONDARY_API_GATEWAY_FQDN = ( |
| 23 | + f"{SECONDARY_API_ID}.execute-api.localhost.localstack.cloud" |
| 24 | +) |
21 | 25 | FAILOVER_RECORD_NAME = f"test.{HOSTED_ZONE_NAME}" |
22 | 26 |
|
23 | 27 | HEALTH_CHECK_INTERVAL = 10 |
|
27 | 31 | DNS_CHECK_DELAY = 5 |
28 | 32 | FAILOVER_REACTION_WAIT = (HEALTH_CHECK_INTERVAL * HEALTH_CHECK_FAILURE_THRESHOLD) + 25 |
29 | 33 |
|
def get_cname_target(hostname, dns_server="127.0.0.1", port=53, max_cname_hops=5):
    """Follow the CNAME chain of *hostname* and return where it ends up.

    Resolution stops as soon as a name containing
    ``.execute-api.localhost.localstack.cloud`` is reached, when a name has no
    further CNAME record, when a record points at itself (or at an empty
    name), or after ``max_cname_hops`` lookups.

    Args:
        hostname: Name to start resolving from.
        dns_server: Address of the DNS server to query.
        port: Port of the DNS server.
        max_cname_hops: Maximum number of CNAME lookups to perform.

    Returns:
        The last hostname reached, or one of the sentinel strings
        ``"NXDOMAIN"``, ``"TIMEOUT"`` or ``"ERROR_RESOLVING"`` when the
        corresponding lookup failure occurs.
    """
    api_gateway_marker = ".execute-api.localhost.localstack.cloud"

    # Already an API Gateway name: nothing to resolve, so skip resolver setup.
    if api_gateway_marker in hostname:
        return hostname

    resolver = dns.resolver.Resolver()
    resolver.nameservers = [dns_server]
    resolver.port = port
    resolver.timeout = 2
    resolver.lifetime = 5

    current_hostname = hostname
    for _ in range(max_cname_hops):
        # Only the lookup itself sits in the try block; each failure mode
        # maps to the sentinel (or hostname) callers compare against.
        try:
            answers = resolver.resolve(current_hostname, "CNAME")
        except dns.resolver.NoAnswer:
            # No CNAME record for this name: it is the end of the chain.
            return current_hostname
        except dns.resolver.NXDOMAIN:
            return "NXDOMAIN"
        except dns.exception.Timeout:
            return "TIMEOUT"
        except Exception:
            return "ERROR_RESOLVING"

        if not answers:
            return current_hostname

        new_target = str(answers[0].target).rstrip(".")
        # An empty or self-referencing target cannot be followed any further.
        if not new_target or new_target == current_hostname:
            return current_hostname

        current_hostname = new_target
        if api_gateway_marker in current_hostname:
            return current_hostname

    # Hop budget exhausted: report the last name reached.
    return current_hostname
88 | 68 |
|
89 | 69 |
|
@pytest.fixture(scope="session")
def route53_client():
    """Return a session-scoped boto3 Route53 client.

    The client targets ``LOCALSTACK_ENDPOINT_URL`` and uses
    ``HEALTH_CHECK_RESOURCE_REGION`` as its region.
    """
    return boto3.client(
        "route53",
        endpoint_url=LOCALSTACK_ENDPOINT_URL,
        region_name=HEALTH_CHECK_RESOURCE_REGION,
    )
| 77 | + |
93 | 78 |
|
@pytest.fixture(scope="session")
def health_check_id(route53_client):
    """Return the ID of the existing health check for the primary API.

    Pages through ``list_health_checks`` and returns the ``Id`` of the first
    health check whose configuration matches ``PRIMARY_API_GATEWAY_FQDN``,
    ``HEALTH_CHECK_PORT`` and ``HEALTH_CHECK_RESOURCE_PATH``.

    Fails the test session (via ``pytest.fail``) when listing raises an error
    or when no matching health check exists.
    """
    try:
        paginator = route53_client.get_paginator("list_health_checks")
        for page in paginator.paginate():
            for hc in page.get("HealthChecks", []):
                config = hc.get("HealthCheckConfig", {})
                if (
                    config.get("FullyQualifiedDomainName") == PRIMARY_API_GATEWAY_FQDN
                    and config.get("Port") == HEALTH_CHECK_PORT
                    and config.get("ResourcePath") == HEALTH_CHECK_RESOURCE_PATH
                ):
                    return hc["Id"]
    except Exception as e:
        pytest.fail(f"Error trying to find health check ID: {e}")

    # Reached only when every page was scanned without a match. Kept outside
    # the try block so the "not found" failure is clearly separate from
    # lookup errors.
    pytest.fail(
        f"Could not find an existing health check for {PRIMARY_API_GATEWAY_FQDN}:{HEALTH_CHECK_PORT}{HEALTH_CHECK_RESOURCE_PATH}"
    )
112 | 99 |
|
113 | 100 |
|
def perform_dns_check_with_retry(fqdn_to_check, expected_target_fqdn, step_name):
    """Resolve *fqdn_to_check* until it points at *expected_target_fqdn*.

    Makes up to ``DNS_CHECK_RETRIES`` attempts with ``get_cname_target``,
    waiting ``DNS_CHECK_DELAY`` seconds between attempts. Retrying stops
    early when the resolver reports a timeout or a resolution error.

    Args:
        fqdn_to_check: Name to resolve.
        expected_target_fqdn: Target the name is expected to resolve to.
        step_name: Label printed before the check starts.

    Returns:
        The resolved target, which equals *expected_target_fqdn*.

    Raises:
        AssertionError: If the expected target was not observed.
    """
    print(f"\n{step_name} (expecting: {expected_target_fqdn})...")

    # Result markers for which another attempt is not made.
    fatal_markers = ("ERROR_RESOLVING", "FAILED_ALL_RETRIES_FOR")

    current_target = None
    for attempt in range(DNS_CHECK_RETRIES):
        current_target = get_cname_target(fqdn_to_check)
        if current_target == expected_target_fqdn:
            return current_target

        if current_target == "TIMEOUT" or any(
            marker in str(current_target) for marker in fatal_markers
        ):
            break

        # Sleeping after the final attempt would only delay the failure.
        if attempt < DNS_CHECK_RETRIES - 1:
            time.sleep(DNS_CHECK_DELAY)

    assert (
        current_target == expected_target_fqdn
    ), f"Expected DNS resolution for {fqdn_to_check} to be {expected_target_fqdn}, but got {current_target} after {DNS_CHECK_RETRIES} retries."
    return current_target
134 | 120 |
|
135 | 121 |
|
def test_dns_failover_cycle(route53_client, health_check_id):
    """Exercise a full DNS failover and failback cycle.

    Steps:
        1. Verify ``FAILOVER_RECORD_NAME`` resolves to the primary API.
        2. Inject faults for ``apigateway`` and ``lambda`` in
           ``PRIMARY_API_REGION`` through ``CHAOS_ENDPOINT``.
        3. Verify the record now resolves to the secondary API.
        4. Clear the injected faults.
        5. Verify the record resolves to the primary API again.

    The fixtures are requested so the Route53 client and the health check
    lookup run before the cycle starts; their values are not used directly.
    """
    time.sleep(INITIAL_DNS_WAIT_PERIOD)

    perform_dns_check_with_retry(
        FAILOVER_RECORD_NAME,
        PRIMARY_API_GATEWAY_FQDN,
        "1. Verifying initial DNS resolution",
    )

    print(
        f"\n2. Inducing chaos for 'apigateway' and 'lambda' in region '{PRIMARY_API_REGION}'..."
    )
    fault_payload = [
        {"service": "apigateway", "region": PRIMARY_API_REGION},
        {"service": "lambda", "region": PRIMARY_API_REGION},
    ]
    try:
        response = requests.post(CHAOS_ENDPOINT, json=fault_payload, timeout=10)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        pytest.fail(f"Failed to inject chaos: {e}")

    failover_verified = False
    try:
        time.sleep(FAILOVER_REACTION_WAIT)
        perform_dns_check_with_retry(
            FAILOVER_RECORD_NAME,
            SECONDARY_API_GATEWAY_FQDN,
            "3. Verifying DNS failover to secondary",
        )
        failover_verified = True
    finally:
        if not failover_verified:
            # The failover check failed or was interrupted. Remove the
            # injected faults anyway so they do not stay active for later
            # tests. This is best effort: the original failure is the one
            # that must propagate, so a cleanup error is only reported.
            try:
                requests.delete(CHAOS_ENDPOINT, json=fault_payload, timeout=10)
            except requests.exceptions.RequestException as cleanup_error:
                print(f"Best-effort chaos cleanup failed: {cleanup_error}")

    print(
        f"\n4. Clearing chaos for 'apigateway' and 'lambda' in region '{PRIMARY_API_REGION}'..."
    )
    try:
        response = requests.delete(CHAOS_ENDPOINT, json=fault_payload, timeout=10)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        pytest.fail(f"Failed to clear chaos: {e}")
    time.sleep(FAILOVER_REACTION_WAIT)

    perform_dns_check_with_retry(
        FAILOVER_RECORD_NAME,
        PRIMARY_API_GATEWAY_FQDN,
        "5. Verifying DNS failback to primary",
    )
0 commit comments