Skip to content
This repository was archived by the owner on Jan 26, 2024. It is now read-only.

Commit 180b370

Browse files
Merge pull request #2 from awslabs/f-aurora
Add support for aurora
2 parents 3761ad2 + 8267ca0 commit 180b370

File tree

3 files changed

+147
-20
lines changed

3 files changed

+147
-20
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ This action detaches classic load balancers from subnets belonging to target AZ
131131
132132
#### Relational Database Service (RDS)
133133
134-
This action forces RDS to reboot and failover to another AZ:
134+
This action forces RDS to reboot and fail over to another AZ, and/or promotes one of the Aurora Replicas (read-only instances) in the DB cluster to be the primary instance (the cluster writer):
135135
```yaml
136136
- type: action
137137
name: Simulate AZ Failure for RDS

azchaosaws/rds/actions.py

Lines changed: 93 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@ def fail_az(
1919
configuration: Configuration = None,
2020
) -> Dict[str, Any]:
2121
"""
22-
Reboots and forces a failover of your RDS instances to another AZ. Only RDS instances with the corresponding tags and is in the target AZ
22+
Reboots and forces a failover of your RDS instances (including Aurora single-master clusters) to another AZ. Only RDS instances
23+
and/or DB clusters that have the corresponding tags and are in the target AZ
2324
with Multi-AZ enabled will be impacted.
2425
2526
Parameters:
@@ -28,7 +29,7 @@ def fail_az(
2829
dry_run (bool): The boolean flag to simulate a dry run or not. Setting to True will only run read-only operations and not make changes to resources. (Accepted values: True | False)
2930
3031
Optional:
31-
tags (List[Dict[str, str]]): A list of key-value pairs to filter the RDS instance(s) by. (Default: [{'Key': 'AZ_FAILURE', 'Value': 'True'}])
32+
tags (List[Dict[str, str]]): A list of key-value pairs to filter the RDS instance(s) and/or DB cluster(s) by. (Default: [{'Key': 'AZ_FAILURE', 'Value': 'True'}])
3233
3334
Return Structure:
3435
{
@@ -60,8 +61,8 @@ def fail_az(
6061
"AvailabilityZone": az,
6162
"DryRun": dry_run,
6263
"DBInstances": {
63-
"Success": {"DBInstanceIdentifiers": []},
64-
"Failed": {"DBInstanceIdentifiers": []},
64+
"Success": {"DBInstanceIdentifiers": [], "DBClusterIdentifier": []},
65+
"Failed": {"DBInstanceIdentifiers": [], "DBClusterIdentifier": []},
6566
},
6667
}
6768

@@ -114,10 +115,98 @@ def fail_az(
114115
)
115116
)
116117

118+
logger.info("[RDS] Fetching DB clusters...")
119+
120+
success_failover_clusters, failed_clusters = [], []
121+
122+
paginator = rds_client.get_paginator("describe_db_clusters")
123+
124+
for p in paginator.paginate():
125+
for cluster in p["DBClusters"]:
126+
writer_az, reader_azs = str(), []
127+
if all(t in cluster["TagList"] for t in tags):
128+
if cluster["MultiAZ"]:
129+
for member in cluster["DBClusterMembers"]:
130+
resp = rds_client.describe_db_instances(
131+
DBInstanceIdentifier=member["DBInstanceIdentifier"]
132+
)
133+
if resp["DBInstances"] and member["IsClusterWriter"]:
134+
writer_az = resp["DBInstances"][0]["AvailabilityZone"]
135+
if writer_az != az:
136+
logger.warning(
137+
"[RDS] DB cluster {} writer not in target AZ".format(
138+
cluster["DBClusterIdentifier"]
139+
)
140+
)
141+
break
142+
else:
143+
reader_azs.append(
144+
resp["DBInstances"][0]["AvailabilityZone"]
145+
)
146+
if not any(writer_az == reader_az for reader_az in reader_azs):
147+
logger.info(
148+
"[RDS] Database cluster %s found with primary in %s",
149+
cluster["DBClusterIdentifier"],
150+
writer_az,
151+
)
152+
153+
try:
154+
logger.warning(
155+
"[RDS] Based on config provided, DB cluster {} will failover".format(
156+
cluster["DBClusterIdentifier"]
157+
)
158+
)
159+
if not dry_run:
160+
failover_db_cluster_response = (
161+
rds_client.failover_db_cluster(
162+
DBClusterIdentifier=cluster[
163+
"DBClusterIdentifier"
164+
]
165+
)
166+
)
167+
logger.debug(failover_db_cluster_response)
168+
169+
success_failover_clusters.append(
170+
cluster["DBClusterIdentifier"]
171+
)
172+
except Exception as e:
173+
logger.error(
174+
"failed trying to failover for '{}': '{}'".format(
175+
cluster["DBClusterIdentifier"], str(e)
176+
)
177+
)
178+
failed_clusters.append(cluster["DBClusterIdentifier"])
179+
180+
if not success_failover_dbs:
181+
logger.warning(
182+
"[RDS] No DB instances to failover... Ensure that the DBs in the AZ you specified are tagged with the tag filter you provided or tagged with the default value."
183+
)
184+
else:
185+
logger.info(
186+
"[RDS] DB instances that was forced to failover: {} count({})".format(
187+
success_failover_dbs, len(success_failover_dbs)
188+
)
189+
)
190+
191+
if not success_failover_clusters:
192+
logger.warning(
193+
"[RDS] No DB clusters to failover... Ensure that the DB cluster(s) in with primary in the AZ you specified are tagged with the tag filter you provided or tagged with the default value."
194+
)
195+
else:
196+
logger.info(
197+
"[RDS] DB clusters that was forced to failover: {} count({})".format(
198+
success_failover_clusters, len(success_failover_clusters)
199+
)
200+
)
201+
117202
# Add to state
118203
fail_az_state["DBInstances"]["Success"][
119204
"DBInstanceIdentifiers"
120205
] = success_failover_dbs
121206
fail_az_state["DBInstances"]["Failed"]["DBInstanceIdentifiers"] = failed_dbs
207+
fail_az_state["DBInstances"]["Success"][
208+
"DBClusterIdentifier"
209+
] = success_failover_clusters
210+
fail_az_state["DBInstances"]["Failed"]["DBClusterIdentifier"] = failed_clusters
122211

123212
return fail_az_state

tests/test_rds_actions.py

Lines changed: 53 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -10,24 +10,62 @@ def test_fail_az_rds_basic(client):
1010
az = "ap-southeast-1a"
1111
dry_run = False
1212
db_instance_identifier = "mysqlinstance"
13-
multi_az = True
14-
tags = [{"Key": "AZ_FAILURE", "Value": "True"}]
15-
16-
mock_client.get_paginator.return_value.paginate.return_value = [
17-
{
18-
"DBInstances": [
19-
{
20-
"DBInstanceIdentifier": db_instance_identifier,
21-
"AvailabilityZone": az,
22-
"MultiAZ": multi_az,
23-
"TagList": tags,
24-
},
25-
]
26-
}
27-
]
13+
db_cluster_identifier = "my-cluster"
14+
15+
mock_client.get_paginator = get_mock_paginate
2816

2917
fail_az(az=az, dry_run=dry_run)
3018

3119
mock_client.reboot_db_instance.assert_called_with(
3220
DBInstanceIdentifier=db_instance_identifier, ForceFailover=True
3321
)
22+
23+
mock_client.failover_db_cluster.assert_called_with(
24+
DBClusterIdentifier=db_cluster_identifier
25+
)
26+
27+
28+
def get_mock_paginate(operation_name):
29+
return {
30+
"describe_db_instances": MagicMock(
31+
paginate=MagicMock(
32+
return_value=[
33+
{
34+
"DBInstances": [
35+
{
36+
"DBInstanceIdentifier": "mysqlinstance",
37+
"AvailabilityZone": "ap-southeast-1a",
38+
"MultiAZ": True,
39+
"TagList": [{"Key": "AZ_FAILURE", "Value": "True"}],
40+
},
41+
]
42+
}
43+
]
44+
)
45+
),
46+
"describe_db_clusters": MagicMock(
47+
paginate=MagicMock(
48+
return_value=[
49+
{
50+
"DBClusters": [
51+
{
52+
"DBClusterIdentifier": "my-cluster",
53+
"MultiAZ": True,
54+
"DBClusterMembers": [
55+
{
56+
"DBInstanceIdentifier": "my-post-gres-instance-1",
57+
"IsClusterWriter": True,
58+
},
59+
{
60+
"DBInstanceIdentifier": "my-post-gres-instance-2",
61+
"IsClusterWriter": False,
62+
},
63+
],
64+
"TagList": [{"Key": "AZ_FAILURE", "Value": "True"}],
65+
},
66+
]
67+
}
68+
]
69+
)
70+
),
71+
}.get(operation_name, MagicMock())

0 commit comments

Comments
 (0)