Skip to content

Commit e953ae5

Browse files
authored
rework Table e2e tests to remove flakiness (#52)
There were two flaky e2e tests in `test/e2e/tests/test_table.py`.

The flakiest test was test_enable_ttl, which would often fail with the following:

```
>       assert ttl["TimeToLiveDescription"]["AttributeName"] == "ForumName"
E       KeyError: 'AttributeName'

tests/test_table.py:158: KeyError
```

This was caused by the DynamoDB client's `describe_time_to_live` call returning a `ResourceNotFoundException` when the Table was not in ACTIVE status. Even once the Table is in ACTIVE status, there is a period of time during which DescribeTimeToLive's TimeToLiveDescription output shape is either empty or is a struct with no fields :( I updated the test to first wait until the Table becomes ACTIVE and has valid output responses before attempting to read the TTL information.

The second flaky test was test_table_update_tags. The 5-second sleep after calling `kubectl patch` with the updated tags was too short, since it sometimes takes a little while for the tags to appear in the Tagris APIs. Increasing this sleep to 10 seconds fixes this flakiness.

Issue aws-controllers-k8s/community#1561

Signed-off-by: Jay Pipes <[email protected]>

By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license.
1 parent eb1405d commit e953ae5

File tree

2 files changed

+147
-25
lines changed

2 files changed

+147
-25
lines changed

test/e2e/table.py

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
# Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License"). You may
4+
# not use this file except in compliance with the License. A copy of the
5+
# License is located at
6+
#
7+
# http://aws.amazon.com/apache2.0/
8+
#
9+
# or in the "license" file accompanying this file. This file is distributed
10+
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
11+
# express or implied. See the License for the specific language governing
12+
# permissions and limitations under the License.
13+
14+
"""Utilities for working with Table resources"""
15+
16+
import datetime
17+
import time
18+
import typing
19+
20+
import boto3
21+
import pytest
22+
23+
# Default upper bound, in seconds, on how long wait_until() keeps polling
# (ten minutes).
DEFAULT_WAIT_UNTIL_TIMEOUT_SECONDS = 10 * 60
# Default pause, in seconds, between successive polls in wait_until().
DEFAULT_WAIT_UNTIL_INTERVAL_SECONDS = 15
25+
26+
# Type of the matcher callables handed to wait_until(): each one receives a
# Table record dict and reports whether that record satisfies its condition.
# NOTE(review): static type checkers generally require the base of a NewType
# to be subclassable, which a Callable is not -- a plain type alias may have
# been intended here; confirm before changing.
TableMatchFunc = typing.NewType(
    'TableMatchFunc', typing.Callable[[dict], bool])
30+
31+
class StatusMatcher:
    """Callable that checks a Table record's 'TableStatus' field.

    An instance remembers the status it was constructed with and, when
    called with a Table record, reports whether the record carries that
    exact status.
    """

    def __init__(self, status):
        # The status string a record must have in order to match.
        self.match_on = status

    def __call__(self, record: dict) -> bool:
        """Returns True only if `record` has a 'TableStatus' key whose value
        equals the status this matcher was built with."""
        # A record without any status can never match.
        if 'TableStatus' not in record:
            return False
        return record['TableStatus'] == self.match_on
38+
39+
40+
def status_matches(status: str) -> TableMatchFunc:
    """Returns a matcher that is True for Table records whose 'TableStatus'
    equals the supplied status."""
    matcher = StatusMatcher(status)
    return matcher
42+
43+
44+
class TTLAttributeMatcher:
    """Callable that checks which attribute a Table's TTL is configured on.

    When called with a Table record, it looks up the Table's time-to-live
    description and reports whether its 'AttributeName' equals the attribute
    name this matcher was constructed with.
    """

    def __init__(self, attr_name):
        # The TTL attribute name the Table is expected to report.
        self.attr_name = attr_name

    def __call__(self, record: dict) -> bool:
        """Returns True only if the Table in `record` is not reported as
        non-ACTIVE and its TTL description names the expected attribute."""
        status_known = 'TableStatus' in record
        if status_known and record['TableStatus'] != 'ACTIVE':
            return False
        table_name = record['TableName']
        # NOTE(jaypipes): All of these checks are needed because of how
        # DynamoDB's DescribeTimeToLive API behaves. After updating the TTL
        # on a Table, the Table has to transition to ACTIVE status before
        # DescribeTimeToLive will return a 200, and even then additional time
        # can pass before the TimeToLiveDescription response shape contains
        # an AttributeName field matching what was set in the update call.
        # Until then the TimeToLiveDescription field can be empty or can be a
        # blank struct with no fields in it.
        ttl_description = get_time_to_live(table_name)
        if ttl_description is None:
            return False
        if 'AttributeName' not in ttl_description:
            return False
        return ttl_description['AttributeName'] == self.attr_name
67+
68+
69+
def ttl_on_attribute_matches(attr_name: str) -> TableMatchFunc:
    """Returns a matcher that is True for Table records whose TTL
    description names the supplied attribute."""
    matcher = TTLAttributeMatcher(attr_name)
    return matcher
71+
72+
73+
def wait_until(
    table_name: str,
    match_fn: TableMatchFunc,
    timeout_seconds: int = DEFAULT_WAIT_UNTIL_TIMEOUT_SECONDS,
    interval_seconds: int = DEFAULT_WAIT_UNTIL_INTERVAL_SECONDS,
) -> None:
    """Waits until a Table with a supplied name is returned from the DynamoDB
    API and the matching functor returns True.

    The Table is polled every `interval_seconds` seconds. A poll for which
    the Table cannot be found counts as "not matched yet" rather than being
    handed to the matching functor.

    Usage:
        from e2e.table import wait_until, status_matches

        wait_until(
            table_name,
            status_matches("ACTIVE"),
        )

    Args:
        table_name: name of the Table to poll.
        match_fn: callable taking the Table record dict and returning True
            once the awaited condition holds.
        timeout_seconds: how long to keep polling before giving up.
        interval_seconds: pause between two consecutive polls.

    Raises:
        pytest.fail upon timeout
    """
    # A monotonic clock keeps the deadline unaffected by wall-clock
    # adjustments that happen while we are polling.
    deadline = time.monotonic() + timeout_seconds

    while True:
        record = get(table_name)
        # get() returns None when the Table does not exist; the matchers
        # expect a dict, so only consult them once a record is available.
        if record is not None and match_fn(record):
            return
        if time.monotonic() >= deadline:
            pytest.fail(
                "failed to match Table '{}' before timeout".format(table_name)
            )
        time.sleep(interval_seconds)
100+
101+
102+
def get(table_name):
    """Returns a dict containing the Table record from the DynamoDB API.

    The record is the 'Table' entry of the DescribeTable response.

    If no such Table exists, returns None.
    """
    dynamodb = boto3.client('dynamodb')
    try:
        described = dynamodb.describe_table(TableName=table_name)
    except dynamodb.exceptions.ResourceNotFoundException:
        # The API reports a missing Table as an error; surface it as None.
        return None
    return described['Table']
113+
114+
115+
def get_time_to_live(table_name):
    """Returns the TTL description for the table with a supplied name.

    The value is the 'TimeToLiveDescription' entry of the DescribeTimeToLive
    response.

    If the API reports the resource as not found, returns None.
    """
    dynamodb = boto3.client('dynamodb')
    try:
        described = dynamodb.describe_time_to_live(TableName=table_name)
    except dynamodb.exceptions.ResourceNotFoundException:
        # The API reports a missing resource as an error; surface it as None.
        return None
    return described['TimeToLiveDescription']

test/e2e/tests/test_table.py

Lines changed: 22 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -29,12 +29,14 @@
2929
)
3030
from e2e.replacement_values import REPLACEMENT_VALUES
3131
from e2e import condition
32+
from e2e import table
3233
from e2e import tag
3334

3435
RESOURCE_PLURAL = "tables"
3536

3637
DELETE_WAIT_AFTER_SECONDS = 15
37-
MODIFY_WAIT_AFTER_SECONDS = 5
38+
MODIFY_WAIT_AFTER_SECONDS = 10
39+
3840

3941
@pytest.fixture(scope="module")
4042
def forum_table():
@@ -74,39 +76,29 @@ def forum_table():
7476
_, deleted = k8s.delete_custom_resource(table_reference, period_length=DELETE_WAIT_AFTER_SECONDS)
7577
assert deleted
7678

79+
7780
@service_marker
7881
@pytest.mark.canary
7982
class TestTable:
80-
def get_table(self, dynamodb_client, table_name: str) -> dict:
81-
try:
82-
resp = dynamodb_client.describe_table(
83-
TableName=table_name,
84-
)
85-
return resp["Table"]
86-
87-
except Exception as e:
88-
logging.debug(e)
89-
return None
83+
def table_exists(self, table_name: str) -> bool:
84+
return table.get(table_name) is not None
9085

91-
def table_exists(self, dynamodb_client, table_name: str) -> bool:
92-
return self.get_table(dynamodb_client, table_name) is not None
93-
94-
def test_create_delete(self, dynamodb_client, forum_table):
86+
def test_create_delete(self, forum_table):
9587
(ref, res) = forum_table
9688

9789
table_name = res["spec"]["tableName"]
9890
condition.assert_synced(ref)
9991

10092
# Check DynamoDB Table exists
101-
assert self.table_exists(dynamodb_client, table_name)
93+
assert self.table_exists(table_name)
10294

103-
def test_table_update_tags(self, dynamodb_client, forum_table):
95+
def test_table_update_tags(self, forum_table):
10496
(ref, res) = forum_table
10597

10698
table_name = res["spec"]["tableName"]
10799

108100
# Check DynamoDB Table exists
109-
assert self.table_exists(dynamodb_client, table_name)
101+
assert self.table_exists(table_name)
110102

111103
# Get CR latest revision
112104
cr = k8s.wait_resource_consumed_by_controller(ref)
@@ -129,13 +121,13 @@ def test_table_update_tags(self, dynamodb_client, forum_table):
129121
assert table_tags[0]['Key'] == tags[0]['key']
130122
assert table_tags[0]['Value'] == tags[0]['value']
131123

132-
def test_enable_ttl(self, dynamodb_client, forum_table):
124+
def test_enable_ttl(self, forum_table):
133125
(ref, res) = forum_table
134126

135127
table_name = res["spec"]["tableName"]
136128

137129
# Check DynamoDB Table exists
138-
assert self.table_exists(dynamodb_client, table_name)
130+
assert self.table_exists(table_name)
139131

140132
# Get CR latest revision
141133
cr = k8s.wait_resource_consumed_by_controller(ref)
@@ -152,9 +144,14 @@ def test_enable_ttl(self, dynamodb_client, forum_table):
152144

153145
# Patch k8s resource
154146
k8s.patch_custom_resource(ref, updates)
155-
time.sleep(MODIFY_WAIT_AFTER_SECONDS)
156147

157-
ttl = dynamodb_client.describe_time_to_live(TableName=table_name)
158-
assert ttl["TimeToLiveDescription"]["AttributeName"] == "ForumName"
159-
assert (ttl["TimeToLiveDescription"]["TimeToLiveStatus"] == "ENABLED" or
160-
ttl["TimeToLiveDescription"]["TimeToLiveStatus"] == "ENABLING")
148+
table.wait_until(
149+
table_name,
150+
table.ttl_on_attribute_matches("ForumName"),
151+
)
152+
153+
ttl = table.get_time_to_live(table_name)
154+
assert ttl is not None
155+
assert ttl["AttributeName"] == "ForumName"
156+
ttl_status = ttl["TimeToLiveStatus"]
157+
assert ttl_status in ("ENABLED", "ENABLING")

0 commit comments

Comments
 (0)