Skip to content

Commit aa8dfce

Browse files
authored
Test fixes for ModelPackage and move check for tags once resource completed (#71)
Checking tags immediately after verifying that the resource ARN exists can cause flaky tests, because the tags may not have been created/applied yet. Tests have failed with tags returned as empty even though the tags were present when the same resource was checked later in the console. This PR moves the tag check to after the resource has reached created/completed status, which should provide sufficient time for the tags to be created/applied.
1 parent 7d06080 commit aa8dfce

10 files changed

+45
-32
lines changed

test/canary/scripts/install_controller_helm.sh

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ function install_helm_chart() {
1414
yq w -i helm/values.yaml "aws.region" $region
1515
yq w -i helm/values.yaml "aws.account_id" $account_id
1616

17-
kubectl create namespace $namespace
1817
kubectl apply -f helm/crds
19-
helm install -n $namespace ack-$service-controller --skip-crds helm
18+
helm install -n $namespace --create-namespace ack-$service-controller --skip-crds helm
2019
}

test/e2e/common/config.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,3 +28,6 @@
2828
ENDPOINT_STATUS_INSERVICE = "InService"
2929
ENDPOINT_STATUS_CREATING = "Creating"
3030
ENDPOINT_STATUS_UPDATING = "Updating"
31+
32+
DELETE_WAIT_PERIOD = 4
33+
DELETE_WAIT_LENGTH = 30

test/e2e/tests/test_endpoint.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -240,9 +240,6 @@ def create_endpoint_test(self, sagemaker_client, xgboost_endpoint):
240240
endpoint_arn = endpoint_desc["EndpointArn"]
241241
assert self._get_resource_endpoint_arn(resource) == endpoint_arn
242242

243-
resource_tags = resource["spec"].get("tags", None)
244-
assert_tags_in_sync(endpoint_arn, resource_tags)
245-
246243
# endpoint transitions Creating -> InService state
247244
assert_endpoint_status_in_sync(
248245
endpoint_name, reference, cfg.ENDPOINT_STATUS_CREATING
@@ -254,6 +251,9 @@ def create_endpoint_test(self, sagemaker_client, xgboost_endpoint):
254251
)
255252
assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True")
256253

254+
resource_tags = resource["spec"].get("tags", None)
255+
assert_tags_in_sync(endpoint_arn, resource_tags)
256+
257257
def update_endpoint_failed_test(
258258
self, sagemaker_client, single_variant_config, faulty_config, xgboost_endpoint
259259
):

test/e2e/tests/test_hpo.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -162,9 +162,6 @@ def test_completed(self, xgboost_hpojob):
162162
hpo_arn = hpo_sm_desc["HyperParameterTuningJobArn"]
163163
assert k8s.get_resource_arn(resource) == hpo_arn
164164

165-
resource_tags = resource["spec"].get("tags", None)
166-
assert_tags_in_sync(hpo_arn, resource_tags)
167-
168165
assert hpo_sm_desc["HyperParameterTuningJobStatus"] == cfg.JOB_STATUS_INPROGRESS
169166
assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "False")
170167

@@ -173,6 +170,9 @@ def test_completed(self, xgboost_hpojob):
173170
)
174171
assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True")
175172

173+
resource_tags = resource["spec"].get("tags", None)
174+
assert_tags_in_sync(hpo_arn, resource_tags)
175+
176176
# Check that you can delete a completed resource from k8s
177177
_, deleted = k8s.delete_custom_resource(reference, 3, 10)
178178
assert deleted is True

test/e2e/tests/test_model_package.py

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,9 @@
3333

3434
RESOURCE_PLURAL = "modelpackages"
3535

36+
DELETE_WAIT_PERIOD = 20
37+
DELETE_WAIT_LENGTH = 30
38+
3639
# Disabled since versioned model package is not supported
3740
# Issue with multiple creates.
3841

@@ -111,7 +114,9 @@ def xgboost_unversioned_model_package():
111114
yield (reference, resource)
112115

113116
if k8s.get_resource_exists(reference):
114-
_, deleted = k8s.delete_custom_resource(reference, 3, 10)
117+
_, deleted = k8s.delete_custom_resource(
118+
reference, DELETE_WAIT_PERIOD, DELETE_WAIT_LENGTH
119+
)
115120
assert deleted
116121

117122

@@ -202,10 +207,6 @@ def test_unversioned_model_package_completed(
202207

203208
assert k8s.get_resource_arn(resource) == model_package_arn
204209

205-
resource_tags = resource["spec"].get("tags", None)
206-
assert_tags_in_sync(model_package_arn, resource_tags)
207-
208-
assert model_package_desc["ModelPackageStatus"] == cfg.JOB_STATUS_INPROGRESS
209210
self._assert_model_package_status_in_sync(
210211
model_package_name, reference, cfg.JOB_STATUS_INPROGRESS
211212
)
@@ -216,8 +217,13 @@ def test_unversioned_model_package_completed(
216217
)
217218
assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True")
218219

220+
resource_tags = resource["spec"].get("tags", None)
221+
assert_tags_in_sync(model_package_arn, resource_tags)
222+
219223
# Check that you can delete a completed resource from k8s
220-
_, deleted = k8s.delete_custom_resource(reference, 3, 10)
224+
_, deleted = k8s.delete_custom_resource(
225+
reference, DELETE_WAIT_PERIOD, DELETE_WAIT_LENGTH
226+
)
221227
assert deleted is True
222228
assert get_sagemaker_model_package(model_package_name) is None
223229

@@ -275,4 +281,4 @@ def test_unversioned_model_package_completed(
275281
# # Check that you can delete a completed resource from k8s
276282
# _, deleted = k8s.delete_custom_resource(reference, 3, 10)
277283
# assert deleted is True
278-
# assert get_sagemaker_model_package(model_package_name) is None
284+
# assert get_sagemaker_model_package(model_package_name) is None

test/e2e/tests/test_model_package_group.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535

3636
@pytest.fixture(scope="module")
3737
def xgboost_model_package_group():
38-
resource_name = random_suffix_name("xgboost-model-package-group", 32)
38+
resource_name = random_suffix_name("xgboost-model-package-group", 50)
3939

4040
replacements = REPLACEMENT_VALUES.copy()
4141
replacements["MODEL_PACKAGE_GROUP_NAME"] = resource_name
@@ -52,7 +52,9 @@ def xgboost_model_package_group():
5252

5353
# Delete the k8s resource if not already deleted by tests
5454
if k8s.get_resource_exists(reference):
55-
_, deleted = k8s.delete_custom_resource(reference, 3, 10)
55+
_, deleted = k8s.delete_custom_resource(
56+
reference, cfg.DELETE_WAIT_PERIOD, cfg.DELETE_WAIT_LENGTH
57+
)
5658
assert deleted
5759

5860

@@ -139,16 +141,19 @@ def test_create_model_package_group(self, xgboost_model_package_group):
139141
)
140142
model_package_group_arn = model_package_group_sm_desc["ModelPackageGroupArn"]
141143
assert k8s.get_resource_arn(resource) == model_package_group_arn
142-
resource_tags = resource["spec"].get("tags", None)
143-
assert_tags_in_sync(model_package_group_arn, resource_tags)
144144

145145
self._assert_model_package_group_status_in_sync(
146146
model_package_group_name, reference, cfg.JOB_STATUS_COMPLETED
147147
)
148148
assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True")
149149

150+
resource_tags = resource["spec"].get("tags", None)
151+
assert_tags_in_sync(model_package_group_arn, resource_tags)
152+
150153
# Check that you can delete a completed resource from k8s
151-
_, deleted = k8s.delete_custom_resource(reference, 3, 10)
154+
_, deleted = k8s.delete_custom_resource(
155+
reference, cfg.DELETE_WAIT_PERIOD, cfg.DELETE_WAIT_LENGTH
156+
)
152157
assert deleted is True
153158

154159
assert get_sagemaker_model_package_group(model_package_group_name) is None

test/e2e/tests/test_monitoring_schedule.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -163,9 +163,6 @@ def test_smoke(
163163
monitoring_schedule_arn = monitoring_schedule_desc["MonitoringScheduleArn"]
164164
assert k8s.get_resource_arn(resource) == monitoring_schedule_arn
165165

166-
resource_tags = resource["spec"].get("tags", None)
167-
assert_tags_in_sync(monitoring_schedule_arn, resource_tags)
168-
169166
# schedule transitions Pending -> Scheduled state
170167
# Pending status is short-lived (only 30 seconds) because the baselining job has already been run
171168
# remove the checks for Pending status if the test is flaky because of this
@@ -185,6 +182,9 @@ def test_smoke(
185182
)
186183
assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True")
187184

185+
resource_tags = resource["spec"].get("tags", None)
186+
assert_tags_in_sync(monitoring_schedule_arn, resource_tags)
187+
188188
# Update the resource
189189
new_cron_expression = "cron(0 * * * ? *)"
190190
spec["spec"]["monitoringScheduleConfig"]["scheduleConfig"][

test/e2e/tests/test_processingjob.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -161,9 +161,6 @@ def test_completed(self, kmeans_processing_job):
161161
processing_job_arn = processing_job_desc["ProcessingJobArn"]
162162
assert k8s.get_resource_arn(resource) == processing_job_arn
163163

164-
resource_tags = resource["spec"].get("tags", None)
165-
assert_tags_in_sync(processing_job_arn, resource_tags)
166-
167164
assert processing_job_desc["ProcessingJobStatus"] == cfg.JOB_STATUS_INPROGRESS
168165
assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "False")
169166

@@ -172,6 +169,9 @@ def test_completed(self, kmeans_processing_job):
172169
)
173170
assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True")
174171

172+
resource_tags = resource["spec"].get("tags", None)
173+
assert_tags_in_sync(processing_job_arn, resource_tags)
174+
175175
# Check that you can delete a completed resource from k8s
176176
_, deleted = k8s.delete_custom_resource(reference, 3, 10)
177177
assert deleted is True

test/e2e/tests/test_trainingjob.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -161,9 +161,6 @@ def test_completed(self, xgboost_training_job):
161161
training_job_arn = training_job_desc["TrainingJobArn"]
162162
assert k8s.get_resource_arn(resource) == training_job_arn
163163

164-
resource_tags = resource["spec"].get("tags", None)
165-
assert_tags_in_sync(training_job_arn, resource_tags)
166-
167164
assert training_job_desc["TrainingJobStatus"] == cfg.JOB_STATUS_INPROGRESS
168165
assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "False")
169166

@@ -172,6 +169,9 @@ def test_completed(self, xgboost_training_job):
172169
)
173170
assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True")
174171

172+
resource_tags = resource["spec"].get("tags", None)
173+
assert_tags_in_sync(training_job_arn, resource_tags)
174+
175175
# Check that you can delete a completed resource from k8s
176176
_, deleted = k8s.delete_custom_resource(reference, 3, 10)
177177
assert deleted is True

test/e2e/tests/test_trainingjob_debugger.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -191,9 +191,6 @@ def test_completed(self, xgboost_training_job_debugger):
191191
training_job_arn = training_job_desc["TrainingJobArn"]
192192
assert k8s.get_resource_arn(resource) == training_job_arn
193193

194-
resource_tags = resource["spec"].get("tags", None)
195-
assert_tags_in_sync(training_job_arn, resource_tags)
196-
197194
assert training_job_desc["TrainingJobStatus"] == cfg.JOB_STATUS_INPROGRESS
198195
assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "False")
199196

@@ -208,6 +205,9 @@ def test_completed(self, xgboost_training_job_debugger):
208205
)
209206
assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True")
210207

208+
resource_tags = resource["spec"].get("tags", None)
209+
assert_tags_in_sync(training_job_arn, resource_tags)
210+
211211
# Check that you can delete a completed resource from k8s
212212
_, deleted = k8s.delete_custom_resource(reference, 3, 10)
213213
assert deleted is True

0 commit comments

Comments
 (0)