Skip to content

Commit db59157

Browse files
Backend: Enhance delete_service_by_challenge_pk to handle ServiceNotFoundException gracefully (#4961)
- Updated the delete_service_by_challenge_pk function to treat ServiceNotFoundException as a success, allowing for proper cleanup of challenge state. - Added logic to deregister task definitions if they exist, while ensuring that failures in deregistration do not affect the overall success of the deletion process. - Introduced unit tests to verify the new behavior, ensuring that the function behaves correctly under various scenarios, including when the service does not exist and when deregistration fails.
1 parent 9c4adc0 commit db59157

File tree

3 files changed

+185
-11
lines changed

3 files changed

+185
-11
lines changed

apps/challenges/aws_utils.py

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -443,7 +443,11 @@ def delete_service_by_challenge_pk(challenge):
443443
client, challenge, 0, False
444444
)
445445
if response["ResponseMetadata"]["HTTPStatusCode"] != HTTPStatus.OK:
446-
return response
446+
# If service doesn't exist, proceed to delete anyway (will be
447+
# handled gracefully)
448+
error_code = response.get("Error", {}).get("Code")
449+
if error_code != "ServiceNotFoundException":
450+
return response
447451

448452
response = client.delete_service(**kwargs)
449453
if response["ResponseMetadata"]["HTTPStatusCode"] == HTTPStatus.OK:
@@ -456,6 +460,30 @@ def delete_service_by_challenge_pk(challenge):
456460
challenge.save()
457461
return response
458462
except ClientError as e:
463+
# Handle ServiceNotFoundException gracefully - if the service doesn't exist,
464+
# the deletion goal is achieved. Clean up challenge state and return
465+
# success.
466+
error_code = e.response.get("Error", {}).get("Code")
467+
if error_code == "ServiceNotFoundException":
468+
logger.info(
469+
"Service for challenge %s does not exist, treating delete as success",
470+
challenge.pk,
471+
)
472+
challenge.workers = None
473+
challenge.save()
474+
# Try to deregister task definition if it exists, but don't fail if
475+
# it doesn't
476+
if challenge.task_def_arn:
477+
try:
478+
client.deregister_task_definition(
479+
taskDefinition=challenge.task_def_arn
480+
)
481+
except ClientError:
482+
pass # Task definition may not exist either
483+
challenge.task_def_arn = ""
484+
challenge.save()
485+
# Return a success-like response
486+
return {"ResponseMetadata": {"HTTPStatusCode": HTTPStatus.OK}}
459487
logger.exception(e)
460488
return e.response
461489

apps/challenges/task_definitions.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -523,6 +523,6 @@
523523
{{
524524
"cluster": "{CLUSTER}",
525525
"service": "{service_name}",
526-
"force": False
526+
"force": {force}
527527
}}
528528
"""

tests/unit/challenges/test_aws_utils.py

Lines changed: 155 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -432,6 +432,143 @@ def test_delete_service_client_error(mock_challenge, mock_client):
432432
mock_client.deregister_task_definition.assert_not_called()
433433

434434

435+
def test_delete_service_service_not_found_returns_success(
436+
mock_challenge, mock_client
437+
):
438+
"""Test that ServiceNotFoundException is treated as success since the
439+
deletion goal is achieved (service doesn't exist)."""
440+
mock_challenge.workers = 0
441+
mock_challenge.task_def_arn = "valid_task_def_arn"
442+
mock_challenge.pk = 123
443+
444+
with patch(
445+
"challenges.aws_utils.get_boto3_client", return_value=mock_client
446+
):
447+
mock_client.delete_service.side_effect = ClientError(
448+
error_response={
449+
"Error": {"Code": "ServiceNotFoundException"},
450+
"ResponseMetadata": {"HTTPStatusCode": HTTPStatus.BAD_REQUEST},
451+
},
452+
operation_name="DeleteService",
453+
)
454+
455+
response = delete_service_by_challenge_pk(mock_challenge)
456+
457+
# Should return success since the service doesn't exist
458+
assert response["ResponseMetadata"]["HTTPStatusCode"] == HTTPStatus.OK
459+
# Challenge state should be cleaned up
460+
assert mock_challenge.workers is None
461+
assert mock_challenge.task_def_arn == ""
462+
mock_challenge.save.assert_called()
463+
# Should attempt to deregister task definition
464+
mock_client.deregister_task_definition.assert_called_once_with(
465+
taskDefinition="valid_task_def_arn"
466+
)
467+
468+
469+
def test_delete_service_service_not_found_with_deregister_failure(
470+
mock_challenge, mock_client
471+
):
472+
"""Test that ServiceNotFoundException still succeeds even if deregister
473+
task definition also fails."""
474+
mock_challenge.workers = 0
475+
mock_challenge.task_def_arn = "valid_task_def_arn"
476+
mock_challenge.pk = 123
477+
478+
with patch(
479+
"challenges.aws_utils.get_boto3_client", return_value=mock_client
480+
):
481+
mock_client.delete_service.side_effect = ClientError(
482+
error_response={
483+
"Error": {"Code": "ServiceNotFoundException"},
484+
"ResponseMetadata": {"HTTPStatusCode": HTTPStatus.BAD_REQUEST},
485+
},
486+
operation_name="DeleteService",
487+
)
488+
mock_client.deregister_task_definition.side_effect = ClientError(
489+
error_response={
490+
"Error": {"Code": "TaskDefinitionNotFound"},
491+
"ResponseMetadata": {"HTTPStatusCode": HTTPStatus.BAD_REQUEST},
492+
},
493+
operation_name="DeregisterTaskDefinition",
494+
)
495+
496+
response = delete_service_by_challenge_pk(mock_challenge)
497+
498+
# Should still return success
499+
assert response["ResponseMetadata"]["HTTPStatusCode"] == HTTPStatus.OK
500+
# Challenge state should be cleaned up
501+
assert mock_challenge.workers is None
502+
assert mock_challenge.task_def_arn == ""
503+
mock_challenge.save.assert_called()
504+
505+
506+
def test_update_service_service_not_found_proceeds_to_delete(
507+
mock_challenge, mock_client
508+
):
509+
"""Test that when update_service fails with ServiceNotFoundException,
510+
the code proceeds to delete_service anyway."""
511+
mock_challenge.workers = 3
512+
mock_challenge.task_def_arn = "valid_task_def_arn"
513+
mock_challenge.pk = 123
514+
515+
response_metadata_ok = {
516+
"ResponseMetadata": {"HTTPStatusCode": HTTPStatus.OK}
517+
}
518+
response_service_not_found = {
519+
"ResponseMetadata": {"HTTPStatusCode": HTTPStatus.BAD_REQUEST},
520+
"Error": {"Code": "ServiceNotFoundException"},
521+
}
522+
523+
with patch(
524+
"challenges.aws_utils.get_boto3_client", return_value=mock_client
525+
):
526+
with patch(
527+
"challenges.aws_utils.update_service_by_challenge_pk",
528+
return_value=response_service_not_found,
529+
):
530+
mock_client.delete_service.return_value = response_metadata_ok
531+
532+
response = delete_service_by_challenge_pk(mock_challenge)
533+
534+
# Should return success since delete succeeded
535+
assert response["ResponseMetadata"]["HTTPStatusCode"] == HTTPStatus.OK
536+
# delete_service should have been called despite update failure
537+
mock_client.delete_service.assert_called_once()
538+
mock_challenge.save.assert_called()
539+
540+
541+
def test_update_service_other_error_does_not_proceed_to_delete(
542+
mock_challenge, mock_client
543+
):
544+
"""Test that when update_service fails with a non-ServiceNotFoundException
545+
error, the code does NOT proceed to delete_service."""
546+
mock_challenge.workers = 3
547+
mock_challenge.task_def_arn = "valid_task_def_arn"
548+
549+
response_other_error = {
550+
"ResponseMetadata": {"HTTPStatusCode": HTTPStatus.BAD_REQUEST},
551+
"Error": {"Code": "SomeOtherError"},
552+
}
553+
554+
with patch(
555+
"challenges.aws_utils.get_boto3_client", return_value=mock_client
556+
):
557+
with patch(
558+
"challenges.aws_utils.update_service_by_challenge_pk",
559+
return_value=response_other_error,
560+
):
561+
response = delete_service_by_challenge_pk(mock_challenge)
562+
563+
# Should return the error response
564+
assert (
565+
response["ResponseMetadata"]["HTTPStatusCode"]
566+
== HTTPStatus.BAD_REQUEST
567+
)
568+
# delete_service should NOT have been called
569+
mock_client.delete_service.assert_not_called()
570+
571+
435572
class TestServiceManager:
436573
@pytest.fixture
437574
def mock_client(self):
@@ -480,7 +617,8 @@ def test_service_manager_creates_service(
480617
"ResponseMetadata": {"HTTPStatusCode": HTTPStatus.OK}
481618
}
482619

483-
# Mock client_token_generator and create_service_by_challenge_pk to return a mock response
620+
# Mock client_token_generator and create_service_by_challenge_pk to
621+
# return a mock response
484622
with patch(
485623
"challenges.aws_utils.client_token_generator",
486624
return_value="mock_client_token",
@@ -1152,7 +1290,8 @@ def test_terminate_ec2_instance_client_error(
11521290
mock_ec2.terminate_instances.side_effect
11531291
)
11541292

1155-
# Ensure the EC2 instance ID was not cleared and the challenge was not saved
1293+
# Ensure the EC2 instance ID was not cleared and the challenge was not
1294+
# saved
11561295
self.assertNotEqual(challenge.ec2_instance_id, "")
11571296
challenge.save.assert_not_called()
11581297

@@ -2162,7 +2301,8 @@ def test_delete_workers_success(
21622301
# Assertions
21632302
self.assertEqual(result, {"count": 1, "failures": []})
21642303

2165-
# Ensure the delete_service_by_challenge_pk, get_log_group_name, and delete_log_group methods were called
2304+
# Ensure the delete_service_by_challenge_pk, get_log_group_name, and
2305+
# delete_log_group methods were called
21662306
mock_delete_service_by_challenge_pk.assert_called_once_with(
21672307
challenge=challenge_with_workers
21682308
)
@@ -2186,7 +2326,8 @@ def test_delete_workers_failure(
21862326
challenge_with_workers = MagicMock(pk=1, workers=5)
21872327
mock_queryset = [challenge_with_workers]
21882328

2189-
# Mock the delete_service_by_challenge_pk response to simulate a failure
2329+
# Mock the delete_service_by_challenge_pk response to simulate a
2330+
# failure
21902331
mock_delete_service_by_challenge_pk.return_value = {
21912332
"ResponseMetadata": {"HTTPStatusCode": HTTPStatus.BAD_REQUEST},
21922333
"Error": "An error occurred",
@@ -2694,7 +2835,8 @@ def test_delete_log_group_non_debug_mode(
26942835
# Assert that get_boto3_client was called with the correct arguments
26952836
mock_get_boto3_client.assert_called_once_with("logs", aws_keys)
26962837

2697-
# Assert that delete_log_group was called on the client with the correct argument
2838+
# Assert that delete_log_group was called on the client with the
2839+
# correct argument
26982840
mock_client.delete_log_group.assert_called_once_with(
26992841
logGroupName="test-log-group"
27002842
)
@@ -2731,15 +2873,17 @@ def test_delete_log_group_with_exception(
27312873
# Assert that get_boto3_client was called with the correct arguments
27322874
mock_get_boto3_client.assert_called_once_with("logs", aws_keys)
27332875

2734-
# Assert that delete_log_group was called on the client with the correct argument
2876+
# Assert that delete_log_group was called on the client with the
2877+
# correct argument
27352878
mock_client.delete_log_group.assert_called_once_with(
27362879
logGroupName="test-log-group"
27372880
)
27382881

27392882
# Retrieve the actual arguments passed to logger.exception
27402883
args, kwargs = mock_logger.exception.call_args
27412884

2742-
# Check if the first argument of logger.exception contains the correct message
2885+
# Check if the first argument of logger.exception contains the correct
2886+
# message
27432887
self.assertTrue(
27442888
"Delete failed" in str(args[0]),
27452889
f"Expected 'Delete failed' in {args[0]}",
@@ -3192,7 +3336,8 @@ def test_setup_ec2_with_existing_instance(
31923336
self.challenge.save()
31933337
# Call the function
31943338
setup_ec2(self.serialized_challenge)
3195-
# Check if start_ec2_instance was called since the EC2 instance already exists
3339+
# Check if start_ec2_instance was called since the EC2 instance already
3340+
# exists
31963341
mock_start_ec2.assert_called_once_with(self.challenge)
31973342
mock_create_ec2.assert_not_called()
31983343

@@ -3211,7 +3356,8 @@ def test_setup_ec2_without_existing_instance(
32113356
self.challenge.save()
32123357
# Call the function
32133358
setup_ec2(self.serialized_challenge)
3214-
# Check if create_ec2_instance was called since the EC2 instance doesn't exist
3359+
# Check if create_ec2_instance was called since the EC2 instance
3360+
# doesn't exist
32153361
mock_create_ec2.assert_called_once_with(self.challenge)
32163362
mock_start_ec2.assert_not_called()
32173363

0 commit comments

Comments
 (0)