Skip to content

Commit 1f6e3d7

Browse files
Julien-Ben authored and fealebenpae committed
Cleanup untagged images in ECR (#4121)
# Summary

Our agent repository on ECR has again reached its maximum capacity (20,000 images). This is caused by untagged images, which we do not process during teardowns; there were more than 17,500 of them. This PR modifies our cleanup script to handle those untagged images.
1 parent 071ddf0 commit 1f6e3d7

File tree

1 file changed

+40
-10
lines changed

1 file changed

+40
-10
lines changed

scripts/evergreen/periodic-cleanup-aws.py

Lines changed: 40 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,10 @@ def describe_all_ecr_images(repository: str) -> List[dict]:
4040
return images
4141

4242

43-
def filter_images_matching_tag(images: List[dict]) -> List[dict]:
44-
"""Filter list for images containing the target pattern"""
45-
images_matching_tag = []
43+
def filter_tags_to_delete(images: List[dict]) -> List[dict]:
44+
"""Filter the image list to only delete tags matching the pattern, signatures, or untagged images."""
45+
filtered_images = []
46+
untagged_count = 0
4647
for image_detail in images:
4748
if "imageTags" in image_detail:
4849
for tag in image_detail["imageTags"]:
@@ -54,8 +55,24 @@ def filter_images_matching_tag(images: List[dict]) -> List[dict]:
5455
# Note that if the operator ever gets to major version 6, some tags can unintentionally match '_6'
5556
# It is an easy and relatively reliable way of identifying our test images tags
5657
if "_6" in tag or ".sig" in tag or contains_timestamped_tag(tag):
57-
images_matching_tag.append({"imageTag": tag, "imagePushedAt": image_detail["imagePushedAt"]})
58-
return images_matching_tag
58+
filtered_images.append(
59+
{
60+
"imageTag": tag,
61+
"imagePushedAt": image_detail["imagePushedAt"],
62+
"imageDigest": image_detail["imageDigest"],
63+
}
64+
)
65+
else:
66+
filtered_images.append(
67+
{
68+
"imageTag": "",
69+
"imagePushedAt": image_detail["imagePushedAt"],
70+
"imageDigest": image_detail["imageDigest"],
71+
}
72+
)
73+
untagged_count += 1
74+
print(f"found {untagged_count} untagged images")
75+
return filtered_images
5976

6077

6178
# match 107.0.0.8502-1-b20241125T000000Z-arm64
@@ -70,11 +87,22 @@ def get_images_with_dates(repository: str) -> List[dict]:
7087
"""Retrieve the list of patch images, corresponding to the regex, with push dates"""
7188
ecr_images = describe_all_ecr_images(repository)
7289
print(f"Found {len(ecr_images)} images in repository {repository}")
73-
images_matching_tag = filter_images_matching_tag(ecr_images)
90+
images_matching_tag = filter_tags_to_delete(ecr_images)
7491

7592
return images_matching_tag
7693

7794

95+
def batch_delete_images(repository: str, images: List[dict]) -> None:
    """Delete the given images from an ECR repository, addressing them by digest.

    Images are identified by digest rather than by tag, so records with an
    empty tag (untagged images) can be deleted as well.

    Args:
        repository: Name of the ECR repository to delete from.
        images: Image records; each one must carry an "imageDigest" key.
    """
    print(f"Deleting {len(images)} images in repository {repository}")
    # The same digest can appear in several records (e.g. one record per tag of
    # the same image); de-duplicate while preserving order so each image is
    # requested for deletion only once.
    unique_digests = dict.fromkeys(image["imageDigest"] for image in images)
    digests_to_delete = [{"imageDigest": digest} for digest in unique_digests]
    # batch_delete_image only supports a maximum of 100 images at a time
    for i in range(0, len(digests_to_delete), 100):
        batch = digests_to_delete[i : i + 100]
        print(f"Deleting batch {i // 100 + 1} with {len(batch)} images...")
        response = ecr_client.batch_delete_image(
            repositoryName=repository, registryId=REGISTRY_ID, imageIds=batch
        )
        # Per-image errors are returned in the response's "failures" list rather
        # than raised, so surface them instead of silently dropping them.
        for failure in response.get("failures", []):
            print(
                f"Failed to delete image {failure.get('imageId')}: "
                f"{failure.get('failureCode')} - {failure.get('failureReason')}"
            )
    print("Deleted images")
104+
105+
78106
def delete_image(repository: str, image_tag: str) -> None:
    """Delete the single image identified by `image_tag` from `repository`."""
    image_ids = [{"imageTag": image_tag}]
    ecr_client.batch_delete_image(
        repositoryName=repository,
        registryId=REGISTRY_ID,
        imageIds=image_ids,
    )
    print(f"Deleted image with tag: {image_tag}")
@@ -92,26 +120,28 @@ def delete_images(
92120
# Process the images, deleting those older than the threshold
93121
delete_count = 0
94122
age_threshold_timedelta = timedelta(days=age_threshold)
123+
images_to_delete = []
95124
for image in images_with_dates:
96125
tag = image["imageTag"]
97126
push_date = image["imagePushedAt"]
98127
image_age = current_time - push_date
99128

100-
log_message_base = f"Image {tag}, was pushed at {push_date.isoformat()}"
129+
log_message_base = f"Image {tag if tag else 'UNTAGGED'} was pushed at {push_date.isoformat()}"
101130
delete_message = "should be cleaned up" if dry_run else "deleting..."
102131
if image_age > age_threshold_timedelta:
103132
print(f"{log_message_base}, older than {age_threshold} day(s), {delete_message}")
104-
if not dry_run:
105-
delete_image(repository, tag)
133+
images_to_delete.append(image)
106134
delete_count += 1
107135
else:
108136
print(f"{log_message_base}, not older than {age_threshold} day(s)")
137+
if not dry_run:
138+
batch_delete_images(repository, images_to_delete)
109139
deleted_message = "need to be cleaned up" if dry_run else "deleted"
110140
print(f"{delete_count} images {deleted_message}")
111141

112142

113143
def cleanup_repository(repository: str, age_threshold: int = DEFAULT_AGE_THRESHOLD_DAYS, dry_run: bool = False):
114-
print(f"Cleaning up images older than {DEFAULT_AGE_THRESHOLD_DAYS} day(s) from repository {repository}")
144+
print(f"Cleaning up images older than {age_threshold} day(s) from repository {repository}")
115145
print("Getting list of images...")
116146
images_with_dates = get_images_with_dates(repository)
117147
print(f"Images matching the pattern: {len(images_with_dates)}")

0 commit comments

Comments
 (0)