 #   "arches": [
 #     "x86_64"
 #   ],
-#   "policy-cleanup": [
-#     "cloud-uploads",
+#   "policy-cleanup": {
+#     "cloud-uploads": true,
+#     "images": true,
 #     "images-kept": ["qemu", "live-iso"]
-#   ]
+#   }
 # }
 #
 # We should also prune unreferenced build directories here. See also
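
The `policy-cleanup` stanza changes here from a flat list of completed actions to a dict, so per-action booleans and the `images-kept` list can coexist. As a sketch, a full build entry under the new shape might look like this (only the `policy-cleanup` layout comes from this diff; the surrounding fields are assumed):

```python
# Hypothetical builds.json entry; only the "policy-cleanup" shape is
# taken from the change above, the id and arches values are assumed.
build_entry = {
    "id": "36.20220716.3.1",
    "arches": ["x86_64"],
    "policy-cleanup": {
        "cloud-uploads": True,
        "images": True,
        "images-kept": ["qemu", "live-iso"],
    },
}
```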
@@ -40,6 +41,7 @@ import collections
 import datetime
 import os
 import boto3
+import botocore
 from dateutil.relativedelta import relativedelta
 from cosalib.gcp import remove_gcp_image
 from cosalib.aws import deregister_aws_resource
@@ -51,6 +53,12 @@ from cosalib.cmdlib import convert_duration_to_days
 Build = collections.namedtuple("Build", ["id", "images", "arch", "meta_json"])
 # set metadata caching to 5m
 CACHE_MAX_AGE_METADATA = 60 * 5
+# These lists are up to date as of schema hash
+# 4c19aed3b3d84af278780bff63728510bb3e70613e4c4eef8cabd7939eb31bd8. If changing
+# this hash, ensure that the lists of SUPPORTED and UNSUPPORTED artifacts below
+# are up to date.
+SUPPORTED = ["amis", "gcp"]
+UNSUPPORTED = ["aliyun", "azurestack", "digitalocean", "exoscale", "ibmcloud", "powervs", "azure"]


 def parse_args():
@@ -88,13 +96,6 @@ def main():
         # This copies the local builds.json and updates the S3 bucket version.
         return handle_upload_builds_json(s3_client, bucket, prefix, args.dry_run, args.acl)

-    # These lists are up to date as of schema hash
-    # 4c19aed3b3d84af278780bff63728510bb3e70613e4c4eef8cabd7939eb31bd8. If changing
-    # this hash, ensure that the list of supported and unsupported artifacts below
-    # is up to date.
-    supported = ["amis", "gcp"]
-    unsupported = ["aliyun", "azurestack", "digitalocean", "exoscale", "ibmcloud", "powervs", "azure"]
-
     with open(args.policy, "r") as f:
         policy = yaml.safe_load(f)
     if stream in policy:
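
The policy file loaded here drives all pruning below: each stream maps actions (`cloud-uploads`, `images`, `build`) to retention durations, plus an `images-keep` list consulted when pruning images. A minimal sketch with a hypothetical stream name and duration strings (the exact duration format is whatever `convert_duration_to_days` accepts):

```python
import yaml

# Hypothetical policy document; the keys mirror the lookups below,
# i.e. policy[stream][action] and policy[stream]["images-keep"].
policy = yaml.safe_load("""
stable:
  cloud-uploads: 2y
  images: 3y
  images-keep: [qemu, live-iso]
  build: 4y
""")
assert policy["stable"]["images-keep"] == ["qemu", "live-iso"]
```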
@@ -114,36 +115,72 @@ def main():
             continue
         duration = convert_duration_to_days(policy[stream][action])
         ref_date = today_date - relativedelta(days=int(duration))
+        pruned_build_ids = []
+        images_to_keep = policy.get(stream, {}).get("images-keep", [])

         print(f"Pruning resources of type {action} older than {policy[stream][action]} ({ref_date.date()}) on stream {stream}")
         # Enumerating in reverse to go from the oldest build to the newest one
         for build in reversed(builds):
             build_id = build["id"]
-            if action in build.get("policy-cleanup", []):
-                print(f"Build {build_id} has already had {action} pruning completed")
-                continue
             (build_date, _) = parse_fcos_version_to_timestamp_and_stream(build_id)
-
             if build_date >= ref_date:
                 break
+
+            previous_cleanup = build.get("policy-cleanup", {})
+            if action in previous_cleanup:
+                # If we're here, a previous cleanup of this type has already
+                # run for this build. For all types except `images` we can
+                # just continue.
+                if action != "images":
+                    print(f"Build {build_id} has already had {action} pruning completed")
+                    continue
+                else:
+                    # `images` has been pruned before, but we need to check that
+                    # everything matching the current policy was pruned, i.e.
+                    # there may be additional images we need to prune.
+                    previous_images_kept = previous_cleanup.get("images-kept", [])
+                    if set(images_to_keep) == set(previous_images_kept):
+                        print(f"Build {build_id} has already had {action} pruning completed")
+                        continue
+
             for arch in build["arches"]:
+                print(f"Pruning {arch} {action} for {build_id}")
                 meta_prefix = os.path.join(prefix, f"{build_id}/{arch}/meta.json")
                 meta_json = get_json_from_s3(s3_client, bucket, meta_prefix)
                 # Make sure the meta.json doesn't contain any cloud_platform that is not supported for pruning yet.
-                images = get_supported_images(meta_json, unsupported, supported)
+                images = get_supported_images(meta_json)
                 current_build = Build(id=build_id, images=images, arch=arch, meta_json=meta_json)

                 match action:
                     case "cloud-uploads":
                         prune_cloud_uploads(current_build, cloud_config, args.dry_run)
-                    case "build":
-                        raise NotImplementedError
-                        # print(f"Deleting key {prefix}{build.id} from bucket {bucket}")
-                        # Delete the build's directory in S3
-                        # S3().delete_object(args.bucket, f"{args.prefix}{str(current_build.id)}")
+                    # Prune images that are not listed in images-keep
                     case "images":
-                        raise NotImplementedError
-            build.setdefault("policy-cleanup", []).append("cloud-uploads")
+                        prune_images(s3_client, current_build, images_to_keep, args.dry_run, bucket, prefix)
+                    # Fully prune very old builds, including deleting the build's directory in S3.
+                    case "build":
+                        prune_build(s3_client, bucket, prefix, build_id, args.dry_run)
+                        pruned_build_ids.append(build_id)
+            # Update policy-cleanup after processing all arches for the build
+            policy_cleanup = build.setdefault("policy-cleanup", {})
+            match action:
+                case "cloud-uploads":
+                    if "cloud-uploads" not in policy_cleanup:
+                        policy_cleanup["cloud-uploads"] = True
+                case "images":
+                    if "images" not in policy_cleanup:
+                        policy_cleanup["images"] = True
+                    policy_cleanup["images-kept"] = images_to_keep
+
+        if pruned_build_ids:
+            if "tombstone-builds" not in builds_json_data:
+                builds_json_data["tombstone-builds"] = []
+            # Separate the builds into remaining builds and tombstone builds
+            remaining_builds = [build for build in builds if build["id"] not in pruned_build_ids]
+            tombstone_builds = [build for build in builds if build["id"] in pruned_build_ids]
+            # Update the data structure
+            builds_json_data["builds"] = remaining_builds
+            builds_json_data["tombstone-builds"].extend(tombstone_builds)

     # Save the updated builds.json to local builds/builds.json
     save_builds_json(builds_json_data, BUILDFILES['list'])
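
Net effect of a `build` prune pass on builds.json: fully pruned entries move from `builds` into `tombstone-builds` rather than disappearing entirely. A sketch with hypothetical build IDs:

```python
# After pruning 36.20220716.3.1 under the "build" action:
builds_json_data = {
    "builds": [
        {"id": "39.20240101.3.0", "arches": ["x86_64"]},  # still within policy
    ],
    "tombstone-builds": [
        {"id": "36.20220716.3.1", "arches": ["x86_64"]},  # pruned from S3, kept as a tombstone
    ],
}
```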
@@ -181,13 +218,15 @@ def validate_policy(stream, policy):
         raise Exception("Duration of pruning cloud-uploads must be less than or equal to pruning a build")


-def get_supported_images(meta_json, unsupported, supported):
+def get_supported_images(meta_json):
     images = {}
     for key in meta_json:
-        if key in unsupported:
+        if key in UNSUPPORTED:
             raise Exception(f"The platform {key} is not supported")
-        if key in supported:
+        if key in SUPPORTED:
             images[key] = meta_json[key]
+        else:
+            raise Exception(f"The platform {key} is in neither the supported nor the unsupported artifact lists.")
     return images

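
To illustrate the gate this gives us (the meta.json fragments below are assumptions; only the platform keys come from the SUPPORTED and UNSUPPORTED lists above):

```python
# A supported platform key is returned for pruning:
get_supported_images({"amis": [{"name": "us-east-1", "hvm": "ami-0123"}]})
# -> {"amis": [{"name": "us-east-1", "hvm": "ami-0123"}]}

# An unsupported platform key (or any unclassified key) raises,
# halting the prune rather than silently skipping the platform:
get_supported_images({"aliyun": []})  # raises Exception
```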
@@ -320,5 +359,47 @@ def delete_gcp_image(build, cloud_config, dry_run):
     return errors


+def prune_images(s3, build, images_to_keep, dry_run, bucket, prefix):
+    images_from_meta_json = build.meta_json.get("images", {})
+    # Get the image names and paths currently in meta.json
+    current_images_data = [(name, data.get("path")) for name, data in images_from_meta_json.items()]
+    errors = []
+
+    for name, path in current_images_data:
+        if name not in images_to_keep:
+            image_prefix = os.path.join(prefix, f"{build.id}/{build.arch}/{path}")
+            if dry_run:
+                print(f"Would prune {bucket}/{image_prefix}")
+            else:
+                try:
+                    s3.delete_object(Bucket=bucket, Key=image_prefix)
+                    print(f"Pruned {name} image for {build.id} on {build.arch}")
+                except botocore.exceptions.ClientError as e:
+                    if e.response['Error']['Code'] == 'NoSuchKey':
+                        print(f"{bucket}/{image_prefix} already pruned.")
+                    else:
+                        errors.append(e)
+    if errors:
+        print(f"Found errors when pruning images for {build.id}:")
+        for e in errors:
+            print(e)
+        raise Exception("Some errors were encountered")
+
+
+def prune_build(s3_client, bucket, prefix, build_id, dry_run):
+    build_prefix = os.path.join(prefix, f"{build_id}/")
+    if dry_run:
+        print(f"Would delete all resources in {bucket}/{build_prefix}.")
+    else:
+        try:
+            # Delete every object under the build's prefix, page by page.
+            paginator = s3_client.get_paginator("list_objects_v2")
+            for page in paginator.paginate(Bucket=bucket, Prefix=build_prefix):
+                objects = [{"Key": obj["Key"]} for obj in page.get("Contents", [])]
+                if objects:
+                    s3_client.delete_objects(Bucket=bucket, Delete={"Objects": objects})
+            print(f"Pruned {build_id} completely from s3")
+        except botocore.exceptions.ClientError as e:
+            if e.response['Error']['Code'] == 'NoSuchKey':
+                print(f"{bucket}/{build_prefix} already pruned.")
+            else:
+                raise Exception(f"Error pruning {build_id}: {e.response['Error']['Message']}")


 if __name__ == "__main__":
     main()
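
Finally, a hypothetical dry-run call to `prune_build` (bucket and prefix values assumed), showing that nothing is deleted until the dry-run flag is dropped:

```python
import boto3

s3_client = boto3.client("s3")
# Prints the would-be deletion and returns without touching S3:
prune_build(s3_client, "fcos-builds", "prod/streams/stable/builds/", "36.20220716.3.1", dry_run=True)
```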