
Commit 441f37b

gursewak1997 authored and jlebon committed
Add new cosa cloud-prune command for garbage collection
Add the ability to run garbage collection on resources using cosa cloud-prune. The script takes a policy.yaml file and runs garbage collection accordingly for the specified stream.
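The policy durations ("2 years", "3 years") are converted to months before builds are compared against the cutoff date. As a minimal sketch of that interpretation, the snippet below mirrors the get_period_in_months() helper added in src/cmd-cloud-prune further down (the rawhide policy is the example from the new script's header; PyYAML is assumed to be installed):

    import yaml

    POLICY = yaml.safe_load("""
    rawhide:
      cloud-uploads: 2 years
      images: 2 years
      images-keep: [qemu, live-iso]
      build: 3 years
    """)

    def period_in_months(duration: str) -> int:
        # Mirrors get_period_in_months() below: only years and months are supported.
        val, unit = duration.split(maxsplit=1)
        if unit in ("years", "year", "y"):
            return int(val) * 12
        if unit in ("months", "month", "m"):
            return int(val)
        raise ValueError(f"unsupported duration unit: {unit}")

    for action in ("cloud-uploads", "images", "build"):
        print(action, "->", period_in_months(POLICY["rawhide"][action]), "months")
    # cloud-uploads -> 24 months
    # images -> 24 months
    # build -> 36 months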
1 parent 5f752c2 commit 441f37b

File tree

7 files changed: +339 -22 lines changed


Makefile

Lines changed: 1 addition & 0 deletions
@@ -91,6 +91,7 @@ schema-check:
 	# Is the generated Go code synced with the schema?
 	grep -q "$(DIGEST)" pkg/builds/cosa_v1.go
 	grep -q "$(DIGEST)" pkg/builds/schema_doc.go
+	grep -q "$(DIGEST)" src/cmd-cloud-prune
 
 install:
 	install -d $(DESTDIR)$(PREFIX)/lib/coreos-assembler

cmd/coreos-assembler.go

Lines changed: 1 addition & 1 deletion
@@ -16,7 +16,7 @@ var buildCommands = []string{"init", "fetch", "build", "run", "prune", "clean",
 var advancedBuildCommands = []string{"buildfetch", "buildupload", "oc-adm-release", "push-container"}
 var buildextendCommands = []string{"aliyun", "applehv", "aws", "azure", "digitalocean", "exoscale", "extensions-container", "gcp", "hashlist-experimental", "hyperv", "ibmcloud", "kubevirt", "live", "metal", "metal4k", "nutanix", "openstack", "qemu", "secex", "virtualbox", "vmware", "vultr"}
 
-var utilityCommands = []string{"aws-replicate", "compress", "copy-container", "koji-upload", "kola", "push-container-manifest", "remote-build-container", "remote-prune", "remote-session", "sign", "tag", "update-variant"}
+var utilityCommands = []string{"aws-replicate", "compress", "copy-container", "koji-upload", "kola", "push-container-manifest", "remote-build-container", "cloud-prune", "remote-session", "sign", "tag", "update-variant"}
 var otherCommands = []string{"shell", "meta"}
 
 func init() {

docs/cosa.md

Lines changed: 1 addition & 1 deletion
@@ -65,7 +65,7 @@ Those less commonly used commands are listed here:
 | [oc-adm-release](https://github.com/coreos/coreos-assembler/blob/main/src/cmd-oc-adm-release) | Publish an oscontainer as the machine-os-content in an OpenShift release series
 | [offline-update](https://github.com/coreos/coreos-assembler/blob/main/src/cmd-offline-update) | Given a disk image and a coreos-assembler build, use supermin to update the disk image to the target OSTree commit "offline"
 | [prune](https://github.com/coreos/coreos-assembler/blob/main/src/cmd-prune) | This script removes previous builds. DO NOT USE on production pipelines
-| [remote-prune](https://github.com/coreos/coreos-assembler/blob/main/src/cmd-remote-prune) | Removes unreferenced builds from s3 bucket
+| [cloud-prune](https://github.com/coreos/coreos-assembler/blob/main/src/cmd-cloud-prune) | Prune resources as specified in policy.yaml
 | [sign](https://github.com/coreos/coreos-assembler/blob/main/src/cmd-sign) | Implements signing with RoboSignatory via fedora-messaging
 | [supermin-shell](https://github.com/coreos/coreos-assembler/blob/main/src/cmd-supermin-shell) | Get a supermin shell
 | [tag](https://github.com/coreos/coreos-assembler/blob/main/src/cmd-tag) | Operate on the tags in `builds.json`

src/cmd-cloud-prune

Lines changed: 300 additions & 0 deletions
@@ -0,0 +1,300 @@
#!/usr/bin/python3 -u

# This script parses a policy.yaml file, which outlines the specific
# pruning actions required for each stream and the age threshold for
# deleting artifacts within them.
# Example of policy.yaml:
# rawhide:
#     # all cloud images
#     cloud-uploads: 2 years
#     # artifacts in meta.json's `images` key
#     images: 2 years
#     images-keep: [qemu, live-iso]
#     build: 3 years
# The script also updates builds.json for the respective stream by
# adding the policy-cleanup key when the --upload-builds-json flag is set.
# It records the completed actions under that key. For example:
# "builds": [
#     {
#         "id": "40.20240425.dev.1",
#         "arches": [
#             "x86_64"
#         ],
#         "policy-cleanup": [
#             "cloud-uploads",
#             "images-kept": ["qemu", "live-iso"]
#         ]
#     }
# ]
#
# We should also prune unreferenced build directories here. See also
# `get_unreferenced_s3_builds()` in the git log.

import argparse
import collections
import datetime
import json
import os
from urllib.parse import urlparse

import boto3
import pytz
import yaml
from dateutil.relativedelta import relativedelta

from cosalib.aws import deregister_aws_resource
from cosalib.builds import BUILDFILES
from cosalib.cmdlib import parse_fcos_version_to_timestamp
from cosalib.gcp import remove_gcp_image
from cosalib.s3 import s3_copy

Build = collections.namedtuple("Build", ["id", "images", "arch", "meta_json"])
# Set metadata caching to 5m
CACHE_MAX_AGE_METADATA = 60 * 5


def parse_args():
    parser = argparse.ArgumentParser(prog="coreos-assembler cloud-prune")
    parser.add_argument("--policy", required=True, type=str, help="Path to policy YAML file")
    parser.add_argument("--dry-run", help="Don't actually delete anything", action='store_true')
    parser.add_argument("--upload-builds-json", help="Push builds.json", action='store_true')
    parser.add_argument("--stream", type=str, help="CoreOS stream", required=True)
    parser.add_argument("--gcp-json-key", help="GCP Service Account JSON Auth", default=os.environ.get("GCP_JSON_AUTH"))
    parser.add_argument("--gcp-project", help="GCP Project name", default=os.environ.get("GCP_PROJECT_NAME"))
    parser.add_argument("--acl", help="ACL for objects", action='store', default='private')
    parser.add_argument("--aws-config-file", default=os.environ.get("AWS_CONFIG_FILE"), help="Path to AWS config file")
    return parser.parse_args()


def main():
    # Parse arguments and initialize variables
    args = parse_args()
    with open(BUILDFILES['sourceurl'], "r") as file:
        builds_source_data_url = file.read()
    bucket, prefix = get_s3_bucket_and_prefix(builds_source_data_url)
    cloud_config = get_cloud_config(args)
    stream = args.stream
    today_date = datetime.datetime.now()

    # Boto3 loads credentials from ~/.aws/config by default; this default
    # location can be changed by setting the AWS_CONFIG_FILE environment
    # variable. The Python bindings don't support passing a config file.
    # The alternative is to manually pass ACCESS_KEY and SECRET_KEY, which
    # isn't favourable.
    if args.aws_config_file:
        os.environ["AWS_CONFIG_FILE"] = args.aws_config_file
    s3_client = boto3.client("s3")

    # Upload builds.json to the S3 bucket
    if args.upload_builds_json:
        # This copies the local builds.json and updates the S3 bucket version.
        return handle_upload_builds_json(s3_client, bucket, prefix, args.dry_run, args.acl)

    # These lists are up to date as of schema hash
    # 4c19aed3b3d84af278780bff63728510bb3e70613e4c4eef8cabd7939eb31bd8. If changing
    # this hash, ensure that the list of supported and unsupported artifacts below
    # is up to date.
    supported = ["amis", "gcp"]
    unsupported = ["aliyun", "azurestack", "digitalocean", "exoscale", "ibmcloud", "powervs", "azure"]

    with open(args.policy, "r") as f:
        policy = yaml.safe_load(f)
    validate_policy(stream, policy)

    with open(BUILDFILES['list'], "r") as f:
        builds_json_data = json.load(f)

    # Prune builds based on the policy
    for action in ['cloud-uploads', 'images', 'build']:
        if action not in policy[stream]:
            continue
        duration = get_period_in_months(policy[stream][action])
        ref_date = today_date - relativedelta(months=int(duration))

        print(f"Pruning resources of type {action} older than {duration} months ({ref_date.date()}) on stream {stream}")
        # Enumerating in reverse to go from the oldest build to the newest one
        for index, build in enumerate(reversed(builds_json_data["builds"])):
            build_id = build["id"]
            if action in build.get("policy-cleanup", []):
                print(f"Build {build_id} has already had {action} pruning completed")
                continue
            build_date = parse_fcos_version_to_timestamp(build_id)

            if build_date >= ref_date:
                break
            for arch in build["arches"]:
                meta_prefix = os.path.join(prefix, f"{build_id}/{arch}/meta.json")
                meta_json = get_json_from_s3(s3_client, bucket, meta_prefix)
                # Make sure the meta.json doesn't contain any cloud_platform
                # that is not yet supported for pruning.
                images = get_supported_images(meta_json, unsupported, supported)
                current_build = Build(id=build_id, images=images, arch=arch, meta_json=meta_json)

                match action:
                    case "cloud-uploads":
                        prune_cloud_uploads(current_build, cloud_config, args.dry_run)
                    case "build":
                        raise NotImplementedError
                        # print(f"Deleting key {prefix}{build.id} from bucket {bucket}")
                        # Delete the build's directory in S3
                        # S3().delete_object(args.bucket, f"{args.prefix}{str(current_build.id)}")
                    case "images":
                        raise NotImplementedError
            if not args.dry_run:
                build.setdefault("policy-cleanup", []).append("cloud-uploads")
                # `index` comes from enumerating the reversed list, so map it
                # back to the build's position in the original list.
                builds_json_data["builds"][-(index + 1)] = build

    if not args.dry_run:
        # Save the updated builds.json to local builds/builds.json
        save_builds_json(builds_json_data)


def get_s3_bucket_and_prefix(builds_source_data_url):
    parsed_url = urlparse(builds_source_data_url)
    if parsed_url.scheme == "s3":
        bucket, prefix = parsed_url.netloc, parsed_url.path.lstrip("/")
        return bucket, prefix
    raise Exception("Invalid scheme: only s3:// supported")


def get_cloud_config(args):
    return {
        "gcp": {
            "json-key": args.gcp_json_key,
            "project": args.gcp_project
        },
        "aws": {
            "credentials": args.aws_config_file
        }
    }


def validate_policy(stream, policy):
    # If the build key is set in the policy file, then the cloud-uploads key
    # must also be present, and the duration of cloud-uploads must be equal
    # or shorter.
    if "build" in policy[stream]:
        actions = policy[stream]
        if 'cloud-uploads' not in actions:
            raise Exception("Pruning for cloud-uploads must be set before we prune the builds")
        cloud_uploads_duration = get_period_in_months(actions["cloud-uploads"])
        build_duration = get_period_in_months(actions["build"])
        if cloud_uploads_duration > build_duration:
            raise Exception("Duration of pruning cloud-uploads must be less than or equal to pruning a build")


def get_supported_images(meta_json, unsupported, supported):
    images = {}
    for key in meta_json:
        if key in unsupported:
            raise Exception(f"The platform {key} is not supported")
        if key in supported:
            images[key] = meta_json[key]
    return images


def get_json_from_s3(s3, bucket, key):
    try:
        response = s3.get_object(Bucket=bucket, Key=key)
        content = response["Body"].read().decode("utf-8")
        return json.loads(content)
    except Exception as e:
        raise Exception(f"Error fetching the JSON file from S3 {bucket}/{key}: {e}")


def save_builds_json(builds_json_data):
    builds_json_data["timestamp"] = datetime.datetime.now(pytz.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    with open(BUILDFILES['list'], "w") as json_file:
        json.dump(builds_json_data, json_file, indent=2)


def handle_upload_builds_json(s3_client, bucket, prefix, dry_run, acl):
    remote_builds_json = get_json_from_s3(s3_client, bucket, os.path.join(prefix, "builds.json"))
    with open(BUILDFILES['sourcedata'], "r") as f:
        builds_json_source_data = json.load(f)
    # Check if any changes were made to the remote (S3) builds.json
    # while the pruning was in progress.
    if remote_builds_json != builds_json_source_data:
        print("Detected remote updates to builds.json. Merging them into the local builds.json file")
        with open(BUILDFILES['list'], "r") as f:
            current_builds_json = json.load(f)
        update_policy_cleanup(current_builds_json, remote_builds_json)
        if not dry_run:
            # Make sure we have the merged JSON as the local builds/builds.json
            save_builds_json(remote_builds_json)
    # Upload the local builds.json to S3
    return s3_copy(s3_client, BUILDFILES['list'], bucket, f'{prefix}/builds.json', CACHE_MAX_AGE_METADATA, acl, extra_args={}, dry_run=dry_run)


# Update policy-cleanup keys into remote_builds
def update_policy_cleanup(current_builds, remote_builds):
    current_builds_dict = {build['id']: build for build in current_builds['builds']}
    for remote_build in remote_builds['builds']:
        build_id = remote_build['id']
        if build_id in current_builds_dict:
            current_build = current_builds_dict[build_id]
            if 'policy-cleanup' in current_build:
                remote_build['policy-cleanup'] = current_build['policy-cleanup']


def prune_cloud_uploads(build, cloud_config, dry_run):
    # Ensure AWS AMIs and GCP images are removed based on the configuration
    errors = []
    errors.extend(deregister_aws_amis(build, cloud_config, dry_run))
    errors.extend(delete_gcp_image(build, cloud_config, dry_run))

    if errors:
        print(f"Found errors when removing cloud-uploads for {build.id}:")
        for e in errors:
            print(e)
        raise Exception("Some errors were encountered")


def deregister_aws_amis(build, cloud_config, dry_run):
    errors = []
    aws_credentials = cloud_config.get("aws", {}).get("credentials")
    for ami in build.images.get("amis", []):
        region_name = ami.get("name")
        ami_id = ami.get("hvm")
        snapshot_id = ami.get("snapshot")
        if dry_run:
            print(f"Would delete {ami_id} and {snapshot_id} for {build.id}")
            continue
        if ami_id and snapshot_id and region_name:
            try:
                deregister_aws_resource(ami_id, snapshot_id, region=region_name, credentials_file=aws_credentials)
            except Exception as e:
                errors.append(e)
        else:
            errors.append(f"Missing parameters to remove {ami_id} and {snapshot_id}")
    return errors


def delete_gcp_image(build, cloud_config, dry_run):
    errors = []
    gcp = build.images.get("gcp")
    if not gcp:
        print(f"No GCP image for {build.id} for {build.arch}")
        # Return the (empty) error list so callers can extend() it.
        return errors
    gcp_image = gcp.get("image")
    json_key = cloud_config.get("gcp", {}).get("json-key")
    project = cloud_config.get("gcp", {}).get("project")
    if dry_run:
        print(f"Would delete {gcp_image} GCP image for {build.id}")
    elif gcp_image and json_key and project:
        try:
            remove_gcp_image(gcp_image, json_key, project)
        except Exception as e:
            errors.append(e)
    else:
        errors.append(f"Missing parameters to remove {gcp_image}")
    return errors


def get_period_in_months(duration):
    val, unit = duration.split(maxsplit=1)
    if unit in ["years", "year", "y"]:
        return int(val) * 12
    elif unit in ["months", "month", "m"]:
        return int(val)
    else:
        raise Exception(f"Duration unit provided is {unit}. Pruning duration is only supported in years and months")


if __name__ == "__main__":
    main()
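The handle_upload_builds_json() path above guards against races: if the remote builds.json changed while pruning ran, the local policy-cleanup markers are merged onto the fresh remote copy before re-uploading. Below is a small self-contained sketch of that merge, reusing the update_policy_cleanup() logic from the script on toy data (the build IDs are invented):

    # policy-cleanup markers set locally during pruning are carried over onto
    # a freshly fetched remote builds.json; unrelated remote builds are left
    # untouched. Build IDs below are invented.
    local_builds = {"builds": [
        {"id": "40.20240425.dev.1", "arches": ["x86_64"],
         "policy-cleanup": ["cloud-uploads"]},
        {"id": "40.20240426.dev.2", "arches": ["x86_64"]},
    ]}
    remote_builds = {"builds": [
        {"id": "40.20240427.dev.3", "arches": ["x86_64"]},  # appeared remotely mid-prune
        {"id": "40.20240425.dev.1", "arches": ["x86_64"]},
    ]}

    def update_policy_cleanup(current_builds, remote_builds):
        current = {b["id"]: b for b in current_builds["builds"]}
        for rb in remote_builds["builds"]:
            if rb["id"] in current and "policy-cleanup" in current[rb["id"]]:
                rb["policy-cleanup"] = current[rb["id"]]["policy-cleanup"]

    update_policy_cleanup(local_builds, remote_builds)
    assert remote_builds["builds"][1]["policy-cleanup"] == ["cloud-uploads"]
    assert "policy-cleanup" not in remote_builds["builds"][0]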

src/cosalib/aws.py

Lines changed: 16 additions & 18 deletions
@@ -1,35 +1,33 @@
-import boto3
 import json
 import os
 import subprocess
 import sys
 
 from cosalib.cmdlib import (
     flatten_image_yaml,
-    retry_boto_exception,
-    retry_callback,
-    retry_stop
+    runcmd
 )
 from tenacity import (
     retry,
     stop_after_attempt
 )
 
 
-@retry(stop=retry_stop, retry=retry_boto_exception,
-       before_sleep=retry_callback)
-def deregister_ami(ami_id, region):
-    print(f"AWS: deregistering AMI {ami_id} in {region}")
-    ec2 = boto3.client('ec2', region_name=region)
-    ec2.deregister_image(ImageId=ami_id)
-
-
-@retry(stop=retry_stop, retry=retry_boto_exception,
-       before_sleep=retry_callback)
-def delete_snapshot(snap_id, region):
-    print(f"AWS: removing snapshot {snap_id} in {region}")
-    ec2 = boto3.client('ec2', region_name=region)
-    ec2.delete_snapshot(SnapshotId=snap_id)
+@retry(reraise=True, stop=stop_after_attempt(3))
+def deregister_aws_resource(ami, snapshot, region, credentials_file):
+    print(f"AWS: deregistering AMI {ami} and {snapshot} in {region}")
+    try:
+        runcmd([
+            'ore', 'aws', 'delete-image',
+            '--credentials-file', credentials_file,
+            '--ami', ami,
+            '--snapshot', snapshot,
+            "--region", region,
+            "--allow-missing"
+        ])
+        print(f"AWS: successfully removed {ami} and {snapshot}")
+    except SystemExit:
+        raise Exception(f"Failed to remove {ami} or {snapshot}")
 
 
 @retry(reraise=True, stop=stop_after_attempt(3))
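The @retry(reraise=True, stop=stop_after_attempt(3)) decorator used here retries the wrapped call up to three attempts and then re-raises the last exception instead of wrapping it in tenacity.RetryError. A standalone illustration, assuming tenacity is installed (flaky_delete is hypothetical):

    from tenacity import retry, stop_after_attempt

    attempts = {"n": 0}

    @retry(reraise=True, stop=stop_after_attempt(3))
    def flaky_delete():
        # Fails twice, then succeeds; with reraise=True a third failure would
        # surface as the original RuntimeError, not a tenacity.RetryError.
        attempts["n"] += 1
        if attempts["n"] < 3:
            raise RuntimeError("transient AWS error")
        return "deleted"

    print(flaky_delete())  # succeeds on the third attempt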
