2 changes: 1 addition & 1 deletion cmd/coreos-assembler.go
@@ -16,7 +16,7 @@ var buildCommands = []string{"init", "fetch", "build", "run", "prune", "clean",
 var advancedBuildCommands = []string{"buildfetch", "buildupload", "oc-adm-release", "push-container"}
 var buildextendCommands = []string{"aliyun", "applehv", "aws", "azure", "digitalocean", "exoscale", "extensions-container", "gcp", "hashlist-experimental", "hyperv", "ibmcloud", "kubevirt", "live", "metal", "metal4k", "nutanix", "openstack", "qemu", "secex", "virtualbox", "vmware", "vultr"}

-var utilityCommands = []string{"aws-replicate", "cloud-prune", "compress", "container-prune", "copy-container", "koji-upload", "kola", "push-container-manifest", "remote-build-container", "remote-session", "sign", "tag", "update-variant"}
+var utilityCommands = []string{"aws-replicate", "cloud-prune", "compress", "copy-container", "koji-upload", "kola", "push-container-manifest", "remote-build-container", "remote-session", "sign", "tag", "update-variant"}
 var otherCommands = []string{"shell", "meta"}

 func init() {
92 changes: 90 additions & 2 deletions src/cmd-cloud-prune
Member:
maybe we should consider renaming this coreos-prune or something since it's doing it all now.

Let's only do that in a separate PR (or maybe one final commit once we're at the end of code review).

Member Author:

Will open another PR for this once this gets merged in.

@@ -34,8 +34,10 @@

 import argparse
 import json
+import subprocess
 from urllib.parse import urlparse
 import pytz
+import requests
 import yaml
 import collections
 import datetime
@@ -59,6 +61,8 @@ CACHE_MAX_AGE_METADATA = 60 * 5
 # is up to date.
 SUPPORTED = ["amis", "gcp"]
 UNSUPPORTED = ["aliyun", "azure", "ibmcloud", "powervs"]
+# list of known streams with containers
+STREAMS = {"next", "testing", "stable", "next-devel", "testing-devel", "rawhide", "branched"}


 def parse_args():
@@ -70,6 +74,8 @@ def parse_args():
     parser.add_argument("--gcp-json-key", help="GCP Service Account JSON Auth", default=os.environ.get("GCP_JSON_AUTH"))
     parser.add_argument("--acl", help="ACL for objects", action='store', default='private')
     parser.add_argument("--aws-config-file", default=os.environ.get("AWS_CONFIG_FILE"), help="Path to AWS config file")
+    parser.add_argument("--registry-auth-file", default=os.environ.get("REGISTRY_AUTH_FILE"),
+                        help="Path to docker registry auth file. Directly passed to skopeo.")
     return parser.parse_args()


@@ -110,6 +116,12 @@ def main():
     builds = builds_json_data["builds"]
     pruned_build_ids = []
     images_to_keep = policy.get(stream, {}).get("images-keep", [])
+    barrier_releases = set()
+    # Get the update graph for stable streams
+    if stream in ['stable', 'testing', 'next']:
+        update_graph = get_update_graph(stream)['releases']
+        # Keep only the barrier releases
+        barrier_releases = set([release["version"] for release in update_graph if "barrier" in release])

     # Iterate through builds from oldest to newest
     for build in reversed(builds):
@@ -125,7 +137,7 @@
         current_build = Build(id=build_id, images=images, arch=arch, meta_json=meta_json)

         # Iterate over actions (policy types) to apply pruning
-        for action in ['cloud-uploads', 'images', 'build']:
+        for action in ['cloud-uploads', 'images', 'build', 'containers']:
             if action not in policy[stream]:
                 continue
             action_duration = convert_duration_to_days(policy[stream][action])
@@ -162,7 +174,22 @@
                 case "build":
                     prune_build(s3_client, bucket, prefix, build_id, args.dry_run)
                     pruned_build_ids.append(build_id)

+                case "containers":
+                    # Our containers are manifest listed, which means deleting the container tag
+                    # for one architecture deletes it for all of them. We'll choose to only prune
+                    # for x86_64 since it is the one architecture that exists for all builds.
+                    if arch == "x86_64":
+                        if build_id in barrier_releases:
+                            # Since containers are used for updates we need to keep around containers for barrier releases.
+                            print(f"Release {build_id} is a barrier release. Skipping container prune.")
+                            continue
+                        # Retrieve container tags excluding the stream name since it updates with each release.
+                        container_tags, container_repo = get_container_tags(meta_json, exclude=[stream])
+                        if container_tags:
+                            for tag in container_tags:
+                                prune_container(tag, args.dry_run, container_repo, args.registry_auth_file)
+                        else:
+                            print(f"No container tags to prune for build {build_id}.")
         # Update policy-cleanup after pruning actions for the architecture
         policy_cleanup = build.setdefault("policy-cleanup", {})
         for action in policy[stream].keys():  # Only update actions specified in policy[stream]
@@ -174,6 +201,9 @@
                     if "images" not in policy_cleanup:
                         policy_cleanup["images"] = True
                         policy_cleanup["images-kept"] = images_to_keep
+                case "containers":
+                    if "containers" not in policy_cleanup:
+                        policy_cleanup["containers"] = True

     if pruned_build_ids:
         if "tombstone-builds" not in builds_json_data:
@@ -414,5 +444,63 @@ def prune_build(s3_client, bucket, prefix, build_id, dry_run):
         raise Exception(f"Error pruning {build_id}: {e.response['Error']['Message']}")


+def get_container_tags(meta_json, exclude):
+    """Return (tags, repo) from meta.json's base-oscontainer, minus any tags in `exclude`."""
+    base_oscontainer = meta_json.get("base-oscontainer")
+    if base_oscontainer:
+        tags = base_oscontainer.get("tags", [])
+        filtered_tags = [tag for tag in tags if tag not in exclude]
+        container_repo = base_oscontainer.get("image", "")
+        return filtered_tags, container_repo
+    return [], ""
+
+
+def prune_container(tag, dry_run, container_repo, registry_auth_file):
+    if dry_run:
+        print(f"Would prune image {container_repo}:{tag}")
+    else:
+        skopeo_delete(container_repo, tag, registry_auth_file)
+
+
+def get_update_graph(stream):
+    url = f"https://builds.coreos.fedoraproject.org/updates/{stream}.json"
+    r = requests.get(url, timeout=5)
+    if r.status_code != 200:
+        raise Exception(f"Could not download update graph for {stream}. HTTP {r.status_code}")
+    return r.json()
+
+
+def skopeo_inspect(repo, tag, auth):
+    """Return True if docker://repo:tag exists; False if the tag is already gone (exit code 2)."""
+    skopeo_args = ["skopeo", "inspect", "--no-tags", "--retry-times=10", f"docker://{repo}:{tag}"]
+    if auth:
+        skopeo_args.extend(["--authfile", auth])
+    try:
+        subprocess.check_output(skopeo_args, stderr=subprocess.STDOUT)
+        return True  # Inspection succeeded
+    except subprocess.CalledProcessError as e:
+        exit_code = e.returncode
+        error_message = e.output.decode("utf-8")
+
+        # Exit code 2 indicates the image tag does not exist. We will consider it as pruned.
+        if exit_code == 2:
+            print(f"Skipping deletion for {repo}:{tag} since the tag does not exist.")
+            return False
+        else:
+            # Handle other types of errors
+            raise Exception(f"Inspection failed for {repo}:{tag} with exit code {exit_code}: {error_message}")
+
+
+def skopeo_delete(repo, image, auth):
+    """Delete docker://repo:image after verifying it still exists."""
+    if skopeo_inspect(repo, image, auth):  # Only proceed if inspection succeeds
+        skopeo_args = ["skopeo", "delete", f"docker://{repo}:{image}"]
+        if auth:
+            skopeo_args.extend(["--authfile", auth])
+        try:
+            subprocess.check_output(skopeo_args, stderr=subprocess.STDOUT)
+            print(f"Image {repo}:{image} deleted successfully.")
+        except subprocess.CalledProcessError as e:
+            # Raise an exception in case the delete command fails despite the image existing
+            raise Exception(f"An error occurred during deletion: {e.output.decode('utf-8')}")
Member:
I wonder if there is a way for us to gracefully exit here, i.e. there was a lot of work done before this that would be nice to save (via updating builds.json). But maybe that's not too important.

Member Author (@gursewak1997, Nov 15, 2024):
Agreed, and this applies to other actions as well. Initially, we discussed this and concluded that updating the builds.json file on every run didn't make much sense; the goal was to minimize writes to the file when any kind of failure occurs.
Ideally, subsequent runs shouldn't take more than a few minutes, apart from the first iteration for each stream, so it didn't seem like a critical concern.
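
To make the trade-off concrete, here is a minimal sketch of the checkpointing idea discussed above, which the script deliberately does not implement. Both names are hypothetical (save_builds_json and prune_with_checkpoint are not part of cmd-cloud-prune) and the builds.json path is invented:

import json

def save_builds_json(builds_json_data, path="builds/builds.json"):
    # Hypothetical helper: persist whatever pruning progress has been
    # recorded so far (e.g. the "policy-cleanup" markers) so a re-run
    # can skip work that already completed.
    with open(path, "w") as f:
        json.dump(builds_json_data, f, indent=4)

def prune_with_checkpoint(builds_json_data, pruning_steps):
    # Run each pruning step; on any failure, write builds.json before
    # re-raising so the earlier work is not lost.
    try:
        for step in pruning_steps:
            step()
    except Exception:
        save_builds_json(builds_json_data)
        raise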



 if __name__ == "__main__":
     main()
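
As a reading aid for the helpers added above, a small usage sketch. The meta.json fragment and the repo/tag values are invented; the only assumption is the base-oscontainer shape that get_container_tags reads (an "image" repo plus a "tags" list):

# Illustrative meta.json fragment: just the field get_container_tags reads.
meta_json = {
    "base-oscontainer": {
        "image": "quay.io/example/coreos",      # container repo (invented)
        "tags": ["stable", "41.20241101.3.0"],  # stream tag + version tag (invented)
    }
}

# Exclude the moving stream tag; only the per-release tag remains.
tags, repo = get_container_tags(meta_json, exclude=["stable"])
assert tags == ["41.20241101.3.0"]

# With dry_run=True this only prints what would be deleted.
for tag in tags:
    prune_container(tag, dry_run=True, container_repo=repo, registry_auth_file=None)
# -> Would prune image quay.io/example/coreos:41.20241101.3.0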
125 changes: 0 additions & 125 deletions src/cmd-container-prune

This file was deleted.
