diff --git a/src/cmd-diff b/src/cmd-diff index adf4256dc6..86029ca057 100755 --- a/src/cmd-diff +++ b/src/cmd-diff @@ -7,9 +7,7 @@ import json import subprocess import sys import tempfile -import time import rpm -from multiprocessing import Process from dataclasses import dataclass from enum import IntEnum @@ -162,27 +160,27 @@ def main(): diff_from = DiffBuildTarget.from_build(builds, args.diff_from, args.arch) diff_to = DiffBuildTarget.from_build(builds, args.diff_to, args.arch) + if args.gc: + # some of the dirs in the rootfs are dumb and have "private" bits + runcmd(['find', DIFF_CACHE, '-type', 'd', '-exec', 'chmod', 'u+rwx', '{}', '+']) + shutil.rmtree(DIFF_CACHE) + # get activated differs active_differs = [] for differ in DIFFERS: if getattr(args, differ.name.replace('-', '_')): active_differs += [differ] - # ensure commits are imported if we know we'll need them - ostree_import = max([d.needs_ostree for d in active_differs]) - if ostree_import > OSTreeImport.NO: - for target in [diff_from, diff_to]: - import_ostree_commit('.', target.dir, target.meta, extract_json=False, - partial_import=(ostree_import == OSTreeImport.PARTIAL)) - - # start diff'ing - for differ in active_differs: - differ.function(diff_from, diff_to) - - if args.gc: - # some of the dirs in the rootfs are dumb and have "private" bits - runcmd(['find', DIFF_CACHE, '-type', 'd', '-exec', 'chmod', 'u+rwx', '{}', '+']) - shutil.rmtree(DIFF_CACHE) + if active_differs: + # ensure commits are imported if we know we'll need them + ostree_import = max([d.needs_ostree for d in active_differs]) + if ostree_import > OSTreeImport.NO: + for target in [diff_from, diff_to]: + import_ostree_commit('.', target.dir, target.meta, extract_json=False, + partial_import=(ostree_import == OSTreeImport.PARTIAL)) + # start diff'ing + for differ in active_differs: + differ.function(diff_from, diff_to) def parse_args(): @@ -531,72 +529,80 @@ def run_guestfs_mount(image_path, mount_target): # the paths to be used for analysis and then clean up once given back # control. def diff_metal_helper(diff_from, diff_to): - metal_from = get_metal_path(diff_from) - metal_to = get_metal_path(diff_to) - - mount_dir_from = os.path.join(cache_dir("metal"), diff_from.id) - mount_dir_to = os.path.join(cache_dir("metal"), diff_to.id) - - for d in [mount_dir_from, mount_dir_to]: - if os.path.exists(d): - shutil.rmtree(d) - os.makedirs(d) - - # As the libreguest mount call is blocking until unmounted, let's - # do that in a separate thread - p_from = Process(target=run_guestfs_mount, args=(metal_from, mount_dir_from)) - p_to = Process(target=run_guestfs_mount, args=(metal_to, mount_dir_to)) - - try: - p_from.start() - p_to.start() - # Wait for the FUSE mounts to be ready. We'll check for a known file. - for i, d in enumerate([mount_dir_from, mount_dir_to]): - p = p_from if i == 0 else p_to - timeout = 60 # seconds - start_time = time.time() - check_file = os.path.join(d, 'ostree') - while not os.path.exists(check_file): - time.sleep(1) - if time.time() - start_time > timeout: - raise Exception(f"Timeout waiting for mount in {d}") - if not p.is_alive(): - raise Exception(f"A guestfs process for {os.path.basename(d)} died unexpectedly.") - - # Allow the caller to operate on these values - yield mount_dir_from, mount_dir_to - - finally: - # Unmount the FUSE binds, this will make the guestfs mount calls return - runcmd(['fusermount', '-u', mount_dir_from], check=False) - runcmd(['fusermount', '-u', mount_dir_to], check=False) - - # Ensure the background processes are terminated - def shutdown_process(process): - process.join(timeout=5) - if process.is_alive(): - process.terminate() - process.join() - - shutdown_process(p_from) - shutdown_process(p_to) + metal_image_from = get_metal_path(diff_from) + metal_image_to = get_metal_path(diff_to) + + diff_dir_from = os.path.join(cache_dir("metal"), diff_from.id) + diff_dir_to = os.path.join(cache_dir("metal"), diff_to.id) + + for image_path, diff_dir in [(metal_image_from, diff_dir_from), + (metal_image_to, diff_dir_to)]: + if os.path.exists(diff_dir): + # If it exists assume it's cached already and we don't + # need to do anything. If it's stale for whatever reason + # the user can `cosa diff --gc`. + continue + + os.makedirs(diff_dir) + + g = None + try: + g = guestfs.GuestFS(python_return_dict=True) + g.set_backend("direct") + g.add_drive_opts(image_path, readonly=1) + g.launch() + + # Mount the disks in the guestfs VM + root = g.findfs_label("root") + g.mount_ro(root, "/") + boot = g.findfs_label("boot") + g.mount_ro(boot, "/boot") + efi = g.findfs_label("EFI-SYSTEM") + g.mount_ro(efi, "/boot/efi") + + with tempfile.NamedTemporaryFile(suffix=".tar", delete=True) as tmp_tar: + # Exclude ostree/repo/objects. It just adds noise to the diff + excludes = ['*ostree/repo/objects/*'] + g.tar_out("/", tmp_tar.name, xattrs=True, selinux=True, excludes=excludes) + # Extract the tarball. Normalize the output by replacing sha256sum hashes + # in filenames with XXXXXXXXXXXXXXXX so that we can get a real diff between + # two of the same files in different builds. + runcmd(['tar', '-xf', tmp_tar.name, '-C', diff_dir, + '--transform', 's|[[:xdigit:]]{64}|XXXXXXXXXXXXXXXX|gx']) + + except Exception as e: + print(f"Error in guestfs process for {image_path}: {e}", file=sys.stderr) + raise + finally: + if g: + g.close() + + # Some files like /etc/shadow and sudo have no read permissions so let's + # open it up so the difftool can access it. + runcmd(['find', diff_dir, '-type', 'f', '-perm', '000', + '-exec', 'chmod', '--verbose', '444', '{}', ';']) + runcmd(['find', diff_dir, '-type', 'f', '-perm', '111', + '-exec', 'chmod', '--verbose', '555', '{}', ';']) + + # Allow the caller to operate on these values + return diff_dir_from, diff_dir_to def diff_metal(diff_from, diff_to): - for mount_dir_from, mount_dir_to in diff_metal_helper(diff_from, diff_to): - git_diff(mount_dir_from, mount_dir_to) + mount_dir_from, mount_dir_to = diff_metal_helper(diff_from, diff_to) + git_diff(mount_dir_from, mount_dir_to) def diff_metal_du(diff_from, diff_to): - for mount_dir_from, mount_dir_to in diff_metal_helper(diff_from, diff_to): - cmd = ['find', '.', '-type', 'd', '-exec', 'du', '-sh', '{}', ';'] - diff_cmd_outputs(cmd, mount_dir_from, mount_dir_to, strategy=DiffCmdOutputStrategy.CD) + mount_dir_from, mount_dir_to = diff_metal_helper(diff_from, diff_to) + cmd = ['find', '.', '-type', 'd', '-exec', 'du', '-sh', '{}', ';'] + diff_cmd_outputs(cmd, mount_dir_from, mount_dir_to, strategy=DiffCmdOutputStrategy.CD) def diff_metal_ls(diff_from, diff_to): - for mount_dir_from, mount_dir_to in diff_metal_helper(diff_from, diff_to): - cmd = ['find', '.'] - diff_cmd_outputs(cmd, mount_dir_from, mount_dir_to, strategy=DiffCmdOutputStrategy.CD) + mount_dir_from, mount_dir_to = diff_metal_helper(diff_from, diff_to) + cmd = ['find', '.'] + diff_cmd_outputs(cmd, mount_dir_from, mount_dir_to, strategy=DiffCmdOutputStrategy.CD) def diff_cmd_outputs(cmd, path_from, path_to, strategy: DiffCmdOutputStrategy = DiffCmdOutputStrategy.TEMPLATE):