Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
258 changes: 236 additions & 22 deletions src/cmd-diff
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@
import argparse
import os
import shutil
import json
import subprocess
import sys
import tempfile
import time
import rpm
from multiprocessing import Process

from dataclasses import dataclass
Expand All @@ -24,11 +26,78 @@ class DiffBuildTarget:
id: str
dir: str
meta: dict
commitmeta: dict

@staticmethod
def from_build(builds, build, arch):
return DiffBuildTarget(build, builds.get_build_dir(build, arch),
builds.get_build_meta(build, arch))
builds.get_build_meta(build, arch),
builds.get_build_commitmeta(build, arch))


class PackageDiffType(IntEnum):
ADD = 0
REMOVE = 1
UPGRADE = 2
DOWNGRADE = 3


@dataclass
class Package:
name: str
epoch: int
v: str # Version
r: str # Release
a: str # Architecture

def evr(self):
epoch_string = "" if self.epoch == "0" else f"{self.epoch}:"
return f"{epoch_string}{self.v}-{self.r}"

# Compare versions
# rc > 0 -> Newer; rc == 0 -> Same; rc < 0 -> Older
@staticmethod
def vercmp(from_pkg, to_pkg):
# pylint: disable=E1101
return rpm.labelCompare((to_pkg.epoch, to_pkg.v, to_pkg.r),
(from_pkg.epoch, from_pkg.v, from_pkg.r))

# Return a data sctructure representing a diff entry from the
# rpm-ostree db diff --json output.
# "pkgdiff" : [
# [
# "NetworkManager",
# 2,
# {
# "PreviousPackage" : [
# "NetworkManager",
# "1:1.52.0-5.el9_6",
# "x86_64"
# ],
# "NewPackage" : [
# "NetworkManager",
# "1:1.52.0-7.el9_6",
# "x86_64"
# ]
# }
# ],
@staticmethod
def to_json_diff_entry(difftype: PackageDiffType, from_pkg, to_pkg):
name = from_pkg.name if from_pkg else to_pkg.name
change = ""
match difftype:
case PackageDiffType.ADD:
change = {"NewPackage": [to_pkg.name, to_pkg.evr(), to_pkg.a]}
case PackageDiffType.REMOVE:
change = {"PreviousPackage": [from_pkg.name, from_pkg.evr(), from_pkg.a]}
case PackageDiffType.UPGRADE | PackageDiffType.DOWNGRADE:
change = {
"PreviousPackage": [from_pkg.name, from_pkg.evr(), from_pkg.a],
"NewPackage": [to_pkg.name, to_pkg.evr(), to_pkg.a]
}
case _:
raise Exception(f"Invalid PackageDiffType: {difftype}")
return [name, difftype, change]


class OSTreeImport(IntEnum):
Expand All @@ -37,6 +106,11 @@ class OSTreeImport(IntEnum):
FULL = 3


class DiffCmdOutputStrategy(IntEnum):
CD = 1
TEMPLATE = 2


@dataclass
class Differ:
name: str
Expand All @@ -49,11 +123,20 @@ TMP_REPO = 'tmp/repo'

DIFF_CACHE = 'tmp/diff-cache'

# Global variable that can be set once and direct the underlying code to leverage
# a difftool with git diffing or not. This could be an argument passed around to
# underlying functions, but decided to just implement it as a global variable for now.
USE_DIFFTOOL = False


def main():
args = parse_args()
builds = Builds()

# Modify the USE_DIFFTOOL global based on the --difftool argument
global USE_DIFFTOOL
USE_DIFFTOOL = args.difftool

latest_build = builds.get_latest()

os.makedirs(DIFF_CACHE, exist_ok=True)
Expand All @@ -64,7 +147,7 @@ def main():
args.diff_from = builds.get_previous()
args.diff_to = latest_build
elif args.diff_from is None:
args.diff_from = latest_build
args.diff_from = builds.get_previous()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, what were you trying to do that surprised you and motivated this?

E.g. if I do cosa diff --to=$build I expect the from to be the latest build. This matches git diff semantics. Having it actually diff against the previous build is confusing.

Copy link
Member Author

@dustymabe dustymabe Sep 26, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See the last commit in #4327

Basically I wanted to be able to not have to specify the previous build in cmd-build-with-buildah and have cosa diff detect that for me.

It also enables things like being able to run cosa diff --rpms without having to specify a --to and a --from at all, like rpm-ostree db diff does if you don't provide commits to it.

Maybe the behavior should be "if to is latest_build then from will default to previous_build, otherwise default to latest_build".

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It also enables things like being able to run cosa diff --rpms without having to specify a --to and a --from at all, like rpm-ostree db diff does if you don't provide commits to it.

Hmm, that's already the case today, no?

ISTM like in the last commit of #4327, we can just not pass in --to or --from at all in the default case. If there's a parent build, just passing --from should do the right thing.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, that's already the case today, no?

You are right. My mistake. I didn't see the opportunity to not pass in the --to there.

Reverted in #4327

elif args.diff_to is None:
args.diff_to = latest_build

Expand Down Expand Up @@ -109,6 +192,7 @@ def parse_args():
parser.add_argument("--to", dest='diff_to', help="Second build ID")
parser.add_argument("--gc", action='store_true', help="Delete cached diff content")
parser.add_argument("--arch", dest='arch', help="Architecture of builds")
parser.add_argument("--difftool", action='store_true', help="Use git difftool")

for differ in DIFFERS:
parser.add_argument("--" + differ.name, action='store_true', default=False,
Expand Down Expand Up @@ -143,10 +227,104 @@ def diff_source_control(diff_from, diff_to):
print(f" --> {origin_url}/compare/{config_from['commit'][:7]}...{config_to['commit'][:7]}")


def diff_rpms(diff_from, diff_to):
def diff_rpms_rpm_ostree_json(diff_from, diff_to):
diff_rpms_rpm_ostree(diff_from, diff_to, json=True)


def diff_rpms_rpm_ostree_no_json(diff_from, diff_to):
diff_rpms_rpm_ostree(diff_from, diff_to, json=False)


def diff_rpms_rpm_ostree(diff_from, diff_to, json):
ref_from = diff_from.id
ref_to = diff_to.id
runcmd(['rpm-ostree', 'db', 'diff', '--repo', TMP_REPO, ref_from, ref_to])
cmd = ['rpm-ostree', 'db', 'diff', '--repo', TMP_REPO, ref_from, ref_to]
if json:
cmd.append('--format=json')
runcmd(cmd)


def diff_rpms_commitmeta_json(diff_from, diff_to):
diff_rpms_commitmeta(diff_from, diff_to, json_output=True)


def diff_rpms_commitmeta_no_json(diff_from, diff_to):
diff_rpms_commitmeta(diff_from, diff_to, json_output=False)


def diff_rpms_commitmeta(diff_from, diff_to, json_output=False):
"""
Diff the commitmeta.json files between two builds.
"""
from_meta = diff_from.meta
to_meta = diff_to.meta
from_commitmeta = diff_from.commitmeta
to_commitmeta = diff_to.commitmeta

# Gather info about upgraded, downgraded, added, removed RPMS
from_rpms = {pkg[0]: Package(*pkg) for pkg in from_commitmeta.get('rpmostree.rpmdb.pkglist', [])}
to_rpms = {pkg[0]: Package(*pkg) for pkg in to_commitmeta.get('rpmostree.rpmdb.pkglist', [])}

added_rpms = set(to_rpms.keys()) - set(from_rpms.keys())
removed_rpms = set(from_rpms.keys()) - set(to_rpms.keys())
common_rpms = set(from_rpms.keys()) & set(to_rpms.keys())

upgraded_rpms = []
downgraded_rpms = []
for name in common_rpms:
from_pkg = from_rpms[name]
to_pkg = to_rpms[name]
rc = Package.vercmp(from_pkg, to_pkg)
if rc > 0:
upgraded_rpms.append((name, from_pkg, to_pkg))
elif rc == 0:
pass # They are equal versions
elif rc < 0:
downgraded_rpms.append((name, from_pkg, to_pkg))

if json_output:
diff = {
"ostree-commit-from": from_meta.get('ostree-commit'),
"ostree-commit-to": to_meta.get('ostree-commit'),
"pkgdiff": [],
"advisories": []
}
for name in sorted(added_rpms):
diff['pkgdiff'].append(Package.to_json_diff_entry(PackageDiffType.ADD, from_pkg=None, to_pkg=to_rpms[name]))
for name in sorted(removed_rpms):
diff['pkgdiff'].append(Package.to_json_diff_entry(PackageDiffType.REMOVE, from_pkg=from_rpms[name], to_pkg=None))
for name, from_pkg, to_pkg in sorted(upgraded_rpms):
diff['pkgdiff'].append(Package.to_json_diff_entry(PackageDiffType.UPGRADE, from_pkg=from_pkg, to_pkg=to_pkg))
for name, from_pkg, to_pkg in sorted(downgraded_rpms):
diff['pkgdiff'].append(Package.to_json_diff_entry(PackageDiffType.DOWNGRADE, from_pkg=from_pkg, to_pkg=to_pkg))

# Only add advisory info for the JSON output
from_advisories = {adv[0]: adv for adv in from_commitmeta.get('rpmostree.advisories', [])}
to_advisories = {adv[0]: adv for adv in to_commitmeta.get('rpmostree.advisories', [])}
added_advisories = set(to_advisories.keys()) - set(from_advisories.keys())
for name in added_advisories:
diff['advisories'].append(to_advisories[name])
# Dump the JSON to stdout
print(json.dumps(diff, indent=2))
else:
print(f"ostree diff commit from: {diff_from.id} ({from_meta.get('ostree-commit')})")
print(f"ostree diff commit to: {diff_to.id} ({to_meta.get('ostree-commit')})")
if upgraded_rpms:
print("Upgraded:")
for name, from_pkg, to_pkg in sorted(upgraded_rpms):
print(f" {name} {from_pkg.evr()} -> {to_pkg.evr()}")
if downgraded_rpms:
print("Downgraded:")
for name, from_pkg, to_pkg in sorted(downgraded_rpms):
print(f" {name} {from_pkg.evr()} -> {to_pkg.evr()}")
if removed_rpms:
print("Removed:")
for name in sorted(removed_rpms):
print(f" {name}-{from_rpms[name].evr()}")
if added_rpms:
print("Added:")
for name in sorted(added_rpms):
print(f" {name}-{to_rpms[name].evr()}")


def diff_ostree_ls(diff_from, diff_to):
Expand Down Expand Up @@ -349,7 +527,10 @@ def run_guestfs_mount(image_path, mount_target):
g.close()


def diff_metal(diff_from, diff_to):
# Generator that will mount up metal image filesystems and yield
# the paths to be used for analysis and then clean up once given back
# control.
def diff_metal_helper(diff_from, diff_to):
metal_from = get_metal_path(diff_from)
metal_to = get_metal_path(diff_to)

Expand Down Expand Up @@ -382,8 +563,8 @@ def diff_metal(diff_from, diff_to):
if not p.is_alive():
raise Exception(f"A guestfs process for {os.path.basename(d)} died unexpectedly.")

# Now that the mounts are live, we can diff them
git_diff(mount_dir_from, mount_dir_to)
# Allow the caller to operate on these values
yield mount_dir_from, mount_dir_to
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wouldn't a more appropriate model for the single yield case be to just take a function as argument?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess. Like a callback?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looked into this a little, but time boxed myself on it and ran out of time. Can we leave it and improve later?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Basically I mean something like

def diff_metal_ls(diff_from, diff_to):
	cmd = ['find', '.']
	def differ(mount_dir_from, mount_dir_to):
		diff_cmd_outputs(cmd, mount_dir_from, mount_dir_to, strategy='cd')
	diff_metal_helper(diff_from, diff_to, differ)

or did that not work?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I honestly can't remember what I tried at this point. It looks like this will change soon anyway with #4333 so I'd prefer not to rework it now if I don't have to.


finally:
# Unmount the FUSE binds, this will make the guestfs mount calls return
Expand All @@ -401,23 +582,45 @@ def diff_metal(diff_from, diff_to):
shutdown_process(p_to)


def diff_cmd_outputs(cmd, file_from, file_to):
with tempfile.NamedTemporaryFile(prefix=cmd[0] + '-') as f_from, \
tempfile.NamedTemporaryFile(prefix=cmd[0] + '-') as f_to:
if '{}' not in cmd:
cmd += ['{}']
idx = cmd.index('{}')
cmd_from = list(cmd)
cmd_from[idx] = file_from
subprocess.run(cmd_from, check=True, stdout=f_from).stdout
cmd_to = list(cmd)
cmd_to[idx] = file_to
subprocess.run(cmd_to, check=True, stdout=f_to).stdout
git_diff(f_from.name, f_to.name)
def diff_metal(diff_from, diff_to):
for mount_dir_from, mount_dir_to in diff_metal_helper(diff_from, diff_to):
git_diff(mount_dir_from, mount_dir_to)


def diff_metal_du(diff_from, diff_to):
for mount_dir_from, mount_dir_to in diff_metal_helper(diff_from, diff_to):
cmd = ['find', '.', '-type', 'd', '-exec', 'du', '-sh', '{}', ';']
diff_cmd_outputs(cmd, mount_dir_from, mount_dir_to, strategy=DiffCmdOutputStrategy.CD)


def diff_metal_ls(diff_from, diff_to):
for mount_dir_from, mount_dir_to in diff_metal_helper(diff_from, diff_to):
cmd = ['find', '.']
diff_cmd_outputs(cmd, mount_dir_from, mount_dir_to, strategy=DiffCmdOutputStrategy.CD)


def diff_cmd_outputs(cmd, path_from, path_to, strategy: DiffCmdOutputStrategy = DiffCmdOutputStrategy.TEMPLATE):
workingdir = None
with tempfile.NamedTemporaryFile(prefix=cmd[0] + '-') as from_output, \
tempfile.NamedTemporaryFile(prefix=cmd[0] + '-') as to_output:
for path, output in (path_from, from_output), (path_to, to_output):
c = list(cmd)
if strategy == DiffCmdOutputStrategy.TEMPLATE:
if '{}' not in c:
c += ['{}']
idx = c.index('{}')
c[idx] = path
elif strategy == DiffCmdOutputStrategy.CD:
workingdir = path
subprocess.run(c, cwd=workingdir, check=True, stdout=output)
git_diff(from_output.name, to_output.name)


def git_diff(arg_from, arg_to):
runcmd(['git', 'diff', '--no-index', arg_from, arg_to], check=False)
subcmd = 'diff'
if USE_DIFFTOOL:
subcmd = 'difftool'
runcmd(['git', subcmd, '--no-index', arg_from, arg_to], check=False)


def cache_dir(dir):
Expand All @@ -428,7 +631,14 @@ def cache_dir(dir):

# unfortunately, this has to come at the end to resolve functions
DIFFERS = [
Differ("rpms", "Diff RPMs", needs_ostree=OSTreeImport.FULL, function=diff_rpms),
Differ("rpms-rpm-ostree", "Diff RPMs using rpm-ostree",
needs_ostree=OSTreeImport.FULL, function=diff_rpms_rpm_ostree_no_json),
Differ("rpms-rpm-ostree-json", "Diff RPMs & Advisories using rpm-ostree, output JSON",
needs_ostree=OSTreeImport.FULL, function=diff_rpms_rpm_ostree_json),
Differ("rpms", "Diff rpms from commitmeta.json",
needs_ostree=OSTreeImport.NO, function=diff_rpms_commitmeta_no_json),
Differ("rpms-json", "Diff RPMS & Advisories from commitmeta.json, output JSON",
needs_ostree=OSTreeImport.NO, function=diff_rpms_commitmeta_json),
Differ("source-control", "Diff config and COSA input commits",
needs_ostree=OSTreeImport.NO, function=diff_source_control),
Differ("ostree-ls", "Diff OSTree contents using 'ostree diff'",
Expand Down Expand Up @@ -457,6 +667,10 @@ DIFFERS = [
needs_ostree=OSTreeImport.NO, function=diff_metal_partitions),
Differ("metal", "Diff metal disk image content",
needs_ostree=OSTreeImport.NO, function=diff_metal),
Differ("metal-du", "Compare directory usage of metal disk image content",
needs_ostree=OSTreeImport.NO, function=diff_metal_du),
Differ("metal-ls", "Compare directory listing of metal disk image content",
needs_ostree=OSTreeImport.NO, function=diff_metal_ls),
]

if __name__ == '__main__':
Expand Down
5 changes: 5 additions & 0 deletions src/cosalib/builds.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,11 @@ def get_build_meta(self, build_id, basearch=None):
with open(os.path.join(d, 'meta.json')) as f:
return json.load(f)

def get_build_commitmeta(self, build_id, basearch=None):
d = self.get_build_dir(build_id, basearch)
with open(os.path.join(d, 'commitmeta.json')) as f:
return json.load(f)

def get_tags(self):
return self._data.get('tags', [])

Expand Down
3 changes: 3 additions & 0 deletions src/deps.txt
Original file line number Diff line number Diff line change
Expand Up @@ -111,3 +111,6 @@ python3-libguestfs

# For generating kubernetes YAML files (e.g Konflux resources)
kustomize

# For vimdiff
vim-enhanced
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, how can we structure this so that this functionality is not dependent on the $preferred_tool shipping in cosa? One option is that cosa diff --difftool in that case just outputs the command and you run it.

Or... it could also do something like systemd-run --user -t --same-dir --wait --collect git difftool ... maybe.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah. I like git-delta so being able to plug my own tool would be nice. How about we juste write the git diff output to a file and then we can invoke the tool we want?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cosa diff --difftool in that case just outputs the command and you run it.

Only thing about this is it implies not cleaning up after itself.

Or... it could also do something like systemd-run --user -t --same-dir --wait --collect git difftool ... maybe.

This implies functionality we don't necessarily have today. i.e. in the run via bash function case where we run inside the COSA container, we don't have things mounted into the container that would allow it to run a systemd unit on the host.

It definitely sucks installing this into the COSA container too, but it was the easiest thing to do. We don't have to do that, i'd just continue to do what I've been doing, which is cosa shell and then sudo dnf install vim before running cosa diff --difftool

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Only thing about this is it implies not cleaning up after itself.

Indeed. I think it's OK in that case to tell the user they're responsible for cleanup.

This implies functionality we don't necessarily have today. i.e. in the run via bash function case where we run inside the COSA container, we don't have things mounted into the container that would allow it to run a systemd unit on the host.

Right yeah, the idea would be to add another option to the alias.

It definitely sucks installing this into the COSA container too, but it was the easiest thing to do. We don't have to do that, i'd just continue to do what I've been doing, which is cosa shell and then sudo dnf install vim before running cosa diff --difftool

Not strongly against to be clear, but I think it'd be a larger win to try to make this work for everyone.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If this is a blocker I can remove the part in src/deps.txt that installs vim.

Loading