diff --git a/src/cfengine_cli/dev.py b/src/cfengine_cli/dev.py
index 7ca5744..d2bcf2f 100644
--- a/src/cfengine_cli/dev.py
+++ b/src/cfengine_cli/dev.py
@@ -1,5 +1,7 @@
 import os
-from cfbs.commands import generate_release_information_command
+from cfengine_cli.masterfiles.generate_release_information import (
+    generate_release_information_impl,
+)
 from cfengine_cli.utils import UserError
 from cfengine_cli.deptool import (
     update_dependency_tables as _update_dependency_tables,
@@ -8,6 +10,13 @@
 from cfengine_cli.docs import update_docs, check_docs
 
 
+def generate_release_information_command(
+    omit_download=False, check=False, min_version=None
+):
+    generate_release_information_impl(omit_download, check, min_version)
+    return 0
+
+
 def _continue_prompt() -> bool:
     answer = None
     while answer not in ("y", "n", "yes", "no"):
diff --git a/src/cfengine_cli/masterfiles/__init__.py b/src/cfengine_cli/masterfiles/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/cfengine_cli/masterfiles/analyze.py b/src/cfengine_cli/masterfiles/analyze.py
new file mode 100644
index 0000000..b56da27
--- /dev/null
+++ b/src/cfengine_cli/masterfiles/analyze.py
@@ -0,0 +1,93 @@
+from collections import OrderedDict
+import os
+
+from cfbs.utils import dict_sorted_by_key, file_sha256, version_as_comparable_list
+
+Version = str
+
+
+def initialize_vcf():
+    versions_dict = {"versions": {}}
+    checksums_dict = {"checksums": {}}
+    files_dict = {"files": {}}
+
+    return versions_dict, checksums_dict, files_dict
+
+
+def versions_checksums_files(
+    files_dir_path, version, versions_dict, checksums_dict, files_dict
+):
+    for root, _, files in os.walk(files_dir_path):
+        for name in files:
+            full_relpath = os.path.join(root, name)
+            tarball_relpath = os.path.relpath(full_relpath, files_dir_path)
+            file_checksum = file_sha256(full_relpath)
+
+            if version not in versions_dict["versions"]:
+                versions_dict["versions"][version] = {}
+            versions_dict["versions"][version][tarball_relpath] = file_checksum
+
+            if file_checksum not in checksums_dict["checksums"]:
+                checksums_dict["checksums"][file_checksum] = {}
+            if tarball_relpath not in checksums_dict["checksums"][file_checksum]:
+                checksums_dict["checksums"][file_checksum][tarball_relpath] = []
+            checksums_dict["checksums"][file_checksum][tarball_relpath].append(version)
+
+            if tarball_relpath not in files_dict["files"]:
+                files_dict["files"][tarball_relpath] = {}
+            if file_checksum not in files_dict["files"][tarball_relpath]:
+                files_dict["files"][tarball_relpath][file_checksum] = []
+            files_dict["files"][tarball_relpath][file_checksum].append(version)
+
+    return versions_dict, checksums_dict, files_dict
+
+
+def finalize_vcf(versions_dict, checksums_dict, files_dict):
+    # explicitly sort VCF data to ensure determinism
+
+    # checksums.json:
+    working_dict = checksums_dict["checksums"]
+    for c in working_dict.keys():
+        for f in working_dict[c].keys():
+            # sort each version list, descending
+            working_dict[c][f] = sorted(
+                working_dict[c][f],
+                key=lambda v: version_as_comparable_list(v),
+                reverse=True,
+            )
+        # sort filepaths, alphabetically
+        working_dict[c] = dict_sorted_by_key(working_dict[c])
+    # sort checksums
+    checksums_dict["checksums"] = dict_sorted_by_key(working_dict)
+
+    # files.json:
+    working_dict = files_dict["files"]
+    # sort each list, first by version descending, then by checksum
+    for f in working_dict.keys():
+        for c in working_dict[f].keys():
+            # sort each version list, descending
+            working_dict[f][c] = sorted(
+                working_dict[f][c],
+                key=lambda v: version_as_comparable_list(v),
+                reverse=True,
+            )
+        # sort checksums
+        working_dict[f] = dict_sorted_by_key(working_dict[f])
+    # sort files, alphabetically
+    files_dict["files"] = dict_sorted_by_key(working_dict)
+
+    # versions.json:
+    working_dict = versions_dict["versions"]
+    # sort files of each version
+    for v in working_dict.keys():
+        working_dict[v] = dict_sorted_by_key(working_dict[v])
+    # sort version numbers, in decreasing order
+    versions_dict["versions"] = OrderedDict(
+        sorted(
+            working_dict.items(),
+            key=lambda p: version_as_comparable_list(p[0]),
+            reverse=True,
+        )
+    )
+
+    return versions_dict, checksums_dict, files_dict
diff --git a/src/cfengine_cli/masterfiles/check_download_matches_git.py b/src/cfengine_cli/masterfiles/check_download_matches_git.py
new file mode 100644
index 0000000..0e54e7d
--- /dev/null
+++ b/src/cfengine_cli/masterfiles/check_download_matches_git.py
@@ -0,0 +1,108 @@
+import os
+from collections import OrderedDict
+
+from cfbs.utils import (
+    dict_diff,
+    read_json,
+    CFBSExitError,
+    write_json,
+    version_as_comparable_list,
+)
+
+
+def check_download_matches_git(versions):
+    """Check that the downloadable files match the git files.
+
+    This can be used to monitor / detect if something has been changed, accidentally or maliciously.
+
+    Generates a `differences.json` file describing the differences for each version.
+    """
+    assert os.path.isfile("versions.json")
+    assert os.path.isfile("versions-git.json")
+
+    download_versions_dict = read_json("versions.json")
+    git_versions_dict = read_json("versions-git.json")
+
+    assert download_versions_dict is not None
+    assert git_versions_dict is not None
+
+    diffs_dict = {"differences": {}}
+
+    nonmatching_versions = []
+    extraneous_count = 0
+    differing_count = 0
+
+    for version in versions:
+        dl_version_files_dict = download_versions_dict["versions"][version]
+        git_version_files_dict = git_versions_dict["versions"][version]
+
+        # normalize downloaded version dictionary filepaths
+        # necessary because the downloaded version and git version dictionaries have filepaths of different forms
+        new_download_dict = {}
+        for key, value in dl_version_files_dict.items():
+            if key.startswith("masterfiles/"):
+                key = key[12:]
+            new_download_dict[key] = value
+        dl_version_files_dict = new_download_dict
+
+        version_diffs_dict = {}
+        version_diffs_dict["files_only_in_downloads"] = []
+        version_diffs_dict["files_only_in_git"] = []
+        version_diffs_dict["files_with_different_content"] = []
+
+        only_dl, only_git, value_diff = dict_diff(
+            dl_version_files_dict, git_version_files_dict
+        )
+
+        for filepath in only_dl:
+            version_diffs_dict["files_only_in_downloads"].append(filepath)
+        for filepath in only_git:
+            version_diffs_dict["files_only_in_git"].append(filepath)
+        for filepath, _, _ in value_diff:
+            version_diffs_dict["files_with_different_content"].append(filepath)
+
+        diffs_dict["differences"][version] = version_diffs_dict
+
+        if len(only_dl) > 0 or len(value_diff) > 0:
+            nonmatching_versions.append(version)
+            extraneous_count += len(only_dl)
+            differing_count += len(value_diff)
+
+    nonmatching_versions.sort(key=lambda v: version_as_comparable_list(v), reverse=True)
+
+    # fully sort differences.json:
+    working_dict = diffs_dict["differences"]
+    # sort filepaths of each version, alphabetically
+    for k in working_dict.keys():
+        working_dict[k]["files_only_in_downloads"].sort()
+        working_dict[k]["files_only_in_git"].sort()
+        working_dict[k]["files_with_different_content"].sort()
+    # sort version numbers, in decreasing order
diffs_dict["differences"] = OrderedDict( + sorted( + working_dict.items(), + key=lambda p: version_as_comparable_list(p[0]), + reverse=True, + ) + ) + + write_json("differences.json", diffs_dict) + + if len(nonmatching_versions) > 0: + raise CFBSExitError( + "The masterfiles downloaded from github.com and cfengine.com do not match - found " + + str(extraneous_count) + + " extraneous file" + + ("" if extraneous_count == 1 else "s") + + " and " + + str(differing_count) + + " differing file" + + ("" if differing_count == 1 else "s") + + " across " + + str(len(nonmatching_versions)) + + " version" + + ("" if len(nonmatching_versions) == 1 else "s") + + " (" + + ", ".join(nonmatching_versions) + + "). See ./differences.json" + ) diff --git a/src/cfengine_cli/masterfiles/download.py b/src/cfengine_cli/masterfiles/download.py new file mode 100644 index 0000000..388f2e0 --- /dev/null +++ b/src/cfengine_cli/masterfiles/download.py @@ -0,0 +1,196 @@ +import os +import shutil + +from cfbs.utils import ( + CFBSNetworkError, + fetch_url, + get_json, + mkdir, + CFBSExitError, + version_is_at_least, +) + +ENTERPRISE_RELEASES_URL = "https://cfengine.com/release-data/enterprise/releases.json" + + +COMMUNITY_ONLY_VERSIONS = ["3.12.0b1", "3.10.0b1"] +"""Masterfiles versions which do not appear in Enterprise releases but appear in Community releases.""" + +MISSING_DATA_VERSIONS = ["3.10.0", "3.9.2"] +"""Rationale for each version: +* 3.10.0:\\ + For some reason, the `"Masterfiles ready-to-install tarball"` is a .tar.gz tarball, rather than a .pkg.tar.gz tarball. + However, an unlisted analoguous URL for the .pkg.tar.gz tarball does exist. +* 3.9.2:\\ + No masterfiles are listed in the release data, but an unlisted analoguous URL does exist.""" + +HARDCODED_VERSIONS = COMMUNITY_ONLY_VERSIONS + MISSING_DATA_VERSIONS + +HARDCODED_URLS = { + "3.12.0b1": "https://cfengine-package-repos.s3.amazonaws.com/community_binaries/Community-3.12.0b1/misc/cfengine-masterfiles-3.12.0b1.pkg.tar.gz", + "3.10.0b1": "https://cfengine-package-repos.s3.amazonaws.com/tarballs/cfengine-masterfiles-3.10.0b1.pkg.tar.gz", + "3.10.0": "https://cfengine-package-repos.s3.amazonaws.com/tarballs/cfengine-masterfiles-3.10.0.pkg.tar.gz", + "3.9.2": "https://cfengine-package-repos.s3.amazonaws.com/tarballs/cfengine-masterfiles-3.9.2.pkg.tar.gz", +} +HARDCODED_CHECKSUMS = { + "3.12.0b1": "ede305dae7be3edfac04fc5b7f63b46adb3a5b1612f4755e855ee8e6b8d344d7", + "3.10.0b1": "09291617254705d79dea2531b23dbd0754f09029e90ce0b43b275aa02c1223a3", + "3.10.0": "7b5e237529e11ce4ae295922dad1a681f13b95f3a7d247d39d3f5088f1a1d7d3", + "3.9.2": "ae1a758530d4a4aad5b6812b61fc37ad1b5900b755f88a1ab98da7fd05a9f5cc", +} + + +def get_download_urls_enterprise(min_version=None): + download_urls = {} + reported_checksums = {} + + print("* gathering download URLs...") + + try: + data = get_json(ENTERPRISE_RELEASES_URL) + except CFBSNetworkError: + raise CFBSExitError( + "Downloading CFEngine release data failed - check your Wi-Fi / network settings." 
+        )
+
+    for release_data in data["releases"]:
+        version = release_data["version"]
+
+        if not version_is_at_least(version, min_version):
+            continue
+
+        if version in MISSING_DATA_VERSIONS:
+            download_urls[version] = HARDCODED_URLS[version]
+            reported_checksums[version] = HARDCODED_CHECKSUMS[version]
+            continue
+
+        release_url = release_data["URL"]
+        try:
+            subdata = get_json(release_url)
+        except CFBSNetworkError:
+            raise CFBSExitError(
+                "Downloading CFEngine release data for version %s failed - check your Wi-Fi / network settings."
+                % version
+            )
+        artifacts_data = subdata["artifacts"]
+
+        if "Additional Assets" not in artifacts_data:
+            # happens for 3.9.0b1, 3.8.0b1, 3.6.1, 3.6.0
+            continue
+
+        masterfiles_data = None
+        for asset in artifacts_data["Additional Assets"]:
+            if asset["Title"] == "Masterfiles ready-to-install tarball":
+                masterfiles_data = asset
+
+        if masterfiles_data is None:
+            # happens for 3.9.2, 3.9.0, 3.8.2, 3.8.1, 3.8.0, 3.7.4--3.6.2
+            # 3.9.2: see above
+            # 3.9.0 and below: no masterfiles listed, and unlisted analogous URLs seemingly do not exist
+            continue
+
+        download_urls[version] = masterfiles_data["URL"]
+        reported_checksums[version] = masterfiles_data["SHA256"]
+
+    return download_urls, reported_checksums
+
+
+def get_all_download_urls(min_version=None):
+    download_urls, reported_checksums = get_download_urls_enterprise(min_version)
+
+    for version in COMMUNITY_ONLY_VERSIONS:
+        if version_is_at_least(version, min_version):
+            download_urls[version] = HARDCODED_URLS[version]
+            reported_checksums[version] = HARDCODED_CHECKSUMS[version]
+
+    return download_urls, reported_checksums
+
+
+def get_single_download_url(version):
+    if version in HARDCODED_VERSIONS:
+        download_url = HARDCODED_URLS[version]
+        reported_checksum = HARDCODED_CHECKSUMS[version]
+        return (download_url, reported_checksum)
+
+    try:
+        data = get_json(ENTERPRISE_RELEASES_URL)
+    except CFBSNetworkError:
+        raise CFBSExitError(
+            "Downloading CFEngine release data failed - check your Wi-Fi / network settings."
+        )
+
+    for release_data in data["releases"]:
+        release_version = release_data["version"]
+
+        if release_version == version:
+            release_url = release_data["URL"]
+            try:
+                subdata = get_json(release_url)
+            except CFBSNetworkError:
+                raise CFBSExitError(
+                    "Downloading CFEngine release data for version %s failed - check your Wi-Fi / network settings."
+                    % version
+                )
+            artifacts_data = subdata["artifacts"]
+
+            if "Additional Assets" not in artifacts_data:
+                break
+
+            for asset in artifacts_data["Additional Assets"]:
+                if asset["Title"] == "Masterfiles ready-to-install tarball":
+                    download_url = asset["URL"]
+                    reported_checksum = asset["SHA256"]
+
+                    return (download_url, reported_checksum)
+
+    raise CFBSExitError("Download URL of given MPF version was not found")
+
+
+def download_versions_from_urls(download_path, download_urls, reported_checksums):
+    downloaded_versions = []
+
+    mkdir(download_path)
+
+    for version, url in download_urls.items():
+        # ignore master and .x versions
+        if url.startswith("http://buildcache"):
+            continue
+
+        print("* downloading from", url)
+        downloaded_versions.append(version)
+
+        version_path = os.path.join(download_path, version)
+        mkdir(version_path)
+
+        # download a version, and verify the reported checksum matches
+        filename = url.split("/")[-1]
+        tarball_path = os.path.join(version_path, filename)
+        checksum = reported_checksums[version]
+        try:
+            fetch_url(url, tarball_path, checksum)
+        except CFBSNetworkError as e:
+            raise CFBSExitError("For version " + version + ": " + str(e))
+
+        tarball_dir_path = os.path.join(version_path, "tarball")
+        shutil.unpack_archive(tarball_path, tarball_dir_path)
+
+    return downloaded_versions
+
+
+def download_all_versions(download_path, min_version=None):
+    download_urls, reported_checksums = get_all_download_urls(min_version)
+
+    downloaded_versions = download_versions_from_urls(
+        download_path, download_urls, reported_checksums
+    )
+
+    return downloaded_versions
+
+
+def download_single_version(download_path, version):
+    download_url, reported_checksum = get_single_download_url(version)
+
+    download_urls = {version: download_url}
+    reported_checksums = {version: reported_checksum}
+
+    download_versions_from_urls(download_path, download_urls, reported_checksums)
diff --git a/src/cfengine_cli/masterfiles/generate_release_information.py b/src/cfengine_cli/masterfiles/generate_release_information.py
new file mode 100644
index 0000000..2a23b05
--- /dev/null
+++ b/src/cfengine_cli/masterfiles/generate_release_information.py
@@ -0,0 +1,52 @@
+from cfbs.masterfiles.download import download_all_versions
+from cfbs.masterfiles.generate_vcf_download import generate_vcf_download
+from cfbs.masterfiles.generate_vcf_git_checkout import generate_vcf_git_checkout
+from cfbs.masterfiles.check_download_matches_git import check_download_matches_git
+from cfbs.utils import immediate_subdirectories, version_is_at_least
+
+DOWNLOAD_PATH = "downloaded_masterfiles"
+
+
+def generate_release_information_impl(
+    omit_download=False, check=False, min_version=None
+):
+    if not omit_download:
+        print("Downloading masterfiles...")
+
+        downloaded_versions = download_all_versions(DOWNLOAD_PATH, min_version)
+
+        print("Download finished. Every reported checksum matches.")
+    else:
+        downloaded_versions = immediate_subdirectories(DOWNLOAD_PATH)
+
+        downloaded_versions = list(
+            filter(
+                lambda v: version_is_at_least(v, min_version),
+                downloaded_versions,
+            )
+        )
+
+    print(
+        "Downloading releases of masterfiles from cfengine.com and generating release information..."
+    )
+    generate_vcf_download(DOWNLOAD_PATH, downloaded_versions)
+
+    if check:
+        print(
+            "Downloading releases of masterfiles from git (github.com) and generating "
+            "additional release information for comparison..."
+        )
+        generate_vcf_git_checkout(downloaded_versions)
+        print("Candidate release information generated.")
+        print("Comparing files from cfengine.com and github.com...")
+
+        check_download_matches_git(downloaded_versions)
+
+        print("The masterfiles downloaded from github.com and cfengine.com match.")
+    else:
+        print("Release information successfully generated.")
+        print("See the results in ./masterfiles/")
+        print(
+            "(Run again with --check-against-git to download and compare with files "
+            "from git, and generate -git.json files)"
+        )
diff --git a/src/cfengine_cli/masterfiles/generate_vcf_download.py b/src/cfengine_cli/masterfiles/generate_vcf_download.py
new file mode 100644
index 0000000..c65c2c6
--- /dev/null
+++ b/src/cfengine_cli/masterfiles/generate_vcf_download.py
@@ -0,0 +1,33 @@
+import os
+
+from cfbs.utils import write_json
+from cfbs.masterfiles.analyze import (
+    finalize_vcf,
+    initialize_vcf,
+    versions_checksums_files,
+)
+
+
+def generate_vcf_download(dir_path, downloaded_versions):
+    """`dir_path`: the path of the directory containing masterfiles version
+    subdirectories in the form `dir_path/x.y.z/tarball/`
+
+    The `tarball` folder should contain the `masterfiles` folder (older
+    tarballs also have a `modules` folder alongside the `masterfiles` folder).
+    """
+    versions_dict, checksums_dict, files_dict = initialize_vcf()
+
+    for version in downloaded_versions:
+        files_dir_path = os.path.join(dir_path, version, "tarball")
+
+        versions_dict, checksums_dict, files_dict = versions_checksums_files(
+            files_dir_path, version, versions_dict, checksums_dict, files_dict
+        )
+
+    versions_dict, checksums_dict, files_dict = finalize_vcf(
+        versions_dict, checksums_dict, files_dict
+    )
+
+    write_json("./masterfiles/versions.json", versions_dict)
+    write_json("./masterfiles/checksums.json", checksums_dict)
+    write_json("./masterfiles/files.json", files_dict)
diff --git a/src/cfengine_cli/masterfiles/generate_vcf_git_checkout.py b/src/cfengine_cli/masterfiles/generate_vcf_git_checkout.py
new file mode 100644
index 0000000..9a1ffa8
--- /dev/null
+++ b/src/cfengine_cli/masterfiles/generate_vcf_git_checkout.py
@@ -0,0 +1,102 @@
+import os
+import shutil
+import subprocess
+import sys
+
+from cfbs.utils import write_json
+from cfbs.masterfiles.analyze import (
+    finalize_vcf,
+    initialize_vcf,
+    versions_checksums_files,
+)
+
+DIR_PATH = "."
+"""The path of the working directory.""" + +MPF_URL = "https://github.com/cfengine/masterfiles" +MPF_PATH = os.path.join(DIR_PATH, "masterfiles") + + +def check_required_command(command): + if not shutil.which(command): + print("`%s` was not found" % command) + sys.exit(1) + + +def check_required_commands(commands): + for c in commands: + check_required_command(c) + + +def generate_vcf_git_checkout(checkout_tags): + required_commands = ["git", "make", "automake", "autoconf"] + check_required_commands(required_commands) + + # get the current version of the MPF repo + if not os.path.isdir(MPF_PATH): + subprocess.run( + ["git", "clone", "--no-checkout", MPF_URL], + cwd=DIR_PATH, + check=True, + ) + else: + subprocess.run( + ["git", "fetch", "--all", "--tags", "--force"], + cwd=MPF_PATH, + check=True, + ) + + versions_dict, checksums_dict, files_dict = initialize_vcf() + + for tag in checkout_tags: + print("Checking out tag", tag) + + # check out the version + subprocess.run( + ["git", "checkout", tag], + cwd=MPF_PATH, + check=True, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + + # build masterfiles from git as they are in the tarball packages + # for the files of this version to be reproducible, the `EXPLICIT_RELEASE` + # environment variable needs to be set to what it was when the downloadable + # files were built + if tag == "3.18.3": + release_number = "2" + else: + release_number = "1" + subprocess.run( + ["./autogen.sh"], + cwd=MPF_PATH, + check=True, + env=dict( + os.environ.copy(), EXPLICIT_VERSION=tag, EXPLICIT_RELEASE=release_number + ), + ) + # older masterfiles version READMEs instruct to use `make install` and newer `make` - always use `make` instead + subprocess.run(["make"], cwd=MPF_PATH, check=True) + + # compute VCF data for all the files + versions_dict, checksums_dict, files_dict = versions_checksums_files( + MPF_PATH, tag, versions_dict, checksums_dict, files_dict + ) + + # clean the files to prevent spillage to other versions + subprocess.run( + ["git", "clean", "-dfx"], + cwd=MPF_PATH, + check=True, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + + versions_dict, checksums_dict, files_dict = finalize_vcf( + versions_dict, checksums_dict, files_dict + ) + + write_json("versions-git.json", versions_dict) + write_json("checksums-git.json", checksums_dict) + write_json("files-git.json", files_dict)