From 85d7d747126a164e512081be7d79706857ed1e95 Mon Sep 17 00:00:00 2001 From: eugeneswalker Date: Sun, 17 Aug 2025 13:54:10 -0700 Subject: [PATCH] paratools rift enhancements to support remote http+s3 annex, staging annex and gitlab ci --- .gitignore | 1 + lib/rift/Annex.py | 430 +++++++++++++++++++++++++++++++++++------ lib/rift/Config.py | 32 +++ lib/rift/Controller.py | 68 ++++--- lib/rift/RPM.py | 26 --- lib/rift/auth.py | 327 +++++++++++++++++++++++++++++++ lib/rift/patches.py | 12 ++ setup.py | 1 + 8 files changed, 784 insertions(+), 113 deletions(-) create mode 100644 lib/rift/auth.py diff --git a/.gitignore b/.gitignore index 3acf8104..dbf7922a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +build # Compiled python modules. *.pyc diff --git a/lib/rift/Annex.py b/lib/rift/Annex.py index 49ca08b8..6158e4e1 100644 --- a/lib/rift/Annex.py +++ b/lib/rift/Annex.py @@ -34,19 +34,27 @@ called an annex. """ +from urllib.parse import urlparse +import boto3 +import botocore import datetime import hashlib +import io import logging import os -import string +import requests import shutil +import string +import subprocess +import sys import tarfile -import time import tempfile +import time import yaml from rift.TempDir import TempDir from rift.Config import OrderedLoader +from rift.auth import auth # List of ASCII printable characters _TEXTCHARS = bytearray([9, 10, 13] + list(range(32, 127))) @@ -54,6 +62,8 @@ # Suffix of metadata filename _INFOSUFFIX = '.info' +def boto_404(error): + return error.response['Error']['Code'] == '404' def get_digest_from_path(path): """Get file id from the givent path""" @@ -113,8 +123,102 @@ class Annex(): RMODE = 0o644 WMODE = 0o664 - def __init__(self, config, path=None): - self.path = path or config.get('annex') + def __init__(self, config, annex_path=None, annex_push_path=None): + self.restore_cache = config.get('annex_restore_cache') + if self.restore_cache is not None: + self.restore_cache = os.path.expanduser(self.restore_cache) + + # Annex path + # should be either a filesystem path, or else http/https uri for an s3 endpoint + self.read_s3_endpoint = None + self.read_s3_bucket = None + self.read_s3_prefix = None + self.read_s3_client = None + self.annex_is_remote = None + self.annex_type = None + + self.annex_path = annex_path or config.get('annex') + + url = urlparse(self.annex_path, allow_fragments=False) + if url.scheme in ("http", "https"): + self.annex_is_remote = True + self.annex_type = url.scheme + elif url.scheme in ("", "file"): + self.annex_is_remote = False + self.annex_type = "file" + self.annex_path = url.path + else: + logging.error("invalid value for config option: 'annex'") + logging.error("the annex should be either a file:// path or http(s):// url") + sys.exit(1) + + self.annex_is_s3 = config.get('annex_is_s3') + if self.annex_is_s3: + if not self.annex_is_remote: + logging.error("invalid pairing of configuration settings for: annex, annex_is_s3") + logging.error("annex_is_s3 is True but the annex url is not an http(s) url, as required in this case") + sys.exit(1) + else: + parts = url.path.lstrip("/").split("/") + self.read_s3_endpoint = "{}://{}".format(url.scheme, url.netloc) + self.read_s3_bucket = parts[0] + self.read_s3_prefix = "/".join(parts[1:]) + + # Annex push path + # if specified in config, should be an http(s) url containing s3 endpoint, bucket, and prefix + self.annex_push_path = annex_push_path or config.get('annex_push') + self.push_over_s3 = False + self.push_s3_endpoint = None + self.push_s3_bucket = None + 
self.push_s3_prefix = None + self.push_s3_client = None + self.push_s3_auth = None + + if self.annex_push_path is not None: + url = urlparse(self.annex_push_path, allow_fragments=False) + parts = url.path.lstrip("/").split("/") + if url.scheme in ("http", "https"): + self.push_over_s3 = True + self.push_s3_endpoint = "{}://{}".format(url.scheme, url.netloc) + self.push_s3_bucket = parts[0] + self.push_s3_prefix = "/".join(parts[1:]) + self.push_s3_auth = auth(config) + elif url.scheme in ("file", ""): + self.annex_push_path = url.path + else: + # allow annex_push_path to default to annex when annex is s3:// or file:// + if self.annex_is_s3: + self.annex_push_path = self.annex_path + self.push_over_s3 = True + self.push_s3_endpoint = self.read_s3_endpoint + self.push_s3_bucket = self.read_s3_bucket + self.push_s3_prefix = self.read_s3_prefix + self.push_s3_auth = auth(config) + elif self.annex_type == "file": + self.annex_push_path = self.annex_path + self.push_over_s3 = False + + def get_read_s3_client(self): + if self.read_s3_client is None: + self.read_s3_client = boto3.client('s3', endpoint_url = self.read_s3_endpoint) + + return self.read_s3_client + + def get_push_s3_client(self): + if self.push_s3_client is not None: + return self.push_s3_client + + if not self.push_s3_auth.authenticate(): + logging.error("authentication failed; cannot get push_s3_client") + return None + + self.push_s3_client = boto3.client('s3', + aws_access_key_id = self.push_s3_auth.config["access_key_id"], + aws_secret_access_key = self.push_s3_auth.config["secret_access_key"], + aws_session_token = self.push_s3_auth.config["session_token"], + endpoint_url = self.push_s3_endpoint) + + return self.push_s3_client @classmethod def is_pointer(cls, filepath): @@ -129,12 +233,104 @@ def is_pointer(cls, filepath): return all(byte in string.hexdigits for byte in identifier) return False + def make_restore_cache(self): + if not os.path.isdir(self.restore_cache): + if os.path.exists(self.restore_cache): + logging.error("{} should be a directory".format(self.restore_cache)) + sys.exit(1) + os.makedirs(self.restore_cache) + + def get_cached_path(self, path): + return os.path.join(self.restore_cache, path) + def get(self, identifier, destpath): """Get a file identified by identifier and copy it at destpath.""" - # Copy file from repository to destination path - idpath = os.path.join(self.path, identifier) - logging.debug('Extracting %s to %s', identifier, destpath) - shutil.copyfile(idpath, destpath) + # 1. See if we can restore from cache + if self.restore_cache: + self.make_restore_cache() + cached_path = self.get_cached_path(identifer) + if os.path.isfile(cached_path): + logging.debug('Extract %s to %s using restore cache', identifier, destpath) + shutil.copyfile(cached_path, destpath) + return + + # 2. 
See if object is in the annex + if self.annex_is_remote: + # Checking annex, expecting annex path to be an http(s) url + success = False + + idpath = os.path.join(self.annex_path, identifier) + with tempfile.TemporaryDirectory() as tmp_dir: + tmp_file = os.path.join(tmp_dir, identifier) + cmd = ["curl", "-sS", "-w", '"%{http_code}"', "-o", tmp_file, idpath] + try: + proc = subprocess.run(cmd, check=True, capture_output=True, text=True) + if "404" not in proc.stdout.strip(): + if self.restore_cache: + cached_path = self.get_cached_path(identifier) + shutil.move(tmp_file, cached_path) + else: + logging.debug('Extracting %s to %s', identifier, destpath) + shutil.move(tmp_file, destpath) + success = True + except Exception as e: + logging.error("failed to fetch file from annex: {}".format(e)) + + if success: + if self.restore_cache: + logging.debug('Extracting %s to %s', identifier, destpath) + cached_path = self.get_cached_path(identifier) + shutil.copyfile(cached_path, destpath) + return + else: + logging.info("did not find object in annex, will search annex_push next") + else: + # Checking annex, expecting annex path to be a filesystem location + logging.debug('Extracting %s to %s', identifier, destpath) + idpath = os.path.join(self.annex_path, identifier) + if os.path.exists(idpath): + shutil.copyfile(idpath, destpath) + return + else: + logging.info("did not find object in annex, will search annex_push next") + + # 3. See if object is in the annex_push location + if self.push_over_s3: + # Checking annex push, expecting annex push path to be an s3-providing http(s) url + key = os.path.join(self.push_s3_prefix, identifier) + + s3 = self.get_push_s3_client() + # s3.meta.events.register('choose-signer.s3.*', botocore.handlers.disable_signing) + + success = False + with tempfile.NamedTemporaryFile(mode='wb', delete=False) as tmp_file: + try: + s3.download_fileobj(self.push_s3_bucket, key, tmp_file) + success = True + except botocore.exceptions.ClientError as error: + if boto_404(error): + logging.info("object not found: {}".format(key)) + logging.error(error) + except Exception as error: + raise error + + if not success: + sys.exit(1) + + logging.debug('Extracting %s to %s', identifier, destpath) + if self.restore_cache: + cached_path = self.get_cached_path(basename) + shutil.move(tmp_file.name, cached_path) + shutil.copyfile(cached_path, destpath) + else: + shutil.move(tmp_file.name, destpath) + return + else: + # Checking annex_push location, expecting annex_push path to be a filesystem location + logging.debug('Extracting %s to %s', identifier, destpath) + idpath = os.path.join(self.annex_push_path, identifier) + shutil.copyfile(idpath, destpath) + return def get_by_path(self, idpath, destpath): """Get a file identified by idpath content, and copy it at destpath.""" @@ -142,13 +338,21 @@ def get_by_path(self, idpath, destpath): def delete(self, identifier): """Remove a file from annex, whose ID is `identifier'""" - idpath = os.path.join(self.path, identifier) + + if self.annex_is_remote: + logging.info("delete functionality is not implemented for remote annex") + return False + + # local annex (file://) + idpath = os.path.join(self.annex_path, identifier) logging.debug('Deleting from annex: %s', idpath) infopath = get_info_from_digest(idpath) if os.path.exists(infopath): os.unlink(infopath) os.unlink(idpath) + return True + def import_dir(self, dirpath, force_temp=False): """ Look for identifier files in `dirpath' directory and setup a usable @@ -201,38 +405,74 @@ def _load_metadata(self, 
digest): Return metadata for specified digest if the annexed file exists. """ # Prepare metadata file - metapath = os.path.join(self.path, get_info_from_digest(digest)) + metapath = os.path.join(self.annex_path, get_info_from_digest(digest)) metadata = {} - # Read current metadata if present - if os.path.exists(metapath): - with open(metapath, encoding='utf-8') as fyaml: - metadata = yaml.load(fyaml, Loader=OrderedLoader) or {} - # Protect against empty file + + annex_url = urlparse(metapath) + if not self.annex_is_s3: + # Read current metadata if present + if os.path.exists(metapath): + with open(metapath) as fyaml: + metadata = yaml.load(fyaml, Loader=OrderedLoader) or {} + # Protect against empty file return metadata - def _save_metadata(self, digest, metadata): - """Write metadata file for specified digest and data.""" - metapath = os.path.join(self.path, get_info_from_digest(digest)) - with open(metapath, 'w', encoding='utf-8') as fyaml: - yaml.dump(metadata, fyaml, default_flow_style=False) - os.chmod(metapath, self.WMODE) - def list(self): """ Iterate over annex files, returning for them: filename, size and insertion time. """ - for filename in os.listdir(self.path): - if not filename.endswith('.info'): - info = self._load_metadata(filename) - names = info.get('filenames', []) - for annexed_file in names.values(): - insertion_time = annexed_file['date'] - insertion_time = datetime.datetime.strptime(insertion_time, "%c").timestamp() - - #The file size must come from the filesystem - meta = os.stat(os.path.join(self.path, filename)) - yield filename, meta.st_size, insertion_time, names + + if self.annex_is_remote: + if not self.annex_is_s3: + # non-S3, remote annex + print("list functionality is not implemented for public annex over non-S3, http") + return + else: + # s3 list + # if http(s) uri is s3-compliant, then listing is easy + s3 = self.get_read_s3_client() + + # disable signing if accessing anonymously + s3.meta.events.register('choose-signer.s3.*', botocore.handlers.disable_signing) + + response = s3.list_objects_v2(Bucket=self.read_s3_bucket, Prefix=self.read_s3_prefix) + if 'Contents' not in response: + logging.info(f"No files found in '{self.read_s3_prefix}'") + return + + for obj in response['Contents']: + key = obj['Key'] + filename = os.path.basename(key) + + if filename.endswith(_INFOSUFFIX): + continue + + meta_obj_name = get_info_from_digest(key) + meta_obj = s3.get_object(Bucket=self.read_s3_bucket, Key=meta_obj_name) + info = yaml.safe_load(meta_obj['Body']) or {} + names = info.get('filenames', []) + for annexed_file in names.values(): + insertion_time = annexed_file['date'] + insertion_time = datetime.datetime.strptime(insertion_time, "%c").timestamp() + + size = obj['Size'] + + yield filename, size, insertion_time, names + + # local annex (i.e. 
file://) + else: + for filename in os.listdir(self.annex_path): + if not filename.endswith('.info'): + info = self._load_metadata(filename) + names = info.get('filenames', []) + for annexed_file in names.values(): + insertion_time = annexed_file['date'] + insertion_time = datetime.datetime.strptime(insertion_time, "%c").timestamp() + + #The file size must come from the filesystem + meta = os.stat(os.path.join(self.annex_path, filename)) + yield filename, meta.st_size, insertion_time, names def push(self, filepath): """ @@ -243,41 +483,86 @@ def push(self, filepath): """ # Compute hash digest = hashfile(filepath) - destpath = os.path.join(self.path, digest) - filename = os.path.basename(filepath) - # Prepare metadata file - metadata = self._load_metadata(digest) + if self.push_over_s3: + s3 = self.get_push_s3_client() + if s3 is None: + logging.error("could not get s3 client: get_push_s3_client failed") + sys.exit(1) + + destpath = os.path.join(self.push_s3_prefix, digest) + filename = os.path.basename(filepath) + key = destpath + + # Prepare metadata file + meta_obj_name = get_info_from_digest(key) + metadata = {} + try: + meta_obj = s3.get_object(Bucket=self.push_s3_bucket, Key=meta_obj_name) + metadata = yaml.safe_load(meta_obj['Body']) or {} + except Exception as e: + pass + + originfo = os.stat(filepath) + destinfo = None + try: + destinfo = s3.get_object(Bucket=self.push_s3_bucket, Key=key) + except Exception as e: + pass + if destinfo and destinfo["ContentLength"] == originfo.st_size and \ + filename in metadata.get('filenames', {}): + logging.debug("%s is already into annex, skipping it", filename) + else: + # Update them and write them back + fileset = metadata.setdefault('filenames', {}) + fileset.setdefault(filename, {}) + fileset[filename]['date'] = time.strftime("%c") - # Is file already present? - originfo = os.stat(filepath) - destinfo = None - if os.path.exists(destpath): - destinfo = os.stat(destpath) - if destinfo and destinfo.st_size == originfo.st_size and \ - filename in metadata.get('filenames', {}): - logging.debug('%s is already into annex, skipping it', filename) + with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.info') as tmp_file: + yaml.dump(metadata, tmp_file, default_flow_style=False) + s3.upload_file(tmp_file.name, self.push_s3_bucket, meta_obj_name) + logging.debug("Importing %s into annex (%s)", filepath, digest) + s3.upload_file(filepath, self.push_s3_bucket, key) else: - # Update them and write them back - fileset = metadata.setdefault('filenames', {}) - fileset.setdefault(filename, {}) - fileset[filename]['date'] = time.strftime("%c") - self._save_metadata(digest, metadata) + destpath = os.path.join(self.annex_push_path, digest) + filename = os.path.basename(filepath) - # Move binary file to annex - logging.debug('Importing %s into annex (%s)', filepath, digest) - shutil.copyfile(filepath, destpath) - os.chmod(destpath, self.WMODE) + # Prepare metadata file + metadata = self._load_metadata(digest) - # Verify permission are correct before copying - os.chmod(filepath, self.RMODE) + # Is file already present? 
+ originfo = os.stat(filepath) + destinfo = None + if os.path.exists(destpath): + destinfo = os.stat(destpath) + if destinfo and destinfo.st_size == originfo.st_size and \ + filename in metadata.get('filenames', {}): + logging.debug('%s is already into annex, skipping it', filename) + + else: + # Update them and write them back + fileset = metadata.setdefault('filenames', {}) + fileset.setdefault(filename, {}) + fileset[filename]['date'] = time.strftime("%c") + + metapath = os.path.join(self.annex_push_path, get_info_from_digest(digest)) + with open(metapath, 'w') as fyaml: + yaml.dump(metadata, fyaml, default_flow_style=False) + os.chmod(metapath, self.WMODE) + + # Move binary file to annex + logging.debug('Importing %s into annex (%s)', filepath, digest) + shutil.copyfile(filepath, destpath) + os.chmod(destpath, self.WMODE) + + # Verify permission are correct before copying + os.chmod(filepath, self.RMODE) # Create fake pointer file with open(filepath, 'w', encoding='utf-8') as fakefile: fakefile.write(digest) - def backup(self, packages, output_file=None): """ Create a full backup of package list @@ -301,17 +586,36 @@ def backup(self, packages, output_file=None): prefix='rift-annex-backup', suffix='.tar.gz').name + tmp_dir = None + if self.annex_is_remote: + tmp_dir = tempfile.TemporaryDirectory() + with tarfile.open(output_file, "w:gz") as tar: - # translate sourcefiles path to annex path and add it to the output_file for _file in filelist: digest = get_digest_from_path(_file) - annex_file = os.path.join(self.path, digest) - annex_file_info = os.path.join(self.path, get_info_from_digest(digest)) - - tar.add(annex_file, arcname=os.path.basename(annex_file)) - tar.add(annex_file_info, arcname=os.path.basename(annex_file_info)) + annex_file = os.path.join(self.annex_path, digest) + annex_file_info = os.path.join(self.annex_path, get_info_from_digest(digest)) + + if self.annex_is_remote: + for f in (annex_file, annex_file_info): + basename = os.path.basename(f) + tmp = os.path.join(tmp_dir.name, basename) + cmd = ["curl", "-sS", "-w", '"%{http_code}"', "-o", tmp, f] + try: + proc = subprocess.run(cmd, check=True, capture_output=True, text=True) + if "404" not in proc.stdout.strip(): + tar.add(tmp, arcname=basename) + except Exception as e: + logging.error("failed to fetch file from annex: {}".format(e)) + else: + tar.add(annex_file, arcname=os.path.basename(annex_file)) + tar.add(annex_file_info, arcname=os.path.basename(annex_file_info)) print(f"> {pkg_nb}/{total_packages} ({round((pkg_nb*100)/total_packages,2)})%\r" , end="") pkg_nb += 1 + + if tmp_dir: + tmp_dir.cleanup() + return output_file diff --git a/lib/rift/Config.py b/lib/rift/Config.py index 523a2822..19251ffe 100644 --- a/lib/rift/Config.py +++ b/lib/rift/Config.py @@ -86,6 +86,8 @@ class RiftDeprecatedConfWarning(FutureWarning): _DEFAULT_SYNC_METHOD = 'dnf' _DEFAULT_SYNC_INCLUDE = [] _DEFAULT_SYNC_EXCLUDE = [] +_DEFAULT_S3_CREDENTIAL_FILE = '~/.rift/auth.json' + class Config(): """ @@ -111,9 +113,33 @@ class Config(): 'packages_dir': { 'default': _DEFAULT_PKG_DIR, }, + 's3_credential_file': { + 'required': False, + 'default': _DEFAULT_S3_CREDENTIAL_FILE, + }, + 's3_auth_endpoint': { + 'required': False, + }, + 'idp_auth_endpoint': { + 'required': False + }, + 'idp_app_token': { + 'required': True + }, + 'annex_restore_cache': { + 'required': False, + }, 'annex': { 'required': True, }, + 'annex_is_s3': { + 'required': False, + 'default': False, + 'check': 'bool', + }, + 'annex_push': { + 'required': False, + }, 'working_repo': 
{ }, 'repos': { @@ -547,6 +573,12 @@ def _key_value(self, syntax, key, value, check): "bool": bool, } + if check == 'bool': + if not isinstance(value, bool): + raise DeclError( + f"Bad data type {value.__class__.__name__} for '{key}'" + ) + return value if check == 'dict': if not isinstance(value, dict): raise DeclError( diff --git a/lib/rift/Controller.py b/lib/rift/Controller.py index 02684272..0cecd6e6 100644 --- a/lib/rift/Controller.py +++ b/lib/rift/Controller.py @@ -40,6 +40,7 @@ from operator import attrgetter import random import time +import datetime import textwrap # Since pylint can not found rpm.error, disable this check from rpm import error as RpmError # pylint: disable=no-name-in-module @@ -48,7 +49,7 @@ from rift import RiftError, __version__ from rift.Annex import Annex, is_binary from rift.Config import Config, Staff, Modules -from rift.Gerrit import Review +from rift.auth import auth from rift.Mock import Mock from rift.Package import Package, Test from rift.Repository import LocalRepository, ProjectArchRepositories @@ -216,6 +217,9 @@ def make_parser(): subsubprs.add_argument('--dest', metavar='PATH', required=True, help='destination path') + # Auth options + subprs = subparsers.add_parser('auth', help='Authenticate for access to Annex write access') + # VM options subprs = subparsers.add_parser('vm', help='Manipulate VM process') subprs.add_argument('-a', '--arch', help='CPU architecture of the VM') @@ -265,11 +269,9 @@ def make_parser(): subprs.add_argument('--bump', dest='bump', action='store_true', help='also bump the release number') - # Gerrit review - subprs = subparsers.add_parser('gerrit', add_help=False, - help='Make Gerrit automatic review') - subprs.add_argument('--change', help="Gerrit Change-Id", required=True) - subprs.add_argument('--patchset', help="Gerrit patchset ID", required=True) + # GitLab review + subprs = subparsers.add_parser('gitlab', add_help=False, + help='Check specfiles for GitLab') subprs.add_argument('patch', metavar='PATCH', type=argparse.FileType('r')) # sync @@ -358,8 +360,8 @@ def action_annex(args, config, staff, modules): message(f"{srcfile}: not an annex pointer, ignoring") elif args.annex_cmd == 'delete': - annex.delete(args.id) - message(f"{args.id} has been deleted") + if annex.delete(args.id): + message('%s has been deleted' % args.id) elif args.annex_cmd == 'get': annex.get(args.id, args.dest) @@ -370,8 +372,26 @@ def action_annex(args, config, staff, modules): output_file = annex.backup( Package.list(config, staff, modules), args.output_file ) + message(f"Annex backup is available here: {output_file}") +def action_auth(args, config): + """Action for 'auth' sub-commands.""" + auth_obj = auth(config) + + if auth_obj.authenticate(): + msg = "succesfully authenticated" + + t = auth_obj.get_expiration_timestr() + if t != "": + msg += "; token expires in {}".format(t) + else: + msg += "; token expiration time is unknown" + + message(msg) + else: + message("error: authentication failed") + def _vm_start(vm): if vm.running(): message('VM is already running') @@ -921,23 +941,18 @@ def action_validdiff(args, config): return rc -def action_gerrit(args, config, staff, modules): - """Review a patchset for Gerrit (specfiles)""" - - review = Review() +def action_gitlab(args, config, staff, modules): + """Review a patchset for GitLab (specfiles)""" # Parse matching diff and specfiles in it - for patchedfile in parse_unidiff(args.patch): - filepath = patchedfile.path - names = filepath.split(os.path.sep) + for f in 
parse_unidiff(args.patch): + path = f.path + names = path.split(os.path.sep) if names[0] == config.get('packages_dir'): pkg = Package(names[1], config, staff, modules) - if filepath == pkg.specfile and not patchedfile.is_deleted_file: - Spec(pkg.specfile, config=config).analyze(review, pkg.dir) - - # Push review - review.msg_header = 'rpmlint analysis' - review.push(config, args.change, args.patchset) + if os.path.abspath(path) == pkg.specfile and not f.is_deleted_file: + spec = Spec(pkg.specfile, config=config) + spec.check() def action_sync(args, config): """Action for 'sync' command.""" @@ -1043,6 +1058,11 @@ def action(config, args): action_annex(args, config, *staff_modules(config)) return + # AUTH + if args.command == 'auth': + action_auth(args, config) + return + # VM if args.command == 'vm': return action_vm(args, config) @@ -1185,9 +1205,9 @@ def action(config, args): config=config).add_changelog_entry(author, args.comment, bump=getattr(args, 'bump', False)) - # GERRIT - elif args.command == 'gerrit': - return action_gerrit(args, config, *staff_modules(config)) + # GITLAB + elif args.command == 'gitlab': + return action_gitlab(args, config, *staff_modules(config)) # SYNC elif args.command == 'sync': diff --git a/lib/rift/RPM.py b/lib/rift/RPM.py index 7cfa3eae..246f6a2c 100644 --- a/lib/rift/RPM.py +++ b/lib/rift/RPM.py @@ -475,31 +475,6 @@ def check(self, pkg=None): if popen.returncode != 0: raise RiftError(stderr or 'rpmlint reported errors') - def analyze(self, review, configdir=None): - """Run `rpmlint' for this specfile and fill provided `review'.""" - cmd, env = self._check(configdir) - with Popen(cmd, stdout=PIPE, stderr=PIPE, env=env, universal_newlines=True) as popen: - stdout, stderr = popen.communicate() - if popen.returncode not in (0, 64, 66): - raise RiftError(stderr or f"rpmlint returned {popen.returncode}") - - for line in stdout.splitlines(): - if line.startswith(self.filepath + ':'): - line = line[len(self.filepath + ':'):] - try: - linenbr = None - code, txt = line.split(':', 1) - if code.isdigit(): - linenbr = int(code) - code, txt = txt.split(':', 1) - review.add_comment(self.filepath, linenbr, - code.strip(), txt.strip()) - except (ValueError, KeyError): - pass - - if popen.returncode != 0: - review.invalidate() - def supports_arch(self, arch): """ Returns True is package spec file does not restrict ExclusiveArch or if @@ -507,7 +482,6 @@ def supports_arch(self, arch): """ return not self.exclusive_archs or arch in self.exclusive_archs - class Variable(): """ diff --git a/lib/rift/auth.py b/lib/rift/auth.py new file mode 100644 index 00000000..77b9225e --- /dev/null +++ b/lib/rift/auth.py @@ -0,0 +1,327 @@ +# +# Copyright (C) 2014-2025 CEA +# +# This file is part of Rift project. +# +# This software is governed by the CeCILL license under French law and +# abiding by the rules of distribution of free software. You can use, +# modify and/ or redistribute the software under the terms of the CeCILL +# license as circulated by CEA, CNRS and INRIA at the following URL +# "http://www.cecill.info". +# +# As a counterpart to the access to the source code and rights to copy, +# modify and redistribute granted by the license, users are provided only +# with a limited warranty and the software's author, the holder of the +# economic rights, and the successive licensors have only limited +# liability. 
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""
+Auth:
+    This package manages rift S3 authentication
+"""
+import logging
+import datetime
+import errno
+import getpass
+import json
+import os
+import requests
+import sys
+import urllib3
+import xmltodict
+import yaml
+
+urllib3.disable_warnings()
+
+class auth():
+    """
+    auth: Manage rift S3 authentication.
+    This class handles IdP login and S3 credential retrieval for the annex.
+    """
+    def __init__(self, config, write_path=None):
+        self.idp_app_token = config.get('idp_app_token')
+        self.idp_auth_endpoint = config.get('idp_auth_endpoint')
+        self.s3_auth_endpoint = config.get('s3_auth_endpoint')
+        self.credentials_file = os.path.expanduser(config.get('s3_credential_file'))
+
+        self.config = {}
+        self.expiration_dt = ""
+
+    def get_expiration_timestr(self):
+        """
+        Returns a human readable expiration time string for the auth token, if possible.
+        If the token expiration date is not set, returns an empty string.
+        """
+        if not self.expiration_dt:
+            return ""
+        return self.expiration_dt.strftime("%a %b %d %H:%M:%S %Y")
+
+    def restore_state(self):
+        """
+        Loads data from the existing credentials file, if one exists.
+        If the credentials file contains expired data, expired items are removed from the file.
+        """
+
+        with open(self.credentials_file, 'r') as fs:
+            data = fs.read()
+
+        config = {}
+        try:
+            config = json.loads(data)
+        except Exception as e:
+            logging.info("failed to load json from existing credentials file: %s", e)
+
+        update_authfile = False
+
+        expiry = config.get("expiration")
+        if expiry:
+            expiration = datetime.datetime.strptime(expiry, "%Y-%m-%dT%H:%M:%SZ")
+            if expiration > datetime.datetime.now():
+                # S3 credentials are still valid
+                logging.info("found existing, valid S3 credentials")
+                self.expiration_dt = expiration
+            else:
+                # S3 credentials expired
+                logging.info("found existing, expired S3 credentials")
+                config.pop("expiration", None)
+                config.pop("access_key_id", None)
+                config.pop("secret_access_key", None)
+                config.pop("session_token", None)
+                update_authfile = True
+
+        idp_expiry = config.get("idp_token_expiration")
+        if idp_expiry:
+            expiration = datetime.datetime.strptime(idp_expiry, "%Y-%m-%dT%H:%M:%SZ")
+            if expiration > datetime.datetime.now():
+                # IDP access token is still valid
+                logging.info("found existing, valid idp access token")
+            else:
+                # IDP access token has expired
+                logging.info("found existing, expired idp access token")
+                config.pop("idp_token", None)
+                config.pop("idp_token_expiration", None)
+                update_authfile = True
+
+        self.config = config
+
+        if update_authfile:
+            self.save_state()
+
+    def save_state(self):
+        """
+        Saves auth object config information to the credentials file.
+        """
+        umask = os.umask(0)
+        fd = os.open(
+            path = self.credentials_file,
+            flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
+            mode=0o600
+        )
+        with open(fd, "w") as fs:
+            json.dump(self.config, fs, indent=2, sort_keys=True)
+
+    # Step 1: Get OpenID token
+    def get_idp_token(self):
+        """
+        Get OpenID Token
+        """
+        # curl \
+        #   -d "client_id=minio" \
+        #   -d "grant_type=password" \
+        #   -d "username=$INTI_LOGIN" \
+        #   -d "password=$INTI_PASSWORD" \
+        #   -H "Content-Type: application/x-www-form-urlencoded" \
+        #   "https://idp.ccc.ocre.cea.fr/auth/realms/minio/protocol/openid-connect/token"
+
+        token = self.config.get("idp_token")
+        if token:
+            logging.info("retrieved existing idp_token from auth file")
+            return True
+
+        if not self.idp_auth_endpoint:
+            logging.error("missing required config parameter: idp_auth_endpoint")
+            return False
+
+        client_secret = self.idp_app_token
+
+        user = os.environ.get("RIFT_AUTH_USER")
+        if not user:
+            default_user = getpass.getuser()
+            user = input("Username [{}]: ".format(default_user)) or default_user
+
+        password = os.environ.get("RIFT_AUTH_PASSWORD")
+        if not password:
+            password = getpass.getpass('Password: ')
+
+        data = {
+            'client_id': 'minio',
+            'grant_type': 'password',
+            'username': user,
+            'password': password,
+            'client_secret': client_secret,
+        }
+
+        res = requests.post(
+            self.idp_auth_endpoint,
+            data = data,
+            headers = {"Content-Type": "application/x-www-form-urlencoded"}
+        )
+
+        js = res.json()
+
+        token = js.get("access_token")
+        if not token:
+            logging.error("received unexpected response while fetching idp access token: missing field 'access_token'")
+            return False
+
+        expires_in_sec = js.get("expires_in")
+        if not expires_in_sec:
+            logging.info("received unexpected response while fetching idp access token: missing field 'expires_in'")
+            expires_in_sec = 0
+
+        expire_dt = datetime.datetime.now() + datetime.timedelta(seconds=expires_in_sec)
+
+        self.config["idp_token_expiration"] = expire_dt.strftime("%Y-%m-%dT%H:%M:%SZ")
+        self.config["idp_token"] = token
+        self.save_state()
+
+        return True
+
+    # Step 2: Get S3 credentials using token from (1)
+    def get_s3_credentials(self):
+        """
+        Obtains S3 credentials using an already-obtained OpenID token, unless
+        S3 credentials are already present in the auth object's config, in which
+        case nothing needs to be fetched.
+
+        Returns True on success, False on failure.
+        """
+        # curl \
+        #   -s \
+        #   -X POST \
+        #   https://annexe-forge.ccc.ocre.cea.fr \
+        #   -H "Content-Type: application/x-www-form-urlencoded" \
+        #   -d "Version=2011-06-15" \
+        #   -d "Action=AssumeRoleWithWebIdentity" \
+        #   -d "DurationSeconds=86000" \
+        #   -d "WebIdentityToken=$ACCESS_TOKEN"
+        access_key_id = self.config.get("access_key_id", "")
+        secret_access_key = self.config.get("secret_access_key", "")
+        session_token = self.config.get("session_token", "")
+
+        if "" not in (access_key_id, secret_access_key, session_token):
+            return True
+
+        if not self.s3_auth_endpoint:
+            logging.error("missing required config parameter: s3_auth_endpoint")
+            return False
+
+        if not self.get_idp_token():
+            logging.error("failed to get idp access token")
+            return False
+
+        data = {
+            'Version': '2011-06-15',
+            'Action': 'AssumeRoleWithWebIdentity',
+            'DurationSeconds': '86000',
+            'WebIdentityToken': self.config["idp_token"],
+        }
+
+        res = requests.post(
+            self.s3_auth_endpoint,
+            data = data,
+            headers = {"Content-Type": "application/x-www-form-urlencoded"},
+            verify = False
+        )
+
+        res_xml = xmltodict.parse(res.text)
+
+        creds = res_xml.get("AssumeRoleWithWebIdentityResponse")
+        if not creds:
+            logging.error("S3 credential response missing expected key: AssumeRoleWithWebIdentityResponse")
+            return False
+
+        creds = creds.get("AssumeRoleWithWebIdentityResult")
+        if not creds:
+            logging.error("S3 credential response missing expected key: AssumeRoleWithWebIdentityResult")
+            return False
+
+        creds = creds.get("Credentials")
+        if not creds:
+            logging.error("S3 credential response missing expected key: Credentials")
+            return False
+
+        access_key_id = creds.get("AccessKeyId", "")
+        secret_access_key = creds.get("SecretAccessKey", "")
+        session_token = creds.get("SessionToken", "")
+        expiration = creds.get("Expiration", "")
+
+        if "" in (access_key_id, secret_access_key, session_token, expiration):
+            logging.error("one or more expected credential values is missing: AccessKeyId, SecretAccessKey, SessionToken, Expiration")
+            return False
+
+        self.config["access_key_id"] = access_key_id
+        self.config["secret_access_key"] = secret_access_key
+        self.config["session_token"] = session_token
+        self.config["expiration"] = expiration
+
+        self.expiration_dt = datetime.datetime.strptime(expiration, "%Y-%m-%dT%H:%M:%SZ")
+
+        self.save_state()
+
+        return True
+
+    def authenticate(self):
+        """
+        Ensures S3 credentials are available.
+        Returns True once S3 credentials are available; exits on failure.
+
+        This is the method auth object consumers should invoke to
+        ensure authentication credentials are available.
+        """
+
+        aws_access_key_id = os.environ.get("AWS_ACCESS_KEY_ID")
+        aws_secret_access_key = os.environ.get("AWS_SECRET_ACCESS_KEY")
+        aws_session_token = os.environ.get("AWS_SESSION_TOKEN")
+
+        if None not in (aws_access_key_id, aws_secret_access_key):
+            logging.info("found AWS S3 variables in environment; will bypass credentials file")
+            logging.info("to allow use of credential file, please clear these environment variables: AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN")
+            self.config["access_key_id"] = aws_access_key_id
+            self.config["secret_access_key"] = aws_secret_access_key
+            self.config["session_token"] = aws_session_token
+            return True
+
+        if os.path.isfile(self.credentials_file):
+            logging.info("found credentials file: {}".format(self.credentials_file))
+            self.restore_state()
+        else:
+            base = os.path.dirname(self.credentials_file)
+            if os.path.exists(base):
+                if not os.path.isdir(base):
+                    logging.error("{} should be a directory".format(base))
+                    sys.exit(1)
+            else:
+                os.makedirs(base)
+
+        if not self.get_s3_credentials():
+            logging.error("failed to obtain S3 credentials")
+            sys.exit(1)
+
+        return True
+
+
diff --git a/lib/rift/patches.py b/lib/rift/patches.py
index 3690af7a..490766dd 100644
--- a/lib/rift/patches.py
+++ b/lib/rift/patches.py
@@ -119,6 +119,18 @@ def _validate_patched_file(patched_file, config, modules, staff):
         logging.debug('Ignoring project config file: %s', filepath)
         return False
 
+    if filepath == '.gitlab-ci.yml':
+        logging.debug('Ignoring gitlab ci file: %s', filepath)
+        return False
+
+    if filepath == 'CODEOWNERS':
+        logging.debug('Ignoring CODEOWNERS file: %s', filepath)
+        return False
+
+    if names[0] == "gitlab-ci":
+        logging.debug('Ignoring gitlab ci directory: %s', filepath)
+        return False
+
     if patched_file.binary:
         raise RiftError(f"Binary file detected: {filepath}")
 
diff --git a/setup.py b/setup.py
index 578757bd..13ec549c 100755
--- a/setup.py
+++ b/setup.py
@@ -42,6 +42,7 @@
     author_email='aurelien.cedeyn@cea.fr',
    package_dir={'': 'lib'},
    packages=['rift'],
+   install_requires=['boto3>=1.18.65', 'xmltodict'],
    py_modules = ['unidiff'],
    data_files = [
        ('/usr/share/rift/template', ['template/project.conf', 'template/local.conf', 'template/mock.tpl']),
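
A minimal usage sketch of how the pieces added by this patch fit together, assuming a project configuration has already been loaded into a rift Config object named config with the new keys (annex, annex_is_s3, annex_push, annex_restore_cache, s3_credential_file, s3_auth_endpoint, idp_auth_endpoint, idp_app_token). The two helper functions below are hypothetical; only auth.authenticate(), Annex.push() and Annex.get() come from the patch itself.

from rift.Annex import Annex
from rift.auth import auth

def push_to_remote_annex(config, filepath):
    # Obtain (or reuse cached) S3 credentials from the IdP before pushing.
    authenticator = auth(config)
    if not authenticator.authenticate():
        raise RuntimeError("authentication failed")

    annex = Annex(config)
    # With annex_push pointing at an http(s) S3 endpoint, push() uploads the
    # file and its .info metadata object, then rewrites the local file as a
    # pointer containing the digest.
    annex.push(filepath)

def restore_from_annex(config, identifier, destpath):
    annex = Annex(config)
    # get() tries annex_restore_cache first, then the read annex, then the
    # annex_push location.
    annex.get(identifier, destpath)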