Commit fdf5e3b

Add links database reading and API
The $repo.links.tar.gz databases provided in the repositories are now parsed by archweb, so that the linked sonames can be exposed via a simple API and listed in the package details overview.
1 parent 7a10972 commit fdf5e3b

16 files changed, +493 −124 lines

README.md

Lines changed: 2 additions & 0 deletions

@@ -125,6 +125,8 @@ Archweb provides multiple management commands for importing various sorts of data
 * mirrorresolv - Poll every active mirror URL and determine whether it has IPv4 and/or IPv6 addresses.
 * populate_signoffs - retrieves the latest commit message of a signoff-eligible package.
 * update_planet - Import all feeds for users who have a valid website and website_rss in their user profile.
+* read_links - Reads a repo.links.tar.gz file and updates the Soname model.
+* read_links_inotify - Watches a templated path for updates of *.links.tar.gz files and updates the Soname model with them.
 
 # Updating iPXE image

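Both commands can also be driven from Python via Django's management API. A minimal sketch, assuming a configured archweb environment; the links database path here is illustrative, not taken from this commit:

from django.core.management import call_command

# one-shot import of a single links database into the Soname model
call_command('read_links', '/srv/ftp/core/os/x86_64/core.links.tar.gz')

# long-running watcher using the default path template
# '/srv/ftp/%(repo)s/os/%(arch)s/' (blocks in the notifier loop)
call_command('read_links_inotify')
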
main/management/commands/archweb_inotify.py

Lines changed: 139 additions & 0 deletions

@@ -0,0 +1,139 @@
import logging
import multiprocessing
import os
import pyinotify
import sys
import threading
import time

from django.db.utils import OperationalError


logging.basicConfig(
    level=logging.WARNING,
    format='%(asctime)s -> %(levelname)s: %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    stream=sys.stderr)
logger = logging.getLogger()


class Database(object):
    '''An object representing a pacman database on the filesystem. It stores
    various bits of metadata and state representing the file path, when we
    last updated, how long our delay is before performing the update, whether
    we are updating now, etc.'''

    def __init__(self, arch, path, callback_func, delay=60.0, nice=3, retry_limit=5):
        self.arch = arch
        self.path = path
        self.delay = delay
        self.nice = nice
        self.retry_limit = retry_limit
        self.mtime = None
        self.last_import = None
        self.update_thread = None
        self.updating = False
        self.run_again = False
        self.lock = threading.Lock()
        self.callback_func = callback_func

    def _start_update_countdown(self):
        self.update_thread = threading.Timer(self.delay, self.update)
        logger.info('Starting %.1f second countdown to update %s',
                    self.delay, self.path)
        self.update_thread.start()

    def queue_for_update(self, mtime):
        logger.debug('Queueing database %s...', self.path)
        with self.lock:
            self.mtime = mtime
            if self.updating:
                # store the fact that we will need to run it again
                self.run_again = True
                return
            if self.update_thread:
                self.update_thread.cancel()
                self.update_thread = None
            self._start_update_countdown()

    def update(self):
        logger.debug('Updating database %s...', self.path)
        with self.lock:
            self.last_import = time.time()
            self.updating = True

        try:
            # invoke the import callback. we do this in a separate process
            # for memory conservation purposes; these processes grow rather
            # large so it is best to free up the memory ASAP.
            # A retry mechanism exists for when reporead_inotify runs on a
            # different machine.
            def run():
                retry = True
                retry_count = 0
                if self.nice != 0:
                    os.nice(self.nice)
                while retry and retry_count < self.retry_limit:
                    try:
                        self.callback_func(self.arch, self.path, {})
                        retry = False
                    except OperationalError as exc:
                        retry_count += 1
                        logger.error('Unable to update database \'%s\', retrying=%d',
                                     self.path, retry_count, exc_info=exc)
                        time.sleep(5)

                if retry_count == self.retry_limit:
                    logger.error('Unable to update database, exceeded maximum retries')

            process = multiprocessing.Process(target=run)
            process.start()
            process.join()
        finally:
            logger.debug('Done updating database %s.', self.path)
            with self.lock:
                self.update_thread = None
                self.updating = False
                if self.run_again:
                    self.run_again = False
                    self._start_update_countdown()


class EventHandler(pyinotify.ProcessEvent):
    '''Our main event handler which listens for database change events.
    Because we are watching the whole directory, we filter down and only look
    at those events dealing with the database files we care about.'''

    def my_init(self, filename_suffix, callback_func, **kwargs):
        self.databases = {}
        self.arch_lookup = {}

        self.filename_suffix = filename_suffix
        self.callback_func = callback_func

        # we really want a single path to arch mapping, so massage the data
        arch_paths = kwargs['arch_paths']
        for arch, paths in arch_paths.items():
            self.arch_lookup.update((path.rstrip('/'), arch) for path in paths)

    def process_default(self, event):
        '''Primary event processing function which kicks off update timer
        threads if a watched database file was updated.'''
        name = event.name
        if not name:
            return
        # screen to only the files we care about, skipping temp files
        if name.endswith(self.filename_suffix) and not name.startswith('.'):
            path = event.pathname
            stat = os.stat(path)
            database = self.databases.get(path, None)
            if database is None:
                arch = self.arch_lookup.get(event.path, None)
                if arch is None:
                    logger.warning(
                        'Could not determine arch for %s, skipping update',
                        path)
                    return
                database = Database(arch, path, self.callback_func)
                self.databases[path] = database
            database.queue_for_update(stat.st_mtime)


# vim: set ts=4 sw=4 et:
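Database implements a per-file debounce: each inotify event resets a threading.Timer, and the callback fires in a forked worker process only after the path has stayed quiet for `delay` seconds; events arriving during an import set run_again so a fresh countdown starts afterwards. A minimal standalone sketch of that behavior on Linux, assuming archweb's dependencies are importable (the module path is inferred from the relative import in read_links_inotify below; the callback and file path are stand-ins):

import time

from main.management.commands.archweb_inotify import Database


def fake_import(arch, path, options):
    # stand-in for wrapper_read_links(); runs in a child process
    print('would import %s (%s)' % (path, arch))


if __name__ == '__main__':
    db = Database('x86_64', '/tmp/core.links.tar.gz', fake_import, delay=2.0)
    # two events in quick succession: the second cancels the pending timer
    # and restarts the countdown, so fake_import runs exactly once
    db.queue_for_update(time.time())
    db.queue_for_update(time.time())
    time.sleep(3)
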
main/management/commands/read_links_inotify.py

Lines changed: 101 additions & 0 deletions

@@ -0,0 +1,101 @@
import logging
import pyinotify
import sys
import threading

from django.core.management.base import BaseCommand, CommandError
from django.db import connection, transaction

from main.models import Arch, Repo
from .readlinks import read_links
from .archweb_inotify import EventHandler

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s -> %(levelname)s: %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    stream=sys.stderr)
logger = logging.getLogger()


def wrapper_read_links(arch, filepath, obj):
    read_links(filepath)


class Command(BaseCommand):
    help = "Watch links files and run an update when necessary."
    args = "[path_template]"

    def handle(self, path_template=None, **options):
        v = int(options.get('verbosity', 0))
        if v == 0:
            logger.level = logging.ERROR
        elif v == 1:
            logger.level = logging.INFO
        elif v >= 2:
            logger.level = logging.DEBUG

        if not path_template:
            path_template = '/srv/ftp/%(repo)s/os/%(arch)s/'
        self.path_template = path_template

        notifier = self.setup_notifier()
        # this thread is done using the database; all future access is done
        # in the spawned read_links() processes, so close the otherwise
        # completely idle connection.
        connection.close()

        logger.info('Entering notifier loop')
        notifier.loop()

        logger.info('Cancelling remaining threads...')
        for thread in threading.enumerate():
            if hasattr(thread, 'cancel'):
                thread.cancel()

    @transaction.atomic
    def setup_notifier(self):
        '''Set up and configure the inotify machinery and logic.
        This takes the provided or default path_template and builds a list of
        directories we need to watch for database updates. It then validates
        and passes these on to the various pyinotify pieces as necessary and
        finally builds and returns a notifier object.'''
        with transaction.atomic():
            arches = Arch.objects.filter(agnostic=False)
            repos = Repo.objects.all()

        arch_path_map = {arch: None for arch in arches}
        all_paths = set()
        total_paths = 0
        for arch in arches:
            combos = ({'repo': repo.name.lower(), 'arch': arch.name}
                      for repo in repos)
            # take a python format string and generate all unique combinations
            # of directories from it; using set() ensures we filter it down
            paths = {self.path_template % values for values in combos}
            total_paths += len(paths)
            all_paths |= paths
            arch_path_map[arch] = paths

        logger.info('Watching %d total paths', total_paths)
        logger.debug(all_paths)

        # sanity check- basically ensure every path we created from the
        # template mapped to only one architecture
        if total_paths != len(all_paths):
            raise CommandError('path template did not uniquely '
                               'determine architecture for each file')

        # A proper atomic replacement of the database as done by rsync is type
        # IN_MOVED_TO. repo-add/remove will finish with an IN_CLOSE_WRITE.
        mask = pyinotify.IN_CLOSE_WRITE | pyinotify.IN_MOVED_TO

        manager = pyinotify.WatchManager()
        for name in all_paths:
            manager.add_watch(name, mask)

        handler = EventHandler(arch_paths=arch_path_map,
                               filename_suffix='.links.tar.gz',
                               callback_func=wrapper_read_links)
        return pyinotify.Notifier(manager, handler)


# vim: set ts=4 sw=4 et:
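setup_notifier() expands the path template into one watch directory per (repo, arch) pair and aborts if the template does not keep architectures apart. The expansion and its sanity check can be reproduced standalone; the repo and arch names below are illustrative, not read from the database:

path_template = '/srv/ftp/%(repo)s/os/%(arch)s/'
arches = ['x86_64']
repos = ['core', 'extra', 'community']

all_paths = set()
total_paths = 0
for arch in arches:
    combos = ({'repo': repo, 'arch': arch} for repo in repos)
    paths = {path_template % values for values in combos}
    total_paths += len(paths)
    all_paths |= paths

# a template that ignored %(arch)s would map several arches onto the same
# directory; total_paths would then exceed len(all_paths) and the command
# would raise the CommandError above
assert total_paths == len(all_paths)
print(sorted(all_paths))
# ['/srv/ftp/community/os/x86_64/', '/srv/ftp/core/os/x86_64/',
#  '/srv/ftp/extra/os/x86_64/']
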
main/management/commands/read_links.py

Lines changed: 98 additions & 0 deletions

@@ -0,0 +1,98 @@
import logging
import os
import re
import sys
import tarfile

from django.core.management.base import BaseCommand, CommandError

from main.models import Repo, Package, Soname


logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s -> %(levelname)s: %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    stream=sys.stderr)
logger = logging.getLogger()


class Command(BaseCommand):
    help = "Import links db (soname mapping)."
    missing_args_message = 'missing links db'

    def add_arguments(self, parser):
        parser.add_argument('args', nargs='*', help='<arch> <filename>')

    def handle(self, filename=None, **options):
        if not filename:
            raise CommandError('Links database file is required.')

        filename = os.path.normpath(filename)
        if not os.path.exists(filename) or not os.path.isfile(filename):
            raise CommandError('Specified links database file does not exist.')

        v = int(options.get('verbosity', 0))
        if v == 0:
            logger.level = logging.ERROR
        elif v == 1:
            logger.level = logging.INFO
        elif v >= 2:
            logger.level = logging.DEBUG

        return read_linksdb(filename)


def get_pkginfo(pkgnamever):
    pkgname, pkgver, pkgrel = pkgnamever.rsplit('-', 2)
    epoch = '0'
    if ':' in pkgver:
        epoch, pkgver = pkgver.split(':')

    return pkgname, epoch, pkgver, pkgrel
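get_pkginfo() relies on rsplit with maxsplit=2: only the last two dash-separated fields are the version and release, so hyphens inside the package name survive, and an optional epoch prefix is split off the version. Worked examples (package strings are illustrative; the import path mirrors the file above):

from main.management.commands.read_links import get_pkginfo

print(get_pkginfo('gcc-libs-8.2.1+20180831-1'))
# -> ('gcc-libs', '0', '8.2.1+20180831', '1')

print(get_pkginfo('vlc-1:3.0.4-6'))
# -> ('vlc', '1', '3.0.4', '6')   (explicit epoch split off the version)
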
def read_linksdb(repopath):
    logger.info("Starting linksdb parsing")
    if not os.path.exists(repopath):
        logger.error("Could not read file %s", repopath)

    logger.info("Reading repo tarfile %s", repopath)
    filename = os.path.split(repopath)[1]
    m = re.match(r"^(.*)\.links\.tar(\..*)?$", filename)
    if m:
        reponame = m.group(1)
    else:
        logger.error("File does not have the proper extension")
        raise Exception("File does not have the proper extension")

    repository = Repo.objects.get(name__iexact=reponame)
    sonames = []

    with tarfile.open(repopath, 'r') as repodb:
        logger.debug("Starting soname parsing")

        for tarinfo in repodb.getmembers():
            if tarinfo.isreg():
                pkgnamever = os.path.dirname(tarinfo.name)
                pkgnamever = pkgnamever.replace('./', '')
                pkgname, epoch, pkgver, pkgrel = get_pkginfo(pkgnamever)

                dbpkg = Package.objects.filter(pkgname=pkgname, pkgver=pkgver,
                                               pkgrel=pkgrel, epoch=epoch,
                                               repo=repository).first()

                if not dbpkg:
                    logger.info("Package name '%s' not found in repo database", pkgname)
                    continue

                files_data = repodb.extractfile(tarinfo)
                old_sonames = Soname.objects.filter(pkg=dbpkg)
                for soname in files_data:
                    soname = soname.strip().decode()
                    # New soname which we do not track yet for this package
                    if not old_sonames.filter(name=soname):
                        sonames.append(Soname(pkg=dbpkg, name=soname))

    if sonames:
        Soname.objects.bulk_create(sonames)
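read_linksdb() derives the repo from the filename, then treats every regular file in the tarball as one package entry: the directory part of the member name is the pkgname-[epoch:]pkgver-pkgrel string, and the file body holds one soname per line. A hedged fixture builder for local testing; the member name 'links' and the bash version are inferred from that layout, not taken from this commit:

import io
import tarfile

# build a minimal core.links.tar.gz in the layout read_linksdb() parses:
# one regular file per package directory, one soname per line
payload = b'libc.so.6\nlibncursesw.so.6\nlibreadline.so.8\n'
with tarfile.open('core.links.tar.gz', 'w:gz') as tar:
    info = tarfile.TarInfo('bash-5.0.003-1/links')  # name/version are examples
    info.size = len(payload)
    tar.addfile(info, io.BytesIO(payload))

# read_linksdb('core.links.tar.gz') would then resolve the 'core' repo from
# the filename and look up bash 5.0.003-1 in that repo before importing.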
