|
| 1 | +#!/usr/bin/env python |
| 2 | + |
| 3 | +''' |
| 4 | +classify.py: part of singularity package |
| 5 | +functions to tag and classify images |
| 6 | +
|
| 7 | +''' |
| 8 | + |
| 9 | +from glob import glob |
| 10 | +import json |
| 11 | +import os |
| 12 | +import re |
| 13 | +import requests |
| 14 | +from singularity.logman import bot |
| 15 | +from singularity.analysis.compare import ( |
| 16 | + compare_packages, |
| 17 | + compare_containers |
| 18 | +) |
| 19 | +from singularity.analysis.utils import get_package_base |
| 20 | +from singularity.package import package as make_package |
| 21 | +from singularity.utils import get_installdir |
| 22 | +from singularity.views.utils import get_container_contents |
| 23 | + |
| 24 | +from singularity.package import ( |
| 25 | + load_package, |
| 26 | + package |
| 27 | +) |
| 28 | + |
| 29 | +import numpy |
| 30 | +import pandas |
| 31 | +import shutil |
| 32 | +import sys |
| 33 | +import tempfile |
| 34 | +import zipfile |
| 35 | + |
| 36 | + |
| 37 | + |
def get_diff(container=None,package=None,sudopw=None):
    '''get diff will return a dictionary of folder names and files that
    are in an image or package vs. the most similar standard operating
    system. The algorithm is explained below.

    :param container: if provided, will use container as image. A package is
    generated from it when package is not provided
    :param package: if provided, can be used instead of container
    :param sudopw: needed if a package isn't provided (passed on to the
    function that packages the container)

    :returns: dict with one key per immediate parent folder name (not the
    full path), each holding the list of file names found under a folder of
    that name. Files without a parent folder are stored under '/'

    ::notes

    The algorithm works as follows:
    1) first compare package to set of base OS (provided with shub)
    2) subtract the most similar os from image, leaving "custom" files
    3) organize custom files into dict based on folder name

    '''
    if package is None:
        package = make_package(container,remove_image=True,sudopw=sudopw)

    # Find the most similar os
    most_similar = estimate_os(package=package,sudopw=sudopw)
    similar_package = "%s/docker-os/%s.img.zip" %(get_package_base(),most_similar)

    comparison = compare_containers(image_package1=package,
                                    image_package2=similar_package,
                                    by='files.txt')['files.txt']

    # 'unique1' holds the entries only found in the first package (the image)
    container_unique = comparison['unique1']

    # Try to organize files based on common folders:
    folders = dict()
    for file_path in container_unique:
        fileparts = file_path.split('/')
        filey = fileparts[-1]
        folder = fileparts[-2] if len(fileparts) >= 2 else '/'
        # A root level path like '/name' splits into ['', 'name'], leaving an
        # empty parent. Group it with the other root level files under '/'
        if not folder:
            folder = '/'
        folders.setdefault(folder,[]).append(filey)

    return folders
| 82 | + |
| 83 | + |
| 84 | +################################################################################### |
| 85 | +# TAGGING ######################################################################### |
| 86 | +################################################################################### |
| 87 | + |
| 88 | + |
def estimate_os(container=None,package=None,sudopw=None,return_top=True):
    '''estimate os will compare a package to singularity python's database of
    operating system images, and return the docker image most similar

    :param container: the image to estimate, packaged first when package is
    not provided
    :param package: the package created from the image to estimate.
    :param sudopw: passed on to the packaging function when a package has to
    be created from container
    :param return_top: return only the most similar (estimated os) default True

    :returns: the index label with the highest score if return_top is true,
    otherwise the complete comparison table with a single 'SCORE' column
    '''
    if package is None:
        package = make_package(container,remove_image=True,sudopw=sudopw)

    # After transposing, each row is a candidate and the single column is the
    # similarity score for the files.txt comparison
    comparison = compare_packages(packages_set1=[package])['files.txt'].transpose()
    comparison.columns = ['SCORE']
    most_similar = comparison['SCORE'].idxmax()

    # Format a single string so the message prints the same way whether print
    # is a statement (python 2) or a function (python 3)
    print("Most similar OS found to be %s" %(most_similar,))

    if return_top:
        return most_similar
    return comparison
| 105 | + |
| 106 | + |
def get_tags(container=None,package=None,sudopw=None,search_folders=None):
    '''get tags will return a list of tags that describe the software in an image,
    meaning the files inside of a particular folder. If search_folders is not
    defined, uses bin

    :param container: if provided, will use container as image. Can also provide
    :param package: if provided, can be used instead of container
    :param sudopw: passed on to get_diff, needed if a package isn't provided
    :param search_folders: specify one or more folders to look for tags. A
    single folder name or a list, tuple or set of names is accepted.
    Default is 'bin'

    :returns: sorted list of unique file names found in the search folders

    ::notes

    The algorithm works as follows:
    1) first compare package to set of base OS (provided with shub)
    2) subtract the most similar os from image, leaving "custom" files
    3) organize custom files into dict based on folder name
    4) return the files found in search_folders as tags
    '''
    folders = get_diff(container=container,
                       package=package,
                       sudopw=sudopw)

    if search_folders is None:
        search_folders = 'bin'

    # A single folder name is wrapped so the loop below always sees a
    # collection of names
    if not isinstance(search_folders,(list,tuple,set)):
        search_folders = [search_folders]

    tags = []
    for search_folder in search_folders:
        if search_folder in folders:
            bot.logger.info("Adding tags for folder %s",search_folder)
            tags.extend(folders[search_folder])
        else:
            bot.logger.info("Did not find folder %s in difference.",search_folder)

    # The same file name can show up in more than one folder, keep one of each
    return sorted(set(tags))
0 commit comments