Skip to content

Commit dd088d1

Browse files
committed
adding example to estimate os, and updating builder to derive tags and most similar os
1 parent f8948b2 commit dd088d1

File tree

7 files changed

+157
-11
lines changed

7 files changed

+157
-11
lines changed
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
#!/usr/bin/env python
2+
3+
# This is an example of estimating the operating system of an image package from within python
4+
5+
from singularity.analysis.classify import estimate_os
6+
7+
package = "python:3.6.0.img.zip"
8+
9+
# We can obtain the estimated os (top match)
10+
estimated_os = estimate_os(package=package)
11+
# Most similar OS found to be debian:7.11
12+
13+
# We can also get the whole list and values
14+
os_similarity = estimate_os(package=package,return_top=False)
15+
16+
File renamed without changes.

singularity/analysis/classify.py

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
#!/usr/bin/env python
2+
3+
'''
4+
classify.py: part of singularity package
5+
functions to tag and classify images
6+
7+
'''
8+
9+
from glob import glob
10+
import json
11+
import os
12+
import re
13+
import requests
14+
from singularity.logman import bot
15+
from singularity.analysis.compare import (
16+
compare_packages,
17+
compare_containers
18+
)
19+
from singularity.analysis.utils import get_package_base
20+
from singularity.package import package as make_package
21+
from singularity.utils import get_installdir
22+
from singularity.views.utils import get_container_contents
23+
24+
from singularity.package import (
25+
load_package,
26+
package
27+
)
28+
29+
import numpy
30+
import pandas
31+
import shutil
32+
import sys
33+
import tempfile
34+
import zipfile
35+
36+
37+
38+
def get_diff(container=None,package=None,sudopw=None):
    '''get diff will return a dictionary of folder names and the files that
    are in an image or package but not in the most similar base operating
    system. The algorithm is explained below.
    :param container: if provided, will use container as image. It is turned
    into a package with make_package when no package is given
    :param package: if provided, can be used instead of container
    :param sudopw: needed if a package isn't provided (will prompt user)
    :returns: dict with one key per folder name, each holding the list of
    file names found under a folder of that name

    ::notes

    The algorithm works as follows:
    1) first compare package to set of base OS (provided with shub)
    2) subtract the most similar os from image, leaving "custom" files
    3) organize custom files into dict based on folder name

    '''
    # Identity check: None is a singleton, so "is" is the reliable test
    if package is None:
        package = make_package(container,remove_image=True,sudopw=sudopw)

    # Find the most similar os
    most_similar = estimate_os(package=package,sudopw=sudopw)
    similar_package = "%s/docker-os/%s.img.zip" %(get_package_base(),most_similar)

    comparison = compare_containers(image_package1=package,
                                    image_package2=similar_package,
                                    by='files.txt')['files.txt']

    # Entries reported for the first package only (ours, not the base os)
    container_unique = comparison['unique1']

    # Try to organize files based on common folders. Only the immediate
    # parent folder name is used as the key, so files under different paths
    # that end in the same folder name (eg, /bin and /usr/bin) share a key.
    folders = dict()
    for file_path in container_unique:
        fileparts = file_path.split('/')
        # A path without any separator has no parent, file it under '/'
        folder = fileparts[-2] if len(fileparts) >= 2 else '/'
        folders.setdefault(folder,[]).append(fileparts[-1])

    return folders
82+
83+
84+
###################################################################################
85+
# TAGGING #########################################################################
86+
###################################################################################
87+
88+
89+
def estimate_os(container=None,package=None,sudopw=None,return_top=True):
    '''estimate os will compare a package to singularity python's database of
    operating system images, and return the docker image most similar
    :param container: image to estimate, only used (to build a package with
    make_package) when package is not provided
    :param package: the package created from the image to estimate.
    :param sudopw: passed on to make_package when a package must be created
    :param return_top: return only the most similar (estimated os) default True
    :returns: the label with the highest score if return_top is true,
    otherwise the whole comparison table with a single 'SCORE' column
    '''
    if package is None:
        package = make_package(container,remove_image=True,sudopw=sudopw)

    # Transpose the 'files.txt' comparison and name its one column 'SCORE',
    # so each row label has a single score
    comparison = compare_packages(packages_set1=[package])['files.txt'].transpose()
    comparison.columns = ['SCORE']
    most_similar = comparison['SCORE'].idxmax()

    # Format into a single string: passing two arguments to print() shows a
    # tuple under Python 2 and a doubled space under Python 3
    print("Most similar OS found to be %s" %(most_similar,))
    if return_top:
        return most_similar
    return comparison
105+
106+
107+
def get_tags(container=None,package=None,sudopw=None,search_folders=None):
    '''get tags will return a list of tags that describe the software in an image,
    meaning inside of a particular folder. If search_folders is not defined, uses bin
    :param container: if provided, will use container as image (passed on to
    get_diff)
    :param package: if provided, can be used instead of container
    :param sudopw: passed on to get_diff, needed if a package isn't provided
    :param search_folders: specify one or more folders to look for tags. Can be
    a single folder name or a list, tuple or set of names. Default is 'bin'
    :returns: sorted list of unique file names found in the search folders

    ::notes

    The algorithm works as follows:
    1) first compare package to set of base OS (provided with shub)
    2) subtract the most similar os from image, leaving "custom" files
    3) organize custom files into dict based on folder name
    4) return the files found in search_folders as tags
    '''
    folders = get_diff(container=container,
                       package=package,
                       sudopw=sudopw)

    if search_folders is None:
        search_folders = 'bin'

    # A single name is wrapped so the loop below always sees a collection.
    # Tuples and sets are accepted as-is: wrapping them would look up the
    # whole collection as one key, which can never match a folder name.
    if not isinstance(search_folders,(list,tuple,set)):
        search_folders = [search_folders]

    tags = []
    for search_folder in search_folders:
        if search_folder in folders:
            bot.logger.info("Adding tags for folder %s",search_folder)
            tags.extend(folders[search_folder])
        else:
            bot.logger.info("Did not find folder %s in difference.",search_folder)

    # Unique and sorted, the same shape the list had with numpy.unique
    return sorted(set(tags))

singularity/analysis/compare.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -72,10 +72,6 @@ def compare_containers(container1=None,container2=None,by=None,
7272
"unique2": unique2,
7373
"total1": len(container1_guts[b]),
7474
"total2": len(container2_guts[b])}
75-
76-
bot.logger.info("Intersect has length %s",len(intersect))
77-
bot.logger.info("Unique to 1: %s",len(unique1))
78-
bot.logger.info("Unique to 2: %s",len(unique2))
7975
comparisons[b] = comparison
8076

8177
return comparisons

singularity/build/google.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -183,11 +183,6 @@ def run_build(build_dir=None,spec_file=None,repo_url=None,token=None,size=None,b
183183
with zipfile.ZipFile(image_package) as zf:
184184
zf.extractall(dest_dir)
185185

186-
# Generate tags based on software
187-
#TODO:
188-
# generate list of shared paths across operating systems
189-
# figure out how to generate list of tags...
190-
191186
# The path to the images on google drive will be the github url/commit folder
192187
image_path = "%s/%s" %(re.sub('^http.+//www[.]','',params['repo_url']),params['commit'])
193188
build_files = glob("%s/*" %(dest_dir))

singularity/views/utils.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,6 @@ def get_container_contents(container=None,gets=None,split_delim=None,image_packa
4848

4949
# Visualization deployed local or elsewhere
5050
if SINGULARITY_HUB == "False":
51-
bot.logger.debug("Not running from Singularity Hub.")
5251
tmpdir = tempfile.mkdtemp()
5352
if image_package == None:
5453
image_package = package(image_path=container,
@@ -63,7 +62,6 @@ def get_container_contents(container=None,gets=None,split_delim=None,image_packa
6362

6463
# Visualization deployed by singularity hub
6564
else:
66-
bot.logger.debug("Running from Singularity Hub.")
6765
for sfile in container.files:
6866
for gut_key in gets:
6967
if os.path.basename(sfile['name']) == gut_key:

0 commit comments

Comments
 (0)